# This file serves as a starting example input file for Allegro
# For a full, detailed set of general training+dataset options see configs/full.yaml in the NequIP repo:
# https://github.com/mir-group/nequip/blob/main/configs/full.yaml
# This file additionally documents the Allegro-specific options
# general
# Two folders will be used during the training: 'root'/process and 'root'/'run_name'
# run_name contains logfiles and saved models
# process contains processed data sets
# if 'root'/'run_name' exists, 'root'/'run_name'_'year'-'month'-'day'-'hour'-'min'-'s' will be used instead.
root: results/aspirin
run_name: example
# model initialization seed
seed: 123456
# data set seed, determines which data to sample from file
dataset_seed: 123456
# set true if a restarted run should append to the previous log file
append: true
# type of float to use, e.g. float32 or float64
default_dtype: float32
# -- network --
# tell nequip which modules to build
model_builders:
- allegro.model.Allegro
# the typical model builders from `nequip` can still be used:
- PerSpeciesRescale
- ForceOutput
- RescaleEnergyEtc
# radial cutoff in length units
r_max: 6.0
# average number of neighbors in an environment, used to normalize the sum; 'auto' precomputes it automatically from the training data
avg_num_neighbors: auto
# radial basis
# set true to train the Bessel roots
BesselBasis_trainable: true
# p-parameter in envelope function, as proposed in Klicpera, J. et al., arXiv:2003.03123
# sets it BOTH for the RadialBasisProjection AND the Allegro_Module
PolynomialCutoff_p: 6
# symmetry
# maximum order l to use in the spherical harmonics embedding: 1 is the baseline (fast), 2 is more accurate but slower, 3 is highly accurate but slow
l_max: 2
# whether to include E(3)-symmetry / parity
# allowed: o3_full, o3_restricted, so3
parity: o3_full
# number of tensor product layers, 1-3 usually best, more is more accurate but slower
num_layers: 2
# number of features, more is more accurate but slower, 1, 4, 8, 16, 64, 128 are good options to try depending on data set
env_embed_multiplicity: 64
# whether or not to embed the initial edge, true often works best
embed_initial_edge: true
# hidden layer dimensions of the 2-body embedding MLP
two_body_latent_mlp_latent_dimensions: [128, 256, 512, 1024]
# nonlinearity used in the 2-body embedding MLP
two_body_latent_mlp_nonlinearity: silu
# weight initialization of the 2-body embedding MLP
two_body_latent_mlp_initialization: uniform
# hidden layer dimensions of the latent MLP
# these MLPs are cheap if you have large l/env_embed_multiplicity, so this is a good place to put model capacity if you can afford it
# make these smaller only if you are in the ultra-fast/scalable regime
latent_mlp_latent_dimensions: [1024, 1024, 1024]
# nonlinearity used in the latent MLP
latent_mlp_nonlinearity: silu
# weight initialization of the latent MLP
latent_mlp_initialization: uniform
# whether to use a resnet update in the scalar latent space; true usually works best
latent_resnet: true
# hidden layer dimensions of the environment embedding MLP; an empty list works best (builds a single linear layer)
env_embed_mlp_latent_dimensions: []
# nonlinearity used in the environment embedding MLP
env_embed_mlp_nonlinearity: null
# weight initialization of the environment embedding MLP
env_embed_mlp_initialization: uniform
# - end allegro layers -
# Final MLP to go from Allegro latent space to edge energies:
# hidden layer dimensions of the per-edge energy final MLP
edge_eng_mlp_latent_dimensions: [128]
# nonlinearity used in the per-edge energy final MLP
edge_eng_mlp_nonlinearity: null
# weight initialization of the per-edge energy final MLP
edge_eng_mlp_initialization: uniform
# -- data --
# there are two options to specify a dataset, npz or ase
# npz works with npz files, ase can read any format that ase.io.read can read
# IMPORTANT: in most cases, the ase option with an extxyz file is by far the simplest way to do it, and we strongly recommend it
# simply provide a single extxyz file that contains the structures together with their energies and forces (generated with, e.g., ase.io.write(filename, atoms, format='extxyz', append=True); see the sketch below)
# for a simple snippet to do this, see the gists here: https://github.com/simonbatzner
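# a minimal, hypothetical Python sketch of writing such a file (the file name, the atoms_list
# variable, and the use of a SinglePointCalculator to attach energies/forces are assumptions,
# not part of this config):
#
#   import ase.io
#   # each ase.Atoms in atoms_list must already carry its energy/forces, e.g. via a
#   # SinglePointCalculator; append=True accumulates all frames into one extxyz file
#   for atoms in atoms_list:
#       ase.io.write('dataset.extxyz', atoms, format='extxyz', append=True)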
# npz option
dataset: npz # type of data set, can be npz or ase
dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. optional
dataset_file_name: ./benchmark_data/aspirin_ccsd-train.npz # path to data set file
key_mapping:
  z: atomic_numbers # atomic species, integers
  E: total_energy # total potential energies to train to
  F: forces # atomic forces to train to
  R: pos # raw atomic positions
npz_fixed_field_keys: # fields that are repeated across different examples
- atomic_numbers
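# a hypothetical numpy sketch of building a compatible npz file (the array names follow the
# key_mapping above; the file name, variable names, and shapes are illustrative only, for N
# frames of M atoms):
#
#   import numpy as np
#   np.savez(
#       'my_dataset.npz',
#       z=atomic_numbers,  # (M,)      integers, fixed across frames
#       E=energies,        # (N,)      total energies
#       F=forces,          # (N, M, 3) forces
#       R=positions,       # (N, M, 3) positions
#   )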
# ase option
# dataset: ase
# dataset_file_name: filename.extxyz
# ase_args:
# format: extxyz
# A mapping of chemical species to type indexes is necessary if the dataset is provided with atomic numbers instead of type indexes.
chemical_symbol_to_type:
  H: 0
  C: 1
  O: 2
# logging
# whether to use Weights & Biases (see wandb.ai)
wandb: false
# project name in wandb
wandb_project: aspirin
# the same as python logging, e.g. warning, info, debug, error. case insensitive
verbose: debug
# training
# number of training samples to use
n_train: 950
# number of validation samples to use
n_val: 50
# batch size, we found it important to keep this small for most applications including forces (1-5); for energy-only training, higher batch sizes work better
batch_size: 5
# stop training after this many epochs; we set a very large number here since it won't take this long in practice and we rely on early stopping instead
max_epochs: 1000000
# learning rate, we found values between 0.002 and 0.0005 to work best - this is often one of the most important hyperparameters to tune
learning_rate: 0.001
# can be random or sequential; if sequential, the first n_train frames are used for training and the next n_val for validation; random is usually the right choice
train_val_split: random
# If true, the data loader will shuffle the data, almost always a good idea
shuffle: true
# metrics used for scheduling and saving the best model. Options: `set`_`quantity`, where set can be either "train" or "validation" and quantity can be "loss" or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
metrics_key: validation_loss
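# for example, to schedule and checkpoint on the validation force MAE instead of the total loss:
# metrics_key: validation_f_mae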
# use an exponential moving average of the weights
# if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
use_ema: true
# ema weight, typically set to 0.99 or 0.999
ema_decay: 0.99
# whether to use number of updates when computing averages
ema_use_num_updates: true
# loss function
# different weights to use in a weighted loss function
# if you use PerAtomMSELoss, the loss is already in a per-atom normalized space (both E/F become per-atom quantities)
# in that case, a 1:1 weighting usually works best (an alternative weighting is sketched after this block)
loss_coeffs:
  forces: 1.
  total_energy:
    - 1.
    - PerAtomMSELoss
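# for comparison, a hypothetical alternative weighting using the default (not per-atom) MSE on
# the total energy, where the force term is typically weighted up:
# loss_coeffs:
#   forces: 100.
#   total_energy: 1.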
# optimizer
# default optimizer is Adam
optimizer_name: Adam
optimizer_params:
  amsgrad: false
  betas: !!python/tuple
    - 0.9
    - 0.999
  eps: 1.0e-08
  weight_decay: 0.
# lr scheduler, drop lr if no improvement for 50 epochs
# on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 50
lr_scheduler_factor: 0.5
# early stopping: stop if the maximum wall time of 7 days is reached, the lr drops below 1e-5, or the validation loss has not improved for 100 epochs
early_stopping_upper_bounds:
  cumulative_wall: 604800.
early_stopping_lower_bounds:
  LR: 1.0e-5
early_stopping_patiences:
  validation_loss: 100