In [1]:
import os, sys
import numpy as np
from os.path    import join as opj
from itertools  import combinations
from scm.params import *
from scm.params import __version__ as paramsver
print(f"ParAMS Version used: {paramsver}")

# Directory into which the converted job collection and data sets are stored.
INDIR = '../data'
# exist_ok=True makes the cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(INDIR, exist_ok=True)

ParAMS Version used: 0.3



# Step 0: Auxiliary functions
The following helper functions are additionally required for the optimization.

Müller and Hartke provide the reference gradients as external files. The following function adds them to the data set:

In [2]:
def add_grads(path, dataset, weight=0.01):
    """Add reference forces from the ``*.gradient`` files in `path` to `dataset`.

    Every file ``<name>.gradient`` is read with ``np.loadtxt`` (first row
    skipped, columns 1-3 used). For each row index ``i`` and column index
    ``c`` one entry ``force("<name>", i, c)`` is added, with the negated file
    value as its reference.

    Parameters
    ----------
    path : str
        Directory that is scanned for files ending in ``.gradient``.
    dataset : object
        Any object exposing ``add_entry(expression, weight=..., reference=...)``.
    weight : float, optional
        Weight passed on for every added entry (default 0.01).
    """
    suffix = '.gradient'
    # Sort so that entries are added in the same order on every filesystem.
    for fname in sorted(os.listdir(path)):
        if not fname.endswith(suffix):
            continue
        # Slice the suffix off: str.rstrip('.gradient') strips a *set of
        # characters* and would turn e.g. 'benzene.gradient' into 'benz'.
        name = fname[:-len(suffix)]
        # ndmin=2 keeps a file with a single data row two-dimensional.
        grads = np.loadtxt(opj(path, fname), skiprows=1, usecols=(1, 2, 3), ndmin=2)
        for iat, at in enumerate(grads):
            for xyz, value in enumerate(at):
                dataset.add_entry(f'force("{name}", {iat}, {xyz})', weight=weight, reference=-value)

ParAMS can handle constraints, if those are known. The following function constrains the covalent radii parameters to $r_0^\sigma \geq r_0^\pi$, $r_0^\pi \geq r_0^{\pi\pi}$ for every atom type and every off-diagonal atom pair:

In [3]:
def get_constraints(x):
    """Build the radius-ordering constraints sigma >= pi >= double-pi.

    Two comparisons are created for each of the atom types C, H, O, S and
    for each unordered pair of two different atom types.

    Parameters
    ----------
    x : mapping-like
        Indexed by the full parameter name strings built below; the values
        it returns must support ``>=``.

    Returns
    -------
    list
        The results of the ``>=`` comparisons, atom types first, then pairs.
    """
    pre_atoms = ['C:', 'H:', 'O:', 'S:']
    pre_offd = [f"{'.'.join(i)}:" for i in combinations(['C', 'H', 'O', 'S'], 2)]
    const = []
    for pre in pre_atoms:
        sigma = f"{pre}r_0^sigma;;2;;Sigma bond covalent radius"
        pi    = f"{pre}r_0^pi;;2;;Pi bond covalent radius"
        # NOTE(review): this key uses `r_0^pi` with the "Double pi" description,
        # whereas the pair block below uses `r_0^pipi`. Left unchanged; confirm
        # against the parameter names of the ParAMS version in use.
        pi2   = f"{pre}r_0^pi;;2;;Double pi bond covalent radius"
        const += [x[sigma] >= x[pi], x[pi] >= x[pi2]]
    for pre in pre_offd:
        sigma = f"{pre}r_0^sigma;;2;;Sigma bond length"
        pi    = f"{pre}r_0^pi;;2;;Pi bond length"
        pi2   = f"{pre}r_0^pipi;;2;;PiPi bond length"
        const += [x[sigma] >= x[pi], x[pi] >= x[pi2]]
    return const

# Step 1: Convert from the old ReaxFF format to ParAMS

We start with the job collection:

In [4]:
# Convert the geometry files of the training (optInput) and validation (valSet)
# sets; both conversions use the same control file for the run settings.
jc1 = geo_to_params('../MH/optInput/geo', normal_run_settings='../MH/control')
jc2 = geo_to_params('../MH/valSet/geo',   normal_run_settings='../MH/control')

# Job IDs that occur in both collections, listed in jc1 order.
shared_ids = [job_id for job_id in jc1.keys() if job_id in jc2]
print('The following jobIDs are in *both* the training and validation sets:')
print("\n".join(shared_ids)+'\n')

The following jobIDs are in *both* the training and validation sets:
dmds
s8
dpds
dpods



Join the two sets into one job collection, tell AMS that gradients need to be computed, and add the link to the original publication to the metadata:

In [5]:
# Single job collection holding the jobs of both sets.
jc = jc1 + jc2

for entry in jc.values():
    # Request gradients in the AMS input of every job.
    entry.settings.input.ams.properties.gradients = True
    # Record the publication the data originates from.
    entry.metadata['Source'] = 'https://doi.org/10.1021/acs.jctc.6b00461'

jc.store(opj(INDIR, 'jobcollection.yml'))

Now convert the data sets:

In [6]:
train_set = trainset_to_params('../MH/optInput/trainset.in')
val_set   = trainset_to_params('../MH/valSet/trainset.in')
# Only the training set receives the external reference gradients.
add_grads('../MH/optInput/grads', train_set)

# NOTE(review): this relies on `train_set+val_set` yielding the same entry
# objects that the two sets hold (not copies) -- confirm, otherwise the
# metadata would not end up in the stored files.
for entry in train_set+val_set:
    entry.metadata['Source'] = 'https://doi.org/10.1021/acs.jctc.6b00461'

train_set.store(opj(INDIR, 'trainingset.yml'))
val_set.store(opj(INDIR, 'validationset.yml'))

# Step 2: Calculate the Loss value for $x_0$

The parameter interface that will be optimized is ReaxFF:

In [7]:
# Initial parameter set x0, built from '../MH/mue2016'.
x0 = ReaxParams('../MH/mue2016', 1.25) # Scale bounds by +-1.25x0 (original author's note on the 2nd argument; not verified here)

In [8]:
print('Running x0 ...')
engine = x0.get_engine()
r  = jc.run(engine.settings)
# Build the name -> result mapping once; both data sets are evaluated
# against the same results of this single run.
results = {i.name : i for i in r}
fx = train_set.evaluate(results)
print(f'Training Set   f(x) = {fx:.3e}')
# `fx` is deliberately reused, so it holds the validation value afterwards.
fx = val_set.evaluate(results)
print(f'Validation Set f(x) = {fx:.3e}\n')
print('Published training set value is 12393\n(https://doi.org/10.1021/acs.jctc.6b00461)')

Running x0 ...
Training Set   f(x) = 1.257e+04
Validation Set f(x) = 8.936e+03

Published training set value is 12393
(https://doi.org/10.1021/acs.jctc.6b00461)


# Step 3: Select the most sensitive parameters

In [9]:
print('Searching for the most sensitive parameters ...')
# File in the working directory that caches the scan between runs.
aps_file = 'aps.npz'
# NOTE(review): the search is set up with `val_set`, not `train_set` --
# confirm that selecting parameters on the validation set is intended.
if os.path.exists(aps_file):
    # Presumably restores the saved scan instead of repeating it -- TODO confirm.
    aps = ActiveParameterSearch(x0, val_set, jc, aps_file)
else:
    aps = ActiveParameterSearch(x0, val_set, jc)
    aps.scan(steps=[1.05], verbose=False)
    aps.save(aps_file)
print('Done')

Searching for the most sensitive parameters ...
Done


In [10]:
n_total = len(x0)
print(f"Total number of  parameters before the scan: {n_total}")
n_active_before = x0.is_active.count(True)
print(f"Number of active parameters before the scan: {n_active_before}")
# Replace the activity flags with the selection returned by the search.
x0.is_active = aps.get_is_active(50)
n_active_after = x0.is_active.count(True)
print(f"Number of active parameters after  the scan: {n_active_after}")

Total number of  parameters before the scan: 701
Number of active parameters before the scan: 619
Number of active parameters after  the scan: 50


# Step 4: Start the optimization

In [11]:
o = CMAOptimizer(popsize=15, sigma=0.5)
callbacks = [
    Logger(),
    Timeout(60*60*8),      # presumably seconds, i.e. 8 hours -- TODO confirm
    TimePerEval(10),
    EarlyStopping(6000),
]
# Radius-ordering constraints built from the parameter interface.
constraints = get_constraints(x0)
# The data sets are passed as a list: training set first, then validation set.
optimization = Optimization(
    jc,
    [train_set, val_set],
    x0,
    o,
    callbacks=callbacks,
    constraints=constraints,
)
optimization.summary()

Optimization() Instance Settings:
Workdir:                           opt
JobCollection size:                458
Interface:                         ReaxParams
Active parameters:                 50
Optimizer:                         CMAOptimizer
Callbacks:                         Logger
                                   Timeout
                                   TimePerEval
                                   EarlyStopping
Constraints:                       Constraint(p[39]<=p[45])
                                   Constraint(p[45]<=p[55])
                                   Constraint(p[71]<=p[77])
                                   Constraint(p[77]<=p[87])
                                   Constraint(p[103]<=p[109])
                                   Constraint(p[109]<=p[119])
                                   Constraint(p[135]<=p[141])
                                   Constraint(p[141]<=p[151])
                                   Constraint(p[330]<=p[331])
                         

The following will start the optimization (we will not show the output, as running it through Jupyter is buggy):

In [13]:
# optimization.optimize()