# This is part of the supporting information for the paper  
*ParAMS: Parameter Fitting for Atomistic and Molecular Models* (DOI: *123123*)  
The full documentation can be found at https://www.scm.com/doc.trunk/params/index.html

# SCC-DFTB repulsive potential parametrization

Set num_processes to the number of processors on your machine. The DFTB calculations will be parallelized over that many cores.

In [9]:
import os, sys
import numpy as np
from os.path    import join as opj
from scm.params import *
from scm.params import __version__ as paramsver
from scm.plams import *

# --- User-adjustable switches ------------------------------------------------
# Number of processes used for the BAND reference jobs, the DFTB jobs and the
# optimization.
num_processes = 8
# Add the wurtzite bulk modulus (requires an elastic-tensor calculation) to the
# training set.
include_bulkmodulus = True
# Recompute the reference values with BAND instead of using literature values.
recalculate_reference_data = False
# Load an existing job collection / training set instead of building new ones.
load_precalculated = True

INDIR = '../data/dftb'
# exist_ok=True replaces the separate exists() check (no check-then-create race).
os.makedirs(INDIR, exist_ok=True)
print(os.getcwd())

if load_precalculated:
    jcfile = opj(INDIR, 'precalculated_jobcollection.yml')
    trainingsetfile = opj(INDIR, 'precalculated_trainingset.yml') # calculated with BAND
    # Raise explicitly instead of using assert: asserts are stripped under
    # `python -O`, and the message names the file that is missing.
    for required_file in (jcfile, trainingsetfile):
        if not os.path.exists(required_file):
            raise FileNotFoundError(f"Precalculated file not found: {required_file}")
else:
    # Encode the chosen options in the file names.
    suffix = ''
    if include_bulkmodulus:
        suffix += '_bm'
    if recalculate_reference_data:
        suffix += '_recalc'
    jcfile = opj(INDIR, f'jobcollection{suffix}.yml')
    trainingsetfile = opj(INDIR, f'trainingset{suffix}.yml')

    # Never overwrite earlier output: append a running number (.002, .003, ...)
    # until neither file name is taken.
    i = 1
    while os.path.exists(jcfile) or os.path.exists(trainingsetfile):
        i += 1
        jcfile = opj(INDIR, f'jobcollection{suffix}.yml.{i:03d}')
        trainingsetfile = opj(INDIR, f'trainingset{suffix}.yml.{i:03d}')

print(f"ParAMS Version used: {paramsver}")
print(f"Training set file: {trainingsetfile}")
print(f"Job collection file: {jcfile}")

/home/leo/Documents/papers/params/SI/notebooks
ParAMS Version used: 0.5.0
Training set file: ../data/dftb/precalculated_trainingset.yml
Job collection file: ../data/dftb/precalculated_jobcollection.yml



# Step 1: Define the job collection
This adds lattice optimizations of the wurtzite and rocksalt polymorphs of ZnO to the job collection.

For wurtzite, the elastic tensor is calculated. From the output, the bulk modulus can then be extracted.

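The job collection is stored in the file named by `jcfile` (printed above), e.g. `jobcollection_bm.yml` for a newly created collection. If `load_precalculated` is True, it is instead loaded from `precalculated_jobcollection.yml`.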

In [None]:
# Build (or load) the job collection holding the two lattice optimizations.
jc = JobCollection()
if load_precalculated:
    jc.load(jcfile)
else:
    wurtzite = Molecule(opj(INDIR, 'w.xyz'))
    rocksalt = Molecule(opj(INDIR, 'rs.xyz'))

    # for more information about the input settings, see the AMS manual
    w_opt_s = Settings()
    w_opt_s.input.ams.Task = 'GeometryOptimization'
    w_opt_s.input.ams.GeometryOptimization.OptimizeLattice = 'Yes'
    #w_opt_s.input.ams.GeometryOptimization.MaxIterations = 100
    #w_opt_s.input.ams.GeometryOptimization.PretendConverged = 'Yes'
    # Freeze the three shear components of the strain. The previous value
    # 'xz yz xz' listed xz twice and left the xy shear unconstrained.
    w_opt_s.input.ams.Constraints.FreezeStrain = 'xz yz xy'
    w_opt_s.input.ams.Constraints.EqualStrain = 'xx yy'
    if include_bulkmodulus:
        w_opt_s.input.ams.Properties.ElasticTensor = 'Yes' # to get bulk modulus of wurtzite
        w_opt_s.input.ams.GeometryOptimization.Convergence.Gradients = 8e-5

    # The rocksalt settings are a copy of the wurtzite settings; only the
    # EqualStrain constraint is overridden.
    rs_opt_s = w_opt_s.copy()
    rs_opt_s.input.ams.Constraints.EqualStrain = 'xx yy zz'

    jc.add_entry('wurtzite_lattopt', JCEntry(w_opt_s, wurtzite))
    jc.add_entry('rocksalt_lattopt', JCEntry(rs_opt_s, rocksalt))

    jc.store(jcfile)

print("### Job collection ###")
print(jc)

# Step 2: Define the training set
There are four target quantities:
* $a$, the wurtzite lattice parameter,
* $c$, the wurtzite lattice parameter,
* $B_0$, the wurtzite bulk modulus (only if include_bulkmodulus is True), and
* $\Delta E$, the relative energy between the wurtzite and rocksalt polymorphs (per ZnO formula unit).

**If you set recalculate_reference_data to True**, the AMS BAND periodic DFT software will be used to run the reference jobs and calculate the reference data. Any engine, or combination of different engines in the Amsterdam Modeling Suite, can be used to seamlessly calculate the reference data, if the reference values are not known beforehand. NOTE: It may take many hours to calculate the reference data.

**Otherwise**, DFT-calculated reference values are taken from https://doi.org/10.1021/jp404095x

In [None]:

# Build (or load) the training set of target quantities.
training_set = DataSet()
if load_precalculated:
    training_set.load(trainingsetfile)
else:
    # One row per target: (expression, weight, literature reference value).
    target_table = []
    if include_bulkmodulus:
        target_table.append(('bulkmodulus("wurtzite_lattopt")', 0.5, 129))  # GPa
    target_table.append(('lattice("wurtzite_lattopt", 0)', 1, 3.29))  # a, angstrom
    target_table.append(('lattice("wurtzite_lattopt", 2)', 1, 5.31))  # c, angstrom
    # presumably -0.30 eV converted to Hartree (27.211 eV/Ha) -- TODO confirm
    target_table.append(
        ('energy("wurtzite_lattopt")/2.0-energy("rocksalt_lattopt")', 1, -0.30/27.211)
    )

    # When the references are recalculated, entries start without a reference
    # value; otherwise the literature value is used.
    for target_expr, target_weight, literature_ref in target_table:
        training_set.add_entry(
            target_expr,
            weight=target_weight,
            reference=None if recalculate_reference_data else literature_ref,
        )

    if recalculate_reference_data:
        # Run every job of the collection with BAND and fill in the references.
        band_settings = Settings()
        band_settings.input.band.basis.type = 'TZP'
        band_settings.input.band.numericalquality = 'Good'
        band_settings.input.band.xc.libxc = 'PBE'
        band_settings.runscript.nproc = num_processes
        init(path=INDIR, folder='band_reference_data')
        reference_results = jc.run(engine_settings=band_settings, use_pipe=False)
        finish()
        training_set.calculate_reference(reference_results)

    training_set.store(trainingsetfile)

print("### Training set ###")
print(training_set)


Define the settings for the parametrized DFTB engine. Here, the k-space quality is set to 'Good', which is important for lattice optimizations.

In [None]:
# Engine settings for the parametrized DFTB engine. They are handed to the
# parameter interface through its other_settings argument.
dftb_s = Settings()
# k-space sampling quality of the DFTB engine
dftb_s.input.dftb.kspace.quality = 'Good'

Create a "parameter interface" to the DFTB repulsive potential. 

Repulsive potentials are stored as splines towards the end of Slater-Koster (.skf) files.

Here, we optimize only the Zn-O and O-Zn repulsive potentials (which must be identical).

* Take electronic parameters and unchanged repulsive potentials (e.g. O-O.skf) from AMSHOME/atomicdata/DFTB/DFTB.org/znorg-0-1

* Define an analytical repulsive function. Here, we choose a tapered double exponential of the form $V^{\text{rep}}(r) = f^{\text{cut}}(r)\left[p_0\exp(-p_1r) + p_2\exp(-p_3r)\right]$, where $p_0, p_1, p_2, p_3$ are the parameters to be fitted, and $f^{\text{cut}}(r)$ is a cutoff function that decays smoothly to 0 at $r = 5.67$ bohr.

* r_range specifies for which distances to write the repulsive potential, and spline parameters, to the new O-Zn.skf and Zn-O.skf files.

* Only optimize parameters for the O-Zn pair. Note: The Zn-O repulsive potential will be identical to the O-Zn one. When specifying active parameters for a DFTBSplineRepulsivePotentialParams, the elements must be ordered alphabetically.

* Define initial values and allowed ranges for the parameter values.

In [None]:
# Folder with the znorg-0-1 parameter files inside the AMS installation.
znorg_folder = opj(os.environ['AMSHOME'], 'atomicdata', 'DFTB', 'DFTB.org', 'znorg-0-1')

# Parameter interface to the repulsive potential, using a tapered double
# exponential as the analytical repulsive function.
interface = DFTBSplineRepulsivePotentialParams(
    folder=znorg_folder,
    repulsive_function=TaperedDoubleExponential(cutoff=5.67),
    r_range=np.arange(0., 5.87, 0.1),
    other_settings=dftb_s,
)

# Activate only the parameters whose name starts with 'O-Zn:'.
for param in interface:
    param.is_active = param.name.startswith('O-Zn:')

print("### Active parameters ###")
initial_values = [0.5, 1.0, 0.3, 0.3]
allowed_ranges = [(0., 4.), (0., 10.), (0., 4.), (0., 10)]
interface.active.x = initial_values
interface.active.range = allowed_ranges
for param in interface.active:
    print(param)


# Step 3: Run the optimization
* Specify a Nelder-Mead optimizer from scipy.

In [None]:

# Nelder-Mead optimizer
optimizer = Scipy(method='Nelder-Mead')

# Parallelization over num_processes processes.
parallel_levels = ParallelLevels(processes=num_processes)

# Callbacks: a logger with all print/write frequencies set to 1, and a
# time-per-evaluation callback with print frequency 10.
optimization_callbacks = [
    Logger(
        printfreq=1,
        writefreq_history=1,
        writefreq_datafiles=1,
        writefreq_bestparams=1,
    ),
    TimePerEval(printfrequency=10),
]

optimization = Optimization(
    jc,
    training_set,
    interface,
    optimizer,
    title="ZnO_repulsive_opt",
    use_pipe=True,
    parallel=parallel_levels,
    callbacks=optimization_callbacks,
)

optimization.summary()
results = optimization.optimize()

# Step 4: Find the results
* ZnO_repulsive_opt/trainingset_history.dat contains the loss function value and parameters for each iteration
* ZnO_repulsive_opt/data/predictions/trainingset contains the individual predictions ($a$, $c$, $B_0$ and $\Delta E$) for each parameter set
* ZnO_repulsive_opt/data/contributions/trainingset contains the fraction of the total loss function value for each item in the training set, for each parameter set

# Appendix: Recalculate training set with arbitrary engines (e.g. UFF)

In [None]:

# Evaluate the training set with other engines and print one row of
# predictions per engine.

uff_s = Settings()
# NOTE(review): the bare attribute access below is kept on purpose; it
# presumably relies on PLAMS Settings creating the (empty) forcefield block
# on first access -- confirm before removing.
uff_s.input.forcefield

znorg_s = Settings()
znorg_s.input.dftb.model = 'SCC-DFTB'
znorg_s.input.dftb.resourcesdir = 'DFTB.org/znorg-0-1'
znorg_s.input.dftb.kspace.quality = 'Good'
znorg_s.runscript.nproc = num_processes

engines_dict = {'UFF': uff_s, 'znorg-0-1': znorg_s}

# Header rows: the training-set expressions and their reference values.
print("Engine", end='')
for x in training_set:
    print(" {}".format(x.expression), end='')
print("\nRef.", end='')
for x in training_set:
    print(" {}".format(x.reference), end='')
print("\n")

# Nothing else creates this directory, and PLAMS init() expects `path` to be
# an existing directory, so create it up front.
other_engines_dir = opj(INDIR, 'other_engines')
os.makedirs(other_engines_dir, exist_ok=True)

for engine, sett in engines_dict.items():
    init(other_engines_dir, folder=engine)
    try:
        results = jc.run(sett, use_pipe=False)
    finally:
        # Always close the PLAMS session, even if the run fails, so that the
        # next init() starts from a clean state.
        finish()
    fx, residuals, contributions = training_set.evaluate(results, return_residuals=True)
    predictions = training_set.get_predictions(residuals)
    print(engine, end='')
    for _, v in predictions:
        print(" {:.3f} ".format(v[0]), end='')
    print("\n")