In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import copy
import os
import pickle
import sys
import numpy as np
import json

# Print NumPy arrays with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
# uncomment and replace with correct path if there are import errors
# sys.path.append("/path/to/surface-sampling/")
# sys.path.append("/path/to/NeuralForceField/")
# os.environ["LAMMPS_POTENTIALS"] = "/path/to/lammps/potentials/"
# os.environ["LAMMPS_COMMAND"] ="/path/to/lammps/src/lmp_serial"
# os.environ["ASE_LAMMPSRUN_COMMAND"] = os.environ["LAMMPS_COMMAND"]

from mcmc import MCMC
from mcmc.system import SurfaceSystem

from time import perf_counter

Initialize test slab and parameters

In [4]:
# Load prepared pristine slab and locate the NFF offset data file.
# `element` is handed unchanged to `MCMC(element=...)` in the sampling cell.
element = []

# NOTE: unpickling can execute arbitrary code, so only load slab files that
# come from a trusted source. The context manager guarantees that the file
# handle is closed as soon as the slab has been read.
with open("data/SrTiO3_001_2x2_pristine_slab.pkl", "rb") as slab_pkl:
    slab = pickle.load(slab_pkl)

# Path of the JSON file with the offset data; it is read when `calc_settings`
# is built.
offset_data_path = os.path.join(
    os.getcwd(),
    "data/nff",
    "offset_data.json",
)

In [5]:
# Settings describing the surface; the whole dict is passed to `SurfaceSystem`.
# `surface_name` also names the MCMC run and `cutoff` is reused when building
# the `AtomsBatch`. `near_reduce`, `planar_distance` and `no_obtuse_hollow`
# are adsorption-site finder parameters.
system_settings = dict(
    surface_name="SrTiO3(001)",
    cutoff=5.0,
    near_reduce=0.01,
    planar_distance=1.55,
    no_obtuse_hollow=True,
)

# Sampling schedule forwarded to `mcmc.mcmc_run`: `alpha`, `temperature`
# (as `start_temp`), `num_sweeps` (as `total_sweeps`) and `sweep_size`.
sampling_settings = dict(
    alpha=1.0,  # no annealing
    temperature=1.0,  # in terms of kbT
    num_sweeps=10,
    sweep_size=5,
)

# Read the offset data inside a context manager so the file handle is closed
# right away instead of being left open until garbage collection.
with open(offset_data_path, "r") as offset_file:
    offset_data = json.load(offset_file)

# Calculator / relaxation settings. The whole dict is applied to the NFF
# calculator via `.set(**calc_settings)`; individual entries are also passed
# to `MCMC(...)`. The keys of `chem_pots` become the adsorbate list and its
# values the `pot` argument of `mcmc_run`, so their order must match.
calc_settings = {
    "calc_name": "NFF",
    "chem_pots": {"Sr": -2, "Ti": 0, "O": 0},
    "offset_data": offset_data,
    "optimizer": "BFGS",
    "relax_atoms": True,
    "relax_steps": 5,
    "offset": True,
}

Obtain adsorption sites

In [6]:
from pymatgen.analysis.adsorption import AdsorbateSiteFinder
from pymatgen.io.ase import AseAtomsAdaptor

# Work on a copy of the ASE slab and convert it to a pymatgen structure,
# which is what `AdsorbateSiteFinder` operates on.
pristine_slab = slab.copy()
pristine_pmg_slab = AseAtomsAdaptor.get_structure(pristine_slab)
site_finder = AdsorbateSiteFinder(pristine_pmg_slab)

# new standardized params (kept for reference; not active)
# ads_positions = site_finder.find_adsorption_sites(
#     put_inside=True,
#     symm_reduce=False,
#     near_reduce=system_settings['near_reduce'],
#     distance=system_settings["planar_distance"],
#     no_obtuse_hollow=system_settings['no_obtuse_hollow'],
# )["all"]

# old params: the site height now comes from `system_settings` instead of a
# duplicated literal (same value, 1.55), so the configuration cell is the
# single place to change it.
sites = site_finder.find_adsorption_sites(
    distance=system_settings["planar_distance"],
    put_inside=False,
    symm_reduce=False,
    no_obtuse_hollow=False,
)
ads_positions = sites["all"]

print("adsorption coordinates are")
print(ads_positions)

# Number of candidate adsorption sites.
# NOTE(review): the MCMC cell passes `sampling_settings["sweep_size"]`, not
# this variable - confirm which sweep size is intended.
sweep_size = len(ads_positions)

adsorption coordinates are
[array([-0.   , -0.029, 18.87 ]), array([ 1.968,  1.951, 18.829]), array([ 1.968,  0.146, 18.782]), array([-0.   ,  3.956, 18.87 ]), array([ 1.968,  5.936, 18.829]), array([ 1.968,  4.131, 18.782]), array([ 3.936, -0.029, 18.87 ]), array([ 5.903,  1.951, 18.829]), array([ 5.903,  0.146, 18.782]), array([ 3.936,  3.956, 18.87 ]), array([ 5.903,  5.936, 18.829]), array([ 5.903,  4.131, 18.782]), array([ 9.839, 30.936, 18.806]), array([ 8.855, 30.848, 18.85 ]), array([29.517, 30.936, 18.806]), array([30.5  , 30.848, 18.85 ]), array([14.758, 15.998, 18.826]), array([15.742, 13.906, 18.829]), array([30.5  , 28.856, 18.85 ]), array([ 8.855, 28.856, 18.85 ]), array([10.823, 30.848, 18.85 ]), array([14.758, 24.871, 18.85 ]), array([16.726, 24.871, 18.85 ]), array([15.742, 25.861, 18.829]), array([30.5  , 27.953, 18.826]), array([29.517, 28.943, 18.806]), array([28.533, 28.856, 18.85 ]), array([28.533, 27.953, 18.826]), array([12.79 , 15.998, 18.826]), array([12.79 , 

Set up the NFF calculator. We use neural network weights from our Zenodo dataset (https://zenodo.org/record/7927039). The ensemble also requires an `offset_data.json` file.

In [9]:
import torch
from nff.io.ase import AtomsBatch
from nff.io.ase_calcs import NeuralFF, EnsembleNFF
from mcmc.calculators import EnsembleNFFSurface

# Run on CUDA device index 0 when a GPU is available, otherwise on the CPU.
DEVICE = 0 if torch.cuda.is_available() else "cpu"

# requires an ensemble of models in this path and an `offset_data.json` file
nnids = ["model01", "model02", "model03"]
nff_root = os.path.join(os.getcwd(), "data/nff")
model_dirs = [os.path.join(nff_root, str(nnid), "best_model") for nnid in nnids]

# Load every ensemble member and keep only its `.model` attribute.
models = [
    NeuralFF.from_file(model_dir, device=DEVICE).model for model_dir in model_dirs
]

# Build the ensemble surface calculator and apply all calculator settings
# (chemical potentials, offset data, optimizer, ...) in one call.
nff_surf_calc = EnsembleNFFSurface(models, device=DEVICE)
nff_surf_calc.set(**calc_settings)

offset data: {'bulk_energies': {'O': -0.17747231201, 'Sr': -0.06043637668, 'SrTiO3': -1.470008697358702}, 'stoidict': {'Sr': 0.49995161381315867, 'Ti': -0.0637500349111578, 'O': -0.31241304903276834, 'offset': -11.324476454433157}, 'stoics': {'Sr': 1, 'Ti': 1, 'O': 3}, 'ref_formula': 'SrTiO3', 'ref_element': 'Ti'} is set from parameters
chemical potentials: {'Sr': -2, 'Ti': 0, 'O': 0} are set from parameters
offset data: {'bulk_energies': {'O': -0.17747231201, 'Sr': -0.06043637668, 'SrTiO3': -1.470008697358702}, 'stoidict': {'Sr': 0.49995161381315867, 'Ti': -0.0637500349111578, 'O': -0.31241304903276834, 'offset': -11.324476454433157}, 'stoics': {'Sr': 1, 'Ti': 1, 'O': 3}, 'ref_formula': 'SrTiO3', 'ref_element': 'Ti'} is set from parameters


Initialize surface system

In [10]:
# Collect the constructor arguments first, then wrap the pristine slab in an
# NFF `AtomsBatch` that carries the ensemble calculator.
slab_batch_kwargs = dict(
    positions=slab.positions,
    numbers=slab.numbers,
    cell=slab.cell,
    pbc=True,
    cutoff=system_settings["cutoff"],
    props={"energy": 0, "energy_grad": []},
    calculator=nff_surf_calc,
    requires_large_offsets=True,
    directed=True,
    device=DEVICE,
)
slab_batch = AtomsBatch(**slab_batch_kwargs)

# fix bulk atoms
from ase.constraints import FixAtoms

# Every atom of the pristine slab is a candidate for freezing.
num_bulk_atoms = len(slab_batch)
bulk_indices = list(range(num_bulk_atoms))
print(f"bulk indices {bulk_indices}")

# Atoms reported as surface atoms stay free to relax; all others are frozen.
surf_indices = slab.get_surface_atoms()

# Set iteration order is not guaranteed, so sort the difference to keep the
# printed and frozen index list reproducible between runs.
fix_indices = sorted(set(bulk_indices) - set(surf_indices))
print(f"fix indices {fix_indices}")

fix_bulk_constraint = FixAtoms(indices=fix_indices)
slab_batch.set_constraint(fix_bulk_constraint)

# Combine the slab, the candidate adsorption sites and the calculator into the
# system object that is later handed to the MCMC run.
surface = SurfaceSystem(
    slab_batch,
    ads_positions,
    nff_surf_calc,
    system_settings=system_settings,
)
# Write the `all_atoms` structure to a CIF file for inspection.
surface.all_atoms.write("SrTiO3_001_2x2_all_virtual_ads.cif")

bulk indices [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
fix indices [0, 1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58]
ads coords is [-0.    -0.029 18.87 ]
ads coords is [ 1.968  1.951 18.829]
ads coords is [ 1.968  0.146 18.782]
ads coords is [-0.     3.956 18.87 ]
ads coords is [ 1.968  5.936 18.829]
ads coords is [ 1.968  4.131 18.782]
ads coords is [ 3.936 -0.029 18.87 ]
ads coords is [ 5.903  1.951 18.829]
ads coords is [ 5.903  0.146 18.782]
ads coords is [ 3.936  3.956 18.87 ]
ads coords is [ 5.903  5.936 18.829]
ads coords is [ 5.903  4.131 18.782]
ads coords is [ 9.839 30.936 18.806]
ads coords is [ 8.855 30.848 18.85 ]
ads coords is [29.517 30.936 18.806]
ads 

2024-03-08 11:20:12,742|INFO|initial state is [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
2024-03-08 11:20:12,743|INFO|number of pristine atoms is 60
2024-03-08 11:20:12,745|INFO|bulk indices are [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59]
2024-03-08 11:20:12,745|INFO|surface indices are []


In [11]:
# Sanity check: evaluate the energy of the freshly built surface system with
# the attached NFF calculator; the value is shown as the cell output.
surface.get_potential_energy()

array([-467.541], dtype=float32)

In [12]:
# Display the offset data held by the surface's calculator to confirm that
# the values from `calc_settings` were applied.
surface.calc.offset_data

{'bulk_energies': {'O': -0.17747231201,
  'Sr': -0.06043637668,
  'SrTiO3': -1.470008697358702},
 'stoidict': {'Sr': 0.49995161381315867,
  'Ti': -0.0637500349111578,
  'O': -0.31241304903276834,
  'offset': -11.324476454433157},
 'stoics': {'Sr': 1, 'Ti': 1, 'O': 3},
 'ref_formula': 'SrTiO3',
 'ref_element': 'Ti'}

In [13]:
from mcmc.energy import slab_energy

# Evaluate the surface energy with the offset applied, using the same offset
# data as the calculator settings.
# NOTE(review): the returned tuple appears to start with the energy and its
# ensemble spread (compare the "optim structure has Energy" log lines) -
# confirm against `mcmc.energy.slab_energy`.
slab_energy(surface, offset=True, offset_data=calc_settings["offset_data"])

      Step     Time          Energy         fmax
BFGS:    0 11:20:16     -467.521881        0.0000


(12.49005126953125, 0.30545470118522644, 0.0, 0.0, [])

Perform MCMC and view results. Detailed results can be found in the corresponding run in the `SrTiO3(001)/` folder.

In [14]:
# The chemical-potential dict supplies both the adsorbate species (keys) and
# the `pot` values (values); both lists follow the dict's insertion order.
chem_pots = calc_settings["chem_pots"]

mcmc = MCMC(
    system_settings["surface_name"],
    calc=nff_surf_calc,
    canonical=False,
    testing=False,
    element=element,
    adsorbates=list(chem_pots),
    relax=calc_settings["relax_atoms"],
    relax_steps=calc_settings["relax_steps"],
    offset=calc_settings["offset"],
    offset_data=calc_settings["offset_data"],
    optimizer=calc_settings["optimizer"],
)  # relax atoms with manual offset TODO fix this!

# TODO: convert input to sampling settings
run_kwargs = dict(
    total_sweeps=sampling_settings["num_sweeps"],
    sweep_size=sampling_settings["sweep_size"],
    start_temp=sampling_settings["temperature"],
    pot=list(chem_pots.values()),
    alpha=sampling_settings["alpha"],
    surface=surface,
)

# Time only the sampling itself.
start = perf_counter()
# call the main function
mcmc.mcmc_run(**run_kwargs)
stop = perf_counter()
print(f"Time taken = {stop - start} seconds")

2024-03-08 11:20:18,545|INFO|there are 60 atoms in pristine slab
2024-03-08 11:20:18,547|INFO|Running with num_sweeps = 10, temp = 1.0, pot = [-2, 0, 0], alpha = 1.0
2024-03-08 11:20:18,548|INFO|In pristine slab, there are a total of 72 sites


      Step     Time          Energy         fmax
BFGS:    0 11:20:18     -467.521881        0.0000


2024-03-08 11:20:18,721|INFO|running for 5 iterations per run over a total of 10 runs
2024-03-08 11:20:18,722|INFO|starting with iteration 0
2024-03-08 11:20:18,723|INFO|In sweep 1 out of 10


temp list is:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
      Step     Time          Energy         fmax
BFGS:    0 11:20:18     -465.896942       13.8634
BFGS:    1 11:20:18     -467.905975        7.3427
BFGS:    2 11:20:18     -468.976654        3.5292
BFGS:    3 11:20:19     -469.369293        1.5990
BFGS:    4 11:20:19     -469.440704        1.6101
BFGS:    5 11:20:19     -469.525146        1.7815
      Step     Time          Energy         fmax
BFGS:    0 11:20:19     -470.168365        8.9101
BFGS:    1 11:20:19     -471.131104        6.0056
BFGS:    2 11:20:19     -471.827637        2.4612
BFGS:    3 11:20:19     -472.051666        2.6320
BFGS:    4 11:20:19     -472.545776        2.3643
BFGS:    5 11:20:19     -472.798431        2.1017
      Step     Time          Energy         fmax
BFGS:    0 11:20:19     -470.253082        1.8847
BFGS:    1 11:20:20     -470.305908        2.0418
BFGS:    2 11:20:20     -470.770874        2.2253
BFGS:    3 11:20:20     -471.056732        0.5441
BFGS:  

2024-03-08 11:20:22,052|INFO|current energy is 13.7686767578125, calculated energy is 13.7686767578125
2024-03-08 11:20:22,052|INFO|optim structure has Energy = 13.769+/-0.173
2024-03-08 11:20:22,053|INFO|average force error = 0.000
2024-03-08 11:20:22,060|INFO|In sweep 2 out of 10


      Step     Time          Energy         fmax
BFGS:    0 11:20:22     -473.051239        1.9986
BFGS:    1 11:20:22     -473.171356        2.2094
BFGS:    2 11:20:22     -474.119019        2.1273
BFGS:    3 11:20:22     -474.633698        0.4879
BFGS:    4 11:20:22     -474.640869        0.4057
BFGS:    5 11:20:22     -474.655304        0.0630
      Step     Time          Energy         fmax
BFGS:    0 11:20:22     -476.341675        3.0027
BFGS:    1 11:20:22     -476.470703        2.9610
BFGS:    2 11:20:22     -476.986694        2.2353
BFGS:    3 11:20:23     -477.283813        1.9308
BFGS:    4 11:20:23     -477.564453        2.8352
BFGS:    5 11:20:23     -478.086914        3.0725
      Step     Time          Energy         fmax
BFGS:    0 11:20:23     -467.521881        0.0000
      Step     Time          Energy         fmax
BFGS:    0 11:20:23     -465.764984       25.1198
BFGS:    1 11:20:23     -468.461304        8.0095
BFGS:    2 11:20:23     -469.308319        9.1920
BFGS

2024-03-08 11:20:24,666|INFO|current energy is 12.49005126953125, calculated energy is 12.49005126953125
2024-03-08 11:20:24,667|INFO|optim structure has Energy = 12.490+/-0.305
2024-03-08 11:20:24,668|INFO|average force error = 0.000
2024-03-08 11:20:24,676|INFO|In sweep 3 out of 10


      Step     Time          Energy         fmax
BFGS:    0 11:20:24     -468.645630       10.3694
BFGS:    1 11:20:24     -469.699677        4.5055
BFGS:    2 11:20:24     -470.087982        2.1835
BFGS:    3 11:20:25     -470.261749        1.2017
BFGS:    4 11:20:25     -470.409454        1.5427
BFGS:    5 11:20:25     -470.575409        0.5548
      Step     Time          Energy         fmax
BFGS:    0 11:20:25     -472.351929        3.2434
BFGS:    1 11:20:25     -472.501556        3.2469
BFGS:    2 11:20:25     -473.150024        3.0929
BFGS:    3 11:20:25     -473.548431        1.6190
BFGS:    4 11:20:25     -473.620270        1.6314
BFGS:    5 11:20:25     -473.812592        2.4035
      Step     Time          Energy         fmax
BFGS:    0 11:20:25     -470.738434        0.7579
BFGS:    1 11:20:25     -470.746246        0.6891
BFGS:    2 11:20:26     -470.783936        0.0697
BFGS:    3 11:20:26     -470.784027        0.0629
BFGS:    4 11:20:26     -470.784210        0.0178
BFG

KeyboardInterrupt: 

Save the sampled structures for later use in latent space clustering or analysis.

In [12]:
# Pickle the history recorded by the MCMC object so later notebooks can load
# it without re-running the sampling.
structures = mcmc.history
with open("data/SrTiO3_001_2x2_mcmc_structures.pkl", "wb") as structures_file:
    pickle.dump(structures, structures_file)