In [1]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends


"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""

'''

Here is the advanced version to do it. I will add those element I want to refine

def write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(path, 'w') as f:
        f.write(f"structure_folder = '{structure_folder}'\n")
        f.write(f"experiment_path = '{experiment_path}'\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"copy_to_path = '{copy_to_path}'\n")
        f.write(f"ff_path = '{ff_path}'\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"SET_temperature = {SET_temperature}\n")

# Call the function to create the config.py file
write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature)

# Now import the config module
import config

# Now you can use the variables from config.py
structure_folder = config.structure_folder
experiment_path = config.experiment_path
cif_path = config.cif_path
dest_path = config.dest_path
copy_to_path = config.copy_to_path
ff_path = config.ff_path
Transfer_unit = config.Transfer_unit
SET_temperature = config.SET_temperature

'''

'''

structure_folder = "/home/yutao/project/In-MOF/mil-68/"
experiment_path = os.path.join(structure_folder, "273K.csv")
cif_path = os.path.join(structure_folder, "RSM1292.cif")
dest_path = "/home/yutao/project/MIL-120/traj13/"
copy_to_path = "./traj13/"
ff_path = '/home/yutao/project/aiida/applications/ff_13.json'
Transfer_unit = 7.1974500000/4.0534302809 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (3,1,1)  

structure_folder = "/home/yutao/project/In-MOF/InOF-13/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM1424.cif")
dest_path = "/home/yutao/project/MIL-120/traj14/"
copy_to_path = "./traj14/"
ff_path = '/home/yutao/project/aiida/applications/ff_14.json'
Transfer_unit = 8.9985000000/5.1008117437 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 

structure_folder = "/home/yutao/project/In-MOF/REYJEM/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM2072.cif")
dest_path = "/home/yutao/project/MIL-120/traj15/"
copy_to_path = "./traj15/"
ff_path = '/home/yutao/project/aiida/applications/ff_15.json'
Transfer_unit = 8.9638000000/4.4192338793 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 

structure_folder = "/home/yutao/project/In-MOF/InOF-13/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM1424.cif")
dest_path = "/home/yutao/project/MIL-120/traj14/"
copy_to_path = "./traj14/"
ff_path = '/home/yutao/project/aiida/applications/ff_14.json'
Transfer_unit = 8.9985000000/5.1008117437 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 


'''

structure_folder = "/home/yutao/project/InN-MOF/ZIDDIB/"
SET_temperature = 293
Index = 1
Transfer_unit = 14.835925/4.398046  #It also depends on different structure, it also contains transfer from STP to mol/Kg
cif_path = os.path.join(structure_folder, "RSM3919.cif")
scaling_factors = (2,2,1)  
experiment_path = os.path.join(structure_folder, f"{SET_temperature}K_short.csv")
dest_path = f"/home/yutao/project/MIL-120/traj{Index}/"
copy_to_path = f"./traj{Index}/"
ff_path = f'/home/yutao/project/aiida/applications/ff_{Index}.json'






"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
# When I try to change metal element, reset four metal elements

element_list = ['In_', 'C_', 'H_', 'O_', 'N_']
Number_points = 3          ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   50                 #迭代循环次数    推荐50-100



cutoff = 0.95     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0


Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")




import shutil

'''
I clean all data in before sampling which cause problems sometimes

'''

'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]

bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit, element_list
    
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, Temperature = SET_temperature,path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"Temperature = {Temperature}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program

"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]


atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path)
write_pdb_file(pos_info,cell_parameters, Framework_path)


# Initial Optimized parameters
xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_InN.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_InN.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[4].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[0].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.01)
opt_state = optimizer.init(paramset)

#os.system(f"cp /home/yutao/project/aiida/applications/Al_graphite.json {ff_path}")
#os.system(f"cp /home/yutao/project/aiida/applications/UFF_Mg.json {ff_path}")






In [5]:
# Initial Optimized parameters
xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_InN.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_InN.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)

In [6]:
move_traj(dest_path,picked_pressure, copy_to_path)
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)
try:
    for i in range(1,Number_points+1):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")
except:
    print("Error in the data processing")
def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
update_ff(paramset, ff_path)
lj_gen.overwrite(paramset)
#sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
#paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
clear_caches()
clear_backends()  

Range of energy: -808.3678601241522 -- -504.50124908363546
Range of energy: -898.5071710726376 -- -594.3576241288646
Range of energy: -901.574913075822 -- -727.832487007445
This is before derivative [ 2.27360362 52.84604046  8.10187079 -7.87214281  6.66122244 30.35821068
 29.90843764]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [3]:
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)

DMFFException: Atom type In not found.

In [None]:

for nloop in range(loop_time):

    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)
    try:
        for i in range(1,Number_points+1):
        #print(np.average(traj_dict[i]['experiment']['loading']))
        #print(np.average(traj_dict[i]['loading']))
            print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")
    except:
        print("Error in the data processing")
    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    #sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    #paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [10]:


args_list = [(cif_path_new, picked_pressure_new, ff_path_new, dest_path_new, experiment_path_new, SET_temperature_new, Transfer_unit_new),
    (cif_path, picked_pressure, ff_path, dest_path, experiment_path, SET_temperature, Transfer_unit)
            ]

move_traj(dest_path,picked_pressure, copy_to_path)
move_traj(dest_path_new,picked_pressure_new, copy_to_path_new)

traj_dict_new = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, Framework_path=Framework_path, Scaled_frame_path=Scaled_frame_path,interval=100, picked_pressure = picked_pressure, picked_isotherm = picked_isotherm, scaling_factors = scaling_factors, SET_temperature = SET_temperature)
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path_new, numframe=atomic_number_new, cutoff=cutoff, Framework_path=Framework_path_new, Scaled_frame_path=Scaled_frame_path_new,interval=100, picked_pressure = picked_pressure_new, picked_isotherm = picked_isotherm_new, scaling_factors = scaling_factors_new, SET_temperature = SET_temperature_new)
'''
def loss1(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)
'''
def loss2(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict_new[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict_new[idx]['structure'][jdx][0], traj_dict_new[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict_new[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict_new[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict_new[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))                                                                                                                                                
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

# Assuming loss1 and loss2 are your loss functions
def combined_loss(params):
    return loss1(params) + loss2(params)

v_and_g = jax.value_and_grad(loss2, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])


This is before derivative [ 1.27981712 52.88332461 10.74487221 28.7700347  46.14063087 33.81928802]


In [9]:
traj_dict_new

{1: {'experiment': {'pressure': 0.02682803831503611,
   'loading': 0.2204925329994117},
  'structure': [[<Topology; 1 chains, 8 residues, 357 atoms, 10 bonds>,
    Quantity(value=[Vec3(x=0.7416, y=-0.5369, z=0.9535), Vec3(x=0.37000000000000005, y=-0.5375, z=0.954), Vec3(x=0.3682, y=1.0937, z=0.0004), Vec3(x=0.7387, y=0.0165, z=1.9089), Vec3(x=0.0026, y=0.003, z=0.002), Vec3(x=0.37410000000000004, y=0.001, z=0.0029000000000000002), Vec3(x=0.399, y=-0.2843, z=1.3035), Vec3(x=0.7139000000000001, y=-0.28140000000000004, z=0.6108), Vec3(x=0.7144, y=-0.14830000000000002, z=0.39630000000000004), Vec3(x=0.027800000000000005, y=1.4024, z=0.6066), Vec3(x=0.34180000000000005, y=1.3999000000000001, z=1.2952000000000001), Vec3(x=0.3426, y=1.2703, z=1.5111), Vec3(x=0.7117, y=-0.1574, z=1.5186000000000002), Vec3(x=0.3973, y=0.44120000000000004, z=1.8550000000000002), Vec3(x=0.3995, y=0.6924000000000001, z=1.8475000000000001), Vec3(x=0.3407, y=1.2732, z=0.3933), Vec3(x=0.026800000000000004, y=0.6731, 

In [7]:
traj_dict_new = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path_new, numframe=atomic_number_new, cutoff=0.70, Framework_path=Framework_path_new, Scaled_frame_path=Scaled_frame_path_new,interval=Interval)

In [8]:
Scaled_frame_path_new

'/home/yutao/project/In-MOF/mil-68/scaled_frame.pdb'

In [1]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends
from multiprocessing import Pool

"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""

'''

Here is the advanced version to do it. I will add those element I want to refine

def write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(path, 'w') as f:
        f.write(f"structure_folder = '{structure_folder}'\n")
        f.write(f"experiment_path = '{experiment_path}'\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"copy_to_path = '{copy_to_path}'\n")
        f.write(f"ff_path = '{ff_path}'\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"SET_temperature = {SET_temperature}\n")

# Call the function to create the config.py file
write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature)

# Now import the config module
import config

# Now you can use the variables from config.py
structure_folder = config.structure_folder
experiment_path = config.experiment_path
cif_path = config.cif_path
dest_path = config.dest_path
copy_to_path = config.copy_to_path
ff_path = config.ff_path
Transfer_unit = config.Transfer_unit
SET_temperature = config.SET_temperature

'''

'''

structure_folder = "/home/yutao/project/In-MOF/mil-68/"
experiment_path = os.path.join(structure_folder, "273K.csv")
cif_path = os.path.join(structure_folder, "RSM1292.cif")
dest_path = "/home/yutao/project/MIL-120/traj13/"
copy_to_path = "./traj13/"
ff_path = '/home/yutao/project/aiida/applications/ff_13.json'
Transfer_unit = 7.1974500000/4.0534302809 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (3,1,1)  

structure_folder = "/home/yutao/project/In-MOF/InOF-13/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM1424.cif")
dest_path = "/home/yutao/project/MIL-120/traj14/"
copy_to_path = "./traj14/"
ff_path = '/home/yutao/project/aiida/applications/ff_14.json'
Transfer_unit = 8.9985000000/5.1008117437 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 

structure_folder = "/home/yutao/project/In-MOF/REYJEM/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM2072.cif")
dest_path = "/home/yutao/project/MIL-120/traj15/"
copy_to_path = "./traj15/"
ff_path = '/home/yutao/project/aiida/applications/ff_15.json'
Transfer_unit = 8.9638000000/4.4192338793 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 

structure_folder = "/home/yutao/project/In-MOF/InOF-13/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM1424.cif")
dest_path = "/home/yutao/project/MIL-120/traj14/"
copy_to_path = "./traj14/"
ff_path = '/home/yutao/project/aiida/applications/ff_14.json'
Transfer_unit = 8.9985000000/5.1008117437 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 


'''



"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
# When I try to change metal element, reset four metal elements

element_list = ['In_', 'C_', 'H_', 'O_']
Number_points = 4        ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   50                 #迭代循环次数    推荐50-100
Interval = 10


cutoff = 0.95     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0





bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit, element_list
    
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)


def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff, Framework_path, Scaled_frame_path, interval, picked_pressure, picked_isotherm, scaling_factors, SET_temperature):

    global  Number_points

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path,  dest_path, exp_path,Temperature, copy_from_remote="Movies/System_0/", Number_points=Number_points):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"Temperature = {Temperature}\n")
    with open(os.path.join(save_path, 'config.py'), 'r') as f:
        print("This is my read part")
        print(f.read())
def sample(cif_path, picked_pressure, ff_path, dest_path, experiment_path, SET_temperature, Transfer_unit):
    global aiida_path
    print(f"Start Sample {cif_path} {picked_pressure} {ff_path} {dest_path} {experiment_path} {SET_temperature}")
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path, ff_path, dest_path,experiment_path, SET_temperature) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
   
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program

"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app


# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]


structure_folder = "/home/yutao/project/In-MOF/REYJEM/"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM2072.cif")
dest_path = "/home/yutao/project/MIL-120/traj15/"
copy_to_path = "./traj15/"
ff_path = '/home/yutao/project/aiida/applications/ff_15.json'
Transfer_unit = 8.9638000000/4.4192338793 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (2,2,1) 



Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")

atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path)
write_pdb_file(pos_info,cell_parameters, Framework_path)

#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]



structure_folder_new = "/home/yutao/project/In-MOF/mil-68/"
experiment_path_new = os.path.join(structure_folder_new, "273K.csv")
cif_path_new = os.path.join(structure_folder_new, "RSM1292.cif")
dest_path_new = "/home/yutao/project/MIL-120/traj13/"
copy_to_path_new = "./traj13/"
ff_path_new = '/home/yutao/project/aiida/applications/ff_13.json'
Transfer_unit_new = 7.1974500000/4.0534302809 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature_new = 273
scaling_factors_new = (3,1,1)  

Framework_path_new = os.path.join(structure_folder_new,"structure.pdb")
Forcefiled_path_new = os.path.join(structure_folder_new,"forcefield.xml")
Scaled_frame_path_new = os.path.join(structure_folder_new,"scaled_frame.pdb")


atoms = read(cif_path_new)
atomic_number_new = len(atoms)*scaling_factors_new[0]*scaling_factors_new[1]*scaling_factors_new[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path_new)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path_new)
write_pdb_file(pos_info, cell_parameters, Framework_path_new)

#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"

for direct in [dest_path_new, copy_to_path_new]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path_new, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure_new = [data[i,0] for i in picked_ls]
picked_isotherm_new = [data[i,1]*Transfer_unit_new/22.4 for i in picked_ls]


# Initial Optimized parameters
xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_In.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_In.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)


paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[0].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.01)
opt_state = optimizer.init(paramset)

#os.system(f"cp /home/yutao/project/aiida/applications/UFF_In.json {ff_path}")
os.system(f"cp /home/yutao/project/aiida/applications/UFF_In.json {ff_path}")

import time


args_list = [(cif_path_new, picked_pressure_new, ff_path_new, dest_path_new, experiment_path_new, SET_temperature_new, Transfer_unit_new),
    (cif_path, picked_pressure, ff_path, dest_path, experiment_path, SET_temperature, Transfer_unit)
            ]
# Create a Pool object
'''
p = Pool(2)

# Use apply_async to run the sample function asynchronously for each element in args_list
p.apply_async(sample, args_list[0])
time.sleep(20)
p.apply_async(sample, args_list[1])

# Close the Pool and wait for all processes to finish
p.close()
print("Start waiting for two sampling")
p.join()
'''
move_traj(dest_path,picked_pressure, copy_to_path)
move_traj(dest_path_new,picked_pressure_new, copy_to_path_new)





In [2]:
Transfer_energy_unit = 254.152/2.11525

55.8631/Transfer_energy_unit

0.46493603148903023

In [3]:
traj_dict_new = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path_new, numframe=atomic_number_new, cutoff=0.85, Framework_path=Framework_path_new, Scaled_frame_path=Scaled_frame_path_new,interval=100, picked_pressure = picked_pressure_new, picked_isotherm = picked_isotherm_new, scaling_factors = scaling_factors_new, SET_temperature = SET_temperature_new)


In [11]:
copy_to_path_new

'./traj13/'

{1: {'experiment': {'pressure': 0.02682803831503611,
   'loading': 0.2204925329994117},
  'structure': [[<Topology; 1 chains, 8 residues, 357 atoms, 10 bonds>,
    Quantity(value=[Vec3(x=0.7416, y=-0.5369, z=0.9535), Vec3(x=0.37000000000000005, y=-0.5375, z=0.954), Vec3(x=0.3682, y=1.0937, z=0.0004), Vec3(x=0.7387, y=0.0165, z=1.9089), Vec3(x=0.0026, y=0.003, z=0.002), Vec3(x=0.37410000000000004, y=0.001, z=0.0029000000000000002), Vec3(x=0.399, y=-0.2843, z=1.3035), Vec3(x=0.7139000000000001, y=-0.28140000000000004, z=0.6108), Vec3(x=0.7144, y=-0.14830000000000002, z=0.39630000000000004), Vec3(x=0.027800000000000005, y=1.4024, z=0.6066), Vec3(x=0.34180000000000005, y=1.3999000000000001, z=1.2952000000000001), Vec3(x=0.3426, y=1.2703, z=1.5111), Vec3(x=0.7117, y=-0.1574, z=1.5186000000000002), Vec3(x=0.3973, y=0.44120000000000004, z=1.8550000000000002), Vec3(x=0.3995, y=0.6924000000000001, z=1.8475000000000001), Vec3(x=0.3407, y=1.2732, z=0.3933), Vec3(x=0.026800000000000004, y=0.6731, 

In [10]:
traj_dict

{1: {'experiment': {'pressure': 0.0332410105263158,
   'loading': 0.22067376360167237},
  'structure': [[<Topology; 1 chains, 15 residues, 665 atoms, 22 bonds>,
    Quantity(value=[Vec3(x=0.6386000000000001, y=0.1252, z=1.8991), Vec3(x=0.6386000000000001, y=1.2463, z=0.633), Vec3(x=1.2696, y=0.0, z=0.0), Vec3(x=0.0077, y=0.0, z=1.2661), Vec3(x=0.9548000000000001, y=1.3415, z=2.1940000000000004), Vec3(x=1.2241, y=1.0062, z=2.1948000000000003), Vec3(x=0.9288000000000001, y=1.0488, z=1.879), Vec3(x=0.0118, y=0.4151, z=1.5464000000000002), Vec3(x=0.9473000000000001, y=0.1524, z=1.553), Vec3(x=0.9714, y=0.4739, z=1.8368), Vec3(x=1.1939, y=0.7619, z=2.0997), Vec3(x=0.032, y=0.8876, z=2.0453), Vec3(x=1.1326, y=0.5753, z=1.9304), Vec3(x=1.0754, y=0.7038000000000001, z=1.8194), Vec3(x=0.3225, y=0.0299, z=0.9279000000000001), Vec3(x=0.053200000000000004, y=0.3652, z=0.9287000000000001), Vec3(x=0.34850000000000003, y=0.3226, z=0.6129), Vec3(x=1.2655, y=0.9563000000000001, z=0.2803), Vec3(x=0.33, 

In [9]:
cutoff = 0.85 
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, Framework_path=Framework_path, Scaled_frame_path=Scaled_frame_path,interval=100, picked_pressure = picked_pressure, picked_isotherm = picked_isotherm, scaling_factors = scaling_factors, SET_temperature = SET_temperature)
traj_dict_new = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path_new, numframe=atomic_number_new, cutoff=cutoff, Framework_path=Framework_path_new, Scaled_frame_path=Scaled_frame_path_new,interval=100, picked_pressure = picked_pressure_new, picked_isotherm = picked_isotherm_new, scaling_factors = scaling_factors_new, SET_temperature = SET_temperature_new)


In [8]:
Scaled_frame_path_new

'/home/yutao/project/In-MOF/mil-68/scaled_frame.pdb'

In [7]:
cutoff

0.95

In [3]:
print(args_list[0])
print(args_list[1])

('/home/yutao/project/In-MOF/REYJEM/RSM2072.cif', [0.0332410105263158], '/home/yutao/project/aiida/applications/ff_15.json', '/home/yutao/project/MIL-120/traj15/', '/home/yutao/project/In-MOF/REYJEM/273K_short.csv', 273)
('/home/yutao/project/In-MOF/mil-68/RSM1292.cif', [0.02682803831503611], '/home/yutao/project/aiida/applications/ff_13.json', '/home/yutao/project/MIL-120/traj13/', '/home/yutao/project/In-MOF/mil-68/273K.csv', 273)


In [4]:
sample(*args_list[0])

KeyboardInterrupt: 

In [5]:
sample(*args_list[1])

KeyboardInterrupt: 

In [12]:
def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path,  dest_path, exp_path,Temperature, copy_from_remote="Movies/System_0/", Number_points=Number_points):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"Temperature = {Temperature}\n")
    with open(os.path.join(save_path, 'config.py'), 'r') as f:
        print("This is my read part")
        print(f.read())
def sample(cif_path, picked_pressure, ff_path, dest_path, experiment_path, SET_temperature):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path, ff_path, dest_path,experiment_path, SET_temperature) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    print(f"Start Sample {cif_path}")
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program


In [13]:
# Create a Pool object
p = Pool(2)
p.apply_async(sample, args_list[0])


<multiprocessing.pool.ApplyResult at 0x7aaee8a22b90>

This is my read part
ff_data = '/home/yutao/project/aiida/applications/ff_13.json'
copy_from_remote = 'Movies/System_0/'
dest_path = '/home/yutao/project/MIL-120/traj13/'
exp_path = '/home/yutao/project/In-MOF/mil-68/273K.csv'
Number_of_points = 1
cif_path = '/home/yutao/project/In-MOF/mil-68/RSM1292.cif'
pressure_list = [0.02682803831503611]
Transfer_unit = 2.028360626484845
Temperature = 273

Start Sample /home/yutao/project/In-MOF/mil-68/RSM1292.cif


In [11]:
# Create a Pool object
p = Pool(2)
p.apply_async(sample, args_list[0])
p.apply_async(sample, args_list[1])

p.close()
p.join()

KeyboardInterrupt: 

As long as it finishes, 0


'Script finished successfully!'

'Script output:'

'This is the final pk values:  758550\n[2.5061666669186744]\n[0.22067376360167237]\nThis is loss function:  2.285492903317002\n'

As long as it finishes, 0


'Script finished successfully!'

'Script output:'

'This is the final pk values:  758544\n[2.590666666925945]\n[0.22067376360167237]\nThis is loss function:  2.3699929033242726\n'

In [10]:
args_list = [(cif_path_new, picked_pressure_new, ff_path_new, dest_path_new, experiment_path_new, SET_temperature_new),
    (cif_path, picked_pressure, ff_path, dest_path, experiment_path, SET_temperature)
            ]

In [8]:
# Create a Pool object
p = Pool(2)

# Use apply_async to run the sample function asynchronously for each element in args_list
for args in args_list:
    p.apply_async(sample, args)

# Close the Pool and wait for all processes to finish
p.close()
p.join()

This is the writing partThis is the writing part  /home/yutao/project/In-MOF/REYJEM/RSM2072.cif/home/yutao/project/In-MOF/mil-68/RSM1292.cif  [0.0332410105263158][0.0332410105263158]  2.0283606264848452.028360626484845  /home/yutao/project/aiida/applications//home/yutao/project/aiida/applications/ /home/yutao/project/aiida/applications/ff_13.json  /home/yutao/project/aiida/applications/ff_15.json/home/yutao/project/MIL-120/traj13/  /home/yutao/project/MIL-120/traj15//home/yutao/project/In-MOF/mil-68/273K.csv  /home/yutao/project/In-MOF/REYJEM/273K_short.csv273 
273


KeyboardInterrupt: 

In [None]:
# Create a Pool object
p = Pool(2)

# Use apply_async to run the sample function asynchronously for each element in args_list
for args in args_list:
    p.apply_async(sample, args)

# Close the Pool and wait for all processes to finish
p.close()
p.join()

In [None]:
# Create a Pool object
p = Pool(2)

# Use apply_async to run the sample function asynchronously for each element in args_list
for args in args_list:
    p.apply_async(sample, args)

# Close the Pool and wait for all processes to finish
p.close()
p.join()

move_traj(dest_path,picked_pressure, copy_to_path)
move_traj(dest_path_new,picked_pressure_new, copy_to_path_new)

traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, Framework_path=Framework_path, Scaled_frame_path=Scaled_frame_path,interval=Interval)
traj_dict_new = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path_new, numframe=atomic_number_new, cutoff=cutoff, Framework_path=Framework_path_new, Scaled_frame_path=Scaled_frame_path_new,interval=Interval)

def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)

    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict_new[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict_new[idx]['structure'][jdx][0], traj_dict_new[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict_new[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict_new[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict_new[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
update_ff(paramset, ff_path)
lj_gen.overwrite(paramset)
sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
clear_caches()
clear_backends()  


In [10]:
print(f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
print(f" Loss: {v} and Parameters: ",paramset_old.parameters['LennardJonesForce']['sigma'], paramset_old.parameters['LennardJonesForce']['epsilon'])

 Loss: 4.210624081486642 and Parameters:  [0.39761 0.34309 0.25711 0.31181 0.305   0.28   ] [2.49397969 0.42479    0.18436    0.23579    0.65757    0.22469   ]
 Loss: 4.210624081486642 and Parameters:  [0.39761 0.34309 0.25711 0.31181 0.305   0.28   ] [2.50897969 0.43979    0.18436    0.25079    0.65757    0.22469   ]


In [3]:
301.4595*2.11525/254.152

2.5089796947299257

In [1]:
0.34309*(2**(1/6))

0.3851055041544628

In [11]:
2.5164*2.11525/254.152

0.020943431883282446

In [None]:

#os.system(f"cp /home/yutao/project/aiida/applications/UFF.json {ff_path}")
for nloop in range(100):
    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=15)
    
    for i in range(1,Number_points+1):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [19]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends


"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""

'''

Here is the advanced version to do it. I will add those element I want to refine

def write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(path, 'w') as f:
        f.write(f"structure_folder = '{structure_folder}'\n")
        f.write(f"experiment_path = '{experiment_path}'\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"copy_to_path = '{copy_to_path}'\n")
        f.write(f"ff_path = '{ff_path}'\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"SET_temperature = {SET_temperature}\n")

# Call the function to create the config.py file
write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature)

# Now import the config module
import config

# Now you can use the variables from config.py
structure_folder = config.structure_folder
experiment_path = config.experiment_path
cif_path = config.cif_path
dest_path = config.dest_path
copy_to_path = config.copy_to_path
ff_path = config.ff_path
Transfer_unit = config.Transfer_unit
SET_temperature = config.SET_temperature

'''

'''
Number_points = 5           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure
SET_temperature = 273
Framework_path = "/home/yutao/project/Al-MOF/nott300/structure.pdb"
Forcefiled_path = "/home/yutao/project/Al-MOF/nott300/forcefield.xml"
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = "/home/yutao/project/Al-MOF/nott300/scaled_frame.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
cif_path = "/home/yutao/project/Al-MOF/nott300/RSM0516.cif"
for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)
'''

'''
structure_folder = "/home/yutao/project/Al-MOF/MIL-160/"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298
'''

structure_folder = "/home/yutao/project/local/Al-MOF/MIL-160"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj5/"
copy_to_path = "./traj5/"
ff_path = '/home/yutao/project/aiida/applications/ff_5.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298

"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
# remember to change UFF_local.json and init_local.xml, extra_atoms are after the framework atoms
extra_atoms = ['Os']
suplement_dict = {
"NonbonedForce": [{"epsilon": "0.25079", "sigma": "0.31181", "type": "Oal"},],
"AtomTypes": [{"name": "Os", "class": "Os", "element": "Os", "mass": "15.9994"},],
}


Number_points = 1           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100


scaling_factors = (1,1,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0


Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]

bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit, extra_atoms
    element_list = ['Al_', 'C_', 'H_', 'O_']
    extra_atoms = [atom_name+"_" for atom_name in extra_atoms]
    if len(extra_atoms) !=0:
        element_list += extra_atoms
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program



In [1]:
"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]



atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path,suplement_dict)
write_pdb_file(pos_info,cell_parameters, Framework_path)



NameError: name 'cif_path' is not defined

In [1]:
"""

In order to debug in April 5, 2024.

"""

# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends


"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""

'''

Here is the advanced version to do it. I will add those element I want to refine

def write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(path, 'w') as f:
        f.write(f"structure_folder = '{structure_folder}'\n")
        f.write(f"experiment_path = '{experiment_path}'\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"copy_to_path = '{copy_to_path}'\n")
        f.write(f"ff_path = '{ff_path}'\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"SET_temperature = {SET_temperature}\n")

# Call the function to create the config.py file
write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature)

# Now import the config module
import config

# Now you can use the variables from config.py
structure_folder = config.structure_folder
experiment_path = config.experiment_path
cif_path = config.cif_path
dest_path = config.dest_path
copy_to_path = config.copy_to_path
ff_path = config.ff_path
Transfer_unit = config.Transfer_unit
SET_temperature = config.SET_temperature

'''

'''
Number_points = 5           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure
SET_temperature = 273
Framework_path = "/home/yutao/project/Al-MOF/nott300/structure.pdb"
Forcefiled_path = "/home/yutao/project/Al-MOF/nott300/forcefield.xml"
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = "/home/yutao/project/Al-MOF/nott300/scaled_frame.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
cif_path = "/home/yutao/project/Al-MOF/nott300/RSM0516.cif"
for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)
'''

'''
structure_folder = "/home/yutao/project/Al-MOF/MIL-160/"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298
'''

structure_folder = "/home/yutao/project/local/Al-MOF/MIL-160"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj5/"
copy_to_path = "./traj5/"
ff_path = '/home/yutao/project/aiida/applications/ff_5.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298

"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
# remember to change UFF_local.json and init_local.xml, extra_atoms are after the framework atoms
extra_atoms = ['Os']
suplement_dict = {
"NonbonedForce": [{"epsilon": "0.25079", "sigma": "0.31181", "type": "Oal"},],
"AtomTypes": [{"name": "Os", "class": "Os", "element": "Os", "mass": "15.9994"},],
}


Number_points = 3          ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100


scaling_factors = (1,1,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0


Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]

bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit, extra_atoms
    element_list = ['Al_', 'C_', 'H_', 'O_']
    extra_atoms_new = [atom_name+"_" for atom_name in extra_atoms]
    if len(extra_atoms) !=0:
        element_list += extra_atoms_new
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, Temperature = SET_temperature,path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"Temperature = {Temperature}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program










In [2]:
# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init_local.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_local.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[4].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.02)
opt_state = optimizer.init(paramset)



In [10]:
from ase.io import read, write
atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=6)


def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)


In [21]:
paramset.parameters

{'LennardJonesForce': {'sigma': Array([0.40082, 0.34309, 0.25711, 0.31181, 0.31181, 0.305  , 0.28   ],      dtype=float64),
  'epsilon': Array([2.11525, 0.43979, 0.18436, 0.25079, 0.25079, 0.65757, 0.22469],      dtype=float64),
  'sigma_nbfix': Array([], dtype=float64),
  'epsilon_nbfix': Array([], dtype=float64)}}

In [15]:
traj_dict[3]['experiment']['loading']

1.3404376123355883

In [20]:
np.average(traj_dict[3]['loading'])

0.6547619047619048

In [11]:
v

Array(0.88083228, dtype=float64)

In [None]:
for nloop in range(100):
    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=6)
    
    for i in range(1,Number_points+1):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ")
    print(paramset.parameters['LennardJonesForce']['sigma'])
    print(paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [None]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends


"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""

'''

Here is the advanced version to do it. I will add those element I want to refine

def write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(path, 'w') as f:
        f.write(f"structure_folder = '{structure_folder}'\n")
        f.write(f"experiment_path = '{experiment_path}'\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"copy_to_path = '{copy_to_path}'\n")
        f.write(f"ff_path = '{ff_path}'\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"SET_temperature = {SET_temperature}\n")

# Call the function to create the config.py file
write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature)

# Now import the config module
import config

# Now you can use the variables from config.py
structure_folder = config.structure_folder
experiment_path = config.experiment_path
cif_path = config.cif_path
dest_path = config.dest_path
copy_to_path = config.copy_to_path
ff_path = config.ff_path
Transfer_unit = config.Transfer_unit
SET_temperature = config.SET_temperature

'''

'''
Number_points = 5           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure
SET_temperature = 273
Framework_path = "/home/yutao/project/Al-MOF/nott300/structure.pdb"
Forcefiled_path = "/home/yutao/project/Al-MOF/nott300/forcefield.xml"
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = "/home/yutao/project/Al-MOF/nott300/scaled_frame.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
cif_path = "/home/yutao/project/Al-MOF/nott300/RSM0516.cif"
for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)
'''

'''
structure_folder = "/home/yutao/project/Al-MOF/MIL-160/"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298
'''

structure_folder = "/home/yutao/project/local/Al-MOF/MIL-160"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj5/"
copy_to_path = "./traj5/"
ff_path = '/home/yutao/project/aiida/applications/ff_5.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298

"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
# remember to change UFF_local.json and init_local.xml, extra_atoms are after the framework atoms
extra_atoms = ['Os']
suplement_dict = {
"NonbonedForce": [{"epsilon": "0.25079", "sigma": "0.31181", "type": "Oal"},],
"AtomTypes": [{"name": "Os", "class": "Os", "element": "Os", "mass": "15.9994"},],
}


Number_points = 1           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100


scaling_factors = (1,1,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0


Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]

bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit, extra_atoms
    element_list = ['Al_', 'C_', 'H_', 'O_']
    extra_atoms = [atom_name+"_" for atom_name in extra_atoms]
    if len(extra_atoms) !=0:
        element_list += extra_atoms
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, Temperature = SET_temperature,path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"Temperature = {Temperature}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program



"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]



atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path,suplement_dict)
write_pdb_file(pos_info,cell_parameters, Framework_path)


# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init_local.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_local.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.01)
opt_state = optimizer.init(paramset)


os.system(f"cp /home/yutao/project/aiida/applications/UFF_local.json {ff_path}")
for nloop in range(100):
    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=6)
    
    for i in range(1,Number_points+1):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [21]:
create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

In [23]:
pdbfile = app.PDBFile(Framework_path)

# Get the original topology and positions from the PDB file
original_topology = pdbfile.topology
original_positions = pdbfile.getPositions()

# Get the original box vectors
original_box_vectors = original_topology.getPeriodicBoxVectors()

In [24]:
# Iterate over the atoms in the topology
for atom in pdbfile.topology.atoms():
    print(f"Atom: {atom.name}, Element: {atom.element}")

Atom: Al1, Element: <Element aluminum>
Atom: Al2, Element: <Element aluminum>
Atom: Al3, Element: <Element aluminum>
Atom: Al4, Element: <Element aluminum>
Atom: Al5, Element: <Element aluminum>
Atom: Al6, Element: <Element aluminum>
Atom: Al7, Element: <Element aluminum>
Atom: Al8, Element: <Element aluminum>
Atom: Al9, Element: <Element aluminum>
Atom: Al10, Element: <Element aluminum>
Atom: Al11, Element: <Element aluminum>
Atom: Al12, Element: <Element aluminum>
Atom: Al13, Element: <Element aluminum>
Atom: Al14, Element: <Element aluminum>
Atom: Al15, Element: <Element aluminum>
Atom: Al16, Element: <Element aluminum>
Atom: H17, Element: <Element hydrogen>
Atom: H18, Element: <Element hydrogen>
Atom: H19, Element: <Element hydrogen>
Atom: H20, Element: <Element hydrogen>
Atom: H21, Element: <Element hydrogen>
Atom: H22, Element: <Element hydrogen>
Atom: H23, Element: <Element hydrogen>
Atom: H24, Element: <Element hydrogen>
Atom: H25, Element: <Element hydrogen>
Atom: H26, Element

In [16]:
original_topology

<Topology; 1 chains, 1 residues, 256 atoms, 0 bonds>

In [13]:
app.PDBFile(Framework_path)

<openmm.app.pdbfile.PDBFile at 0x7ff654a16a90>



In [3]:
# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init_local.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_local.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.005)
opt_state = optimizer.init(paramset)

In [4]:
paramset.parameters

{'LennardJonesForce': {'sigma': Array([0.40082, 0.34309, 0.25711, 0.31181, 0.31181, 0.305  , 0.28   ],      dtype=float64),
  'epsilon': Array([2.11525, 0.43979, 0.18436, 0.25079, 0.25079, 0.65757, 0.22469],      dtype=float64),
  'sigma_nbfix': Array([], dtype=float64),
  'epsilon_nbfix': Array([], dtype=float64)}}

In [7]:
from ase.io import read
atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

copy_to_path = "./traj2/"
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)



In [8]:
def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

In [9]:
v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

In [10]:
print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])

This is before derivative [0.10844032 1.64466599 0.51115536 1.1505745  0.         1.60380371
 1.27271881]


In [6]:
os.system(f"cp /home/yutao/project/aiida/applications/UFF.json {ff_path}")


#sample(cif_path, picked_pressure)
#move_traj(dest_path,picked_pressure, copy_to_path)
copy_to_path = "./traj2/"
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)

for i in range(1,5):
#print(np.average(traj_dict[i]['experiment']['loading']))
#print(np.average(traj_dict[i]['loading']))
    print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
update_ff(paramset, ff_path)
lj_gen.overwrite(paramset)
sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
clear_caches()
clear_backends()  


ValueError: could not convert string to float: '39   0.1'

In [10]:

def write_pdb_file(cif_info, cell_parameters, filename):
    with open(filename, 'w') as f:
        f.write("REMARK   For DMFF workflow PDB file\n")
        
        
        # Format and write the crystal parameters
        try:
            a, b, c, alpha, beta, gamma = [float(param) for param in cell_parameters]
        except ValueError:
            raise ValueError("Invalid crystal parameters provided.")
        
        f.write(f"CRYST1 {a:8.3f} {b:8.3f} {c:8.3f} {alpha:6.2f} {beta:6.2f} {gamma:6.2f}\n")
        
        # Find the index where the atom information starts
        start_index = next(i for i, sublist in enumerate(cif_info) if len(sublist) > 1)

        # Get the maximum length of atom names for consistent padding
        #max_atom_name_length = max(len(atom_info[0]) for atom_info in cif_info[start_index:])
        
        # Iterate over the cif_info list starting from the start_index
        for i, atom_info in enumerate(cif_info[start_index:], start=1):
            # If atom name is longer than 4 characters, truncate it
            if len(atom_info[0]) > 4:
                atom_name = atom_info[0][:4]
            else:
                atom_name = atom_info[0]
            atom_name = f"{atom_name:<4}"
            serial_number = f"{i:>5}"
            # Write the atom information to the file
            # atom_info[0] is the atom name, atom_info[1] is the element symbol
            f.write(f"ATOM  {serial_number} {atom_name} MOL A   1    {atom_info[2]:8.3f}{atom_info[3]:8.3f}{atom_info[4]:8.3f}  1.00  1.00          {atom_info[1]:>2}\n")
        f.write("END\n")

write_pdb_file(pos_info,cell_parameters, Framework_path)

In [3]:
app.PDBFile(os.path.join(structure_folder, "structure.pdb"))

<openmm.app.pdbfile.PDBFile at 0x7f6305b26b90>

In [None]:
os.system(f"cp /home/yutao/project/aiida/applications/UFF.json {ff_path}")
for nloop in range(2):
    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)
    
    for i in range(1,5):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [None]:
"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]


atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path)
write_pdb_file(pos_info,cell_parameters, Framework_path)


# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.005)
opt_state = optimizer.init(paramset)


os.system(f"cp /home/yutao/project/aiida/applications/UFF.json {ff_path}")
for nloop in range(100):
    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)
    
    for i in range(1,5):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [14]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends


"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
'''
Number_points = 5           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure
SET_temperature = 273
Framework_path = "/home/yutao/project/Al-MOF/nott300/structure.pdb"
Forcefiled_path = "/home/yutao/project/Al-MOF/nott300/forcefield.xml"
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = "/home/yutao/project/Al-MOF/nott300/scaled_frame.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
cif_path = "/home/yutao/project/Al-MOF/nott300/RSM0516.cif"
for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)
'''


structure_folder = "/home/yutao/project/Al-MOF/MIL-160/"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298

refined_element = ['Al', 'C', 'H', 'Oal']
# I choose to generate the whole force field for UFF, all_element = ['Al', 'C', 'H', 'Oal', 'O']

"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
Number_points = 4           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100


scaling_factors = (1,1,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0


Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]

bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit
    element_list = ['Al_', 'C_', 'H_', 'O_']
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program

"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]


atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, "/home/yutao/project/Al-MOF/nott300/forcefield.xml")
write_pdb_file(pos_info,cell_parameters, "/home/yutao/project/Al-MOF/nott300/structure.pdb")
app.PDBFile("/home/yutao/project/Al-MOF/nott300/structure.pdb")

# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
#paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.05)
opt_state = optimizer.init(paramset)





In [4]:
for i in range(1,5):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
    print(traj_dict[i]['refer_energy'])
    #print(traj_dict[i]['loading'])

[-21.73463276 -52.17458756   0.         -26.68774897 -51.87887577
   0.         -40.09625913 -25.2109372  -27.88274409 -19.8944252
   0.         -50.65566633 -26.36032941 -51.84521986   0.
 -27.38819974   0.         -45.93563246 -25.13955219   0.
   0.           0.           0.         -50.04946592 -21.94303055]
[ -27.17199174  -26.34188857 -191.9643996   -95.7371414  -108.49987534
  -47.94555374  -52.91707022 -153.93977041  -69.11926247 -105.85009904
  -90.39732314  -82.11298497    0.         -155.8119689   -72.48878444
 -191.4162218  -118.68425109  -73.93314264  -50.08671204  -48.68977914
  -75.83758363  -90.41514158  -51.25541121  -49.00372068  -50.00890635]
[ -91.13829271  -46.46587317  -74.70348563  -96.54582394 -107.30674359
 -166.69120849 -150.8144442  -102.99739617 -231.98303758  -99.73202429
 -178.73259818 -150.31034412 -204.49032192 -125.86610162 -169.28095572
 -132.66697962 -117.09395937 -144.74030126 -153.7699355  -152.97856736
 -160.21158704  -42.76089745 -184.67462794 -12

In [13]:
i = 4
print(np.average(traj_dict[i]['experiment']['loading']))
print(np.average(traj_dict[i]['loading']))
print(traj_dict[i]['refer_energy'])
print(traj_dict[i]['loading'])

1.8058673388410011
2.1011904761904763
[-155.50237228  -71.15974018  -69.73232123  -98.10254072  -99.06909078
 -154.55692184 -125.44152166 -122.65549525 -126.1075561  -132.3637116
  -23.6663487  -111.64894972 -172.17646214 -168.18871231  -26.17171056
  -48.01136147  -71.74054937 -123.26317099  -23.39586451 -172.67332961
 -106.12995018 -131.87836311 -121.67340019 -102.78387657  -70.22670645
  -82.46100953 -123.01531968  -25.16398855  -48.83877302  -47.36407361
 -136.44122895 -177.82714442 -166.64824967 -153.9670231  -164.36880184
  -71.08594177 -188.6373672  -155.00157242 -142.74960652 -146.37162535
  -51.95379528  -50.19694352  -50.78679342  -98.48619924 -114.86253003
  -73.03177528 -123.85493274  -73.48432294 -118.26506703 -197.48356193
  -73.99378723 -173.68179143 -149.65298258    0.         -150.68114501
 -144.92823182 -127.86707564  -55.45087753 -101.20985564 -130.7851907
  -67.51552996 -139.69025918  -53.82129503  -91.10052336  -25.5620099
 -105.06673542  -99.95399462 -118.63225656

In [5]:
np.min(traj_dict[1]['refer_energy'])

Array(-99.96240521, dtype=float64)

In [6]:
np.max(traj_dict[1]['loading'])

Array(2., dtype=float64)

In [8]:
traj_dict[1]

{'experiment': {'pressure': 0.008695652173912771,
  'loading': 0.2047890796623801},
 'structure': [[<Topology; 1 chains, 4 residues, 518 atoms, 4 bonds>,
   Quantity(value=[Vec3(x=0.6765, y=2.1203, z=0.0044), Vec3(x=1.4467, y=0.0015, z=1.0639), Vec3(x=1.5888, y=1.9794, z=0.7976000000000001), Vec3(x=0.5274, y=1.9792000000000003, z=0.8052), Vec3(x=1.5937000000000001, y=0.144, z=0.26389999999999997), Vec3(x=0.5326, y=0.13999999999999999, z=0.2703), Vec3(x=0.38670000000000004, y=2.1225, z=0.5371), Vec3(x=1.7343000000000002, y=2.121, z=0.531), Vec3(x=0.5311, y=1.2043, z=0.797), Vec3(x=0.38670000000000004, y=1.0626, z=0.5305), Vec3(x=0.5287000000000001, y=0.9186000000000001, z=0.264), Vec3(x=1.449, y=1.0634000000000001, z=0.0029000000000000002), Vec3(x=1.5919, y=1.2047, z=0.8029999999999999), Vec3(x=1.5892, y=0.9174, z=0.2702), Vec3(x=0.672, y=1.0593000000000001, z=1.0633), Vec3(x=1.7341000000000002, y=1.0593000000000001, z=0.5363000000000001), Vec3(x=0.4589000000000001, y=2.1155000000000004

In [11]:
print(np.average(traj_dict[1]['experiment']['loading']))
print(np.average(traj_dict[1]['loading']))
print(traj_dict[1]['refer_energy'])
print(traj_dict[1]['loading'])

0.2047890796623801
0.52
[-47.87683925 -26.72772182   0.         -22.3478092  -22.59152253
   0.           0.         -18.69328565 -72.0405869  -75.23883865
   0.           0.         -73.38401549 -22.96902278 -44.81776536
 -45.86385831 -22.46296227   0.         -25.04585445   0.
 -25.01408427   0.         -18.39859462 -22.49150504 -25.29223235]
[1.  0.5 0.  0.5 0.5 0.  0.  0.5 1.5 1.5 0.  0.  1.5 0.5 1.  1.  0.5 0.
 0.5 0.  0.5 0.  0.5 0.5 0.5]


In [4]:
def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

In [7]:
for i in range(1, 5):
    print(traj_dict[i]['experiment']['loading'])

0.2047890796623801
0.8191563186495261
1.3404376123355883
1.8058673388410011


In [6]:
error_0 = loss(paramset=paramset)

This is 83th reweight_loading results from dmff code. 0.4464285714285714 0.4464285714285714
0.2416394917661913
This is 83th reweight_loading results from dmff code. 1.4523809523809523 1.4523809523809523
0.6332246337314262
This is 83th reweight_loading results from dmff code. 1.7440476190476188 1.7440476190476188
0.4036100067120305
This is 83th reweight_loading results from dmff code. 2.1011904761904763 2.1011904761904763
0.29532313734947513


In [3]:
def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])


This is before derivative [ 2.07646564 33.02985146  8.5084078  21.97904004 32.42944604 25.62402528]


In [4]:
def loss(paramset):
    errors = []
    for idx in range(1, 2):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])

This is before derivative [0.16634862 3.01562748 1.01735619 2.26611915 3.00384555 2.19721363]


In [5]:
def loss(paramset):
    errors = []
    for idx in range(1, 3):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])

This is before derivative [0.59563465 9.90144388 2.75684607 7.34504409 9.74215759 7.42247369]


In [11]:
def loss(paramset):
    errors = []
    for idx in range(1, 2):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)

This is before derivative [0.22442972 4.10289593 1.25612104 2.80850452 4.22591486 3.06754321]


In [6]:
Number_points = 4           ## must be smaller than len(picked_ls)

In [7]:
def loss(paramset):
    errors = []
    for idx in range(1, 4):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)

This is before derivative [ 3.06095086e+04 -7.72373279e+09 -1.87626901e+10 -2.02789992e+08
 -1.06892565e+10  5.44000516e+08]


In [8]:
def loss(paramset):
    errors = []
    for idx in range(1, 5):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)

KeyboardInterrupt: 

In [9]:
traj_dict.keys()

dict_keys([1, 2, 3, 4])

In [10]:
traj_dict[3]

{'experiment': {'pressure': 0.050000000000000044,
  'loading': 1.3404376123355883},
 'structure': [[<Topology; 1 chains, 8 residues, 530 atoms, 12 bonds>,
   Quantity(value=[Vec3(x=0.6765, y=2.1203, z=0.0044), Vec3(x=1.4467, y=0.0015, z=1.0639), Vec3(x=1.5888, y=1.9794, z=0.7976000000000001), Vec3(x=0.5274, y=1.9792000000000003, z=0.8052), Vec3(x=1.5937000000000001, y=0.144, z=0.26389999999999997), Vec3(x=0.5326, y=0.13999999999999999, z=0.2703), Vec3(x=0.38670000000000004, y=2.1225, z=0.5371), Vec3(x=1.7343000000000002, y=2.121, z=0.531), Vec3(x=0.5311, y=1.2043, z=0.797), Vec3(x=0.38670000000000004, y=1.0626, z=0.5305), Vec3(x=0.5287000000000001, y=0.9186000000000001, z=0.264), Vec3(x=1.449, y=1.0634000000000001, z=0.0029000000000000002), Vec3(x=1.5919, y=1.2047, z=0.8029999999999999), Vec3(x=1.5892, y=0.9174, z=0.2702), Vec3(x=0.672, y=1.0593000000000001, z=1.0633), Vec3(x=1.7341000000000002, y=1.0593000000000001, z=0.5363000000000001), Vec3(x=0.4589000000000001, y=2.115500000000000

In [None]:
os.system(f"cp /home/yutao/project/aiida/applications/UFF.json {ff_path}")
for nloop in range(100):
    print(f"{nloop} optimization started")
    #sample(cif_path, picked_pressure)
    #move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=10)
    
    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  

In [1]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas



In [3]:
"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
Number_points = 1           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure
SET_temperature = 273
Framework_path = "/home/yutao/project/Al-MOF/nott300/structure.pdb"
Forcefiled_path = "/home/yutao/project/Al-MOF/nott300/forcefield.xml"
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = "/home/yutao/project/Al-MOF/nott300/scaled_frame.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)

In [4]:
'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit for i in picked_ls]

In [5]:
bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms
                
def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit
    element_list = ['Al_', 'C_', 'H_', 'O_']
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program


In [18]:
generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 

In [8]:
from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]

cif_path = "/home/yutao/project/Al-MOF/nott300/RSM0516.cif"
atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, "/home/yutao/project/Al-MOF/nott300/forcefield.xml")
write_pdb_file(pos_info,cell_parameters, "/home/yutao/project/Al-MOF/nott300/structure.pdb")
app.PDBFile("/home/yutao/project/Al-MOF/nott300/structure.pdb")




<openmm.app.pdbfile.PDBFile at 0x7f169f19fa10>

In [9]:
# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.05)
opt_state = optimizer.init(paramset)

In [10]:
for nloop in range(100):
    print(f"{nloop} optimization started")
    #sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=3)
    
    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  

0 optimization started


This is before derivative [0.08895529 1.71598541 0.59797636 0.74235379 1.50110402 1.19567442]
This is 0th time  Loss: 0.18248509370143667 and Parameters:  [0.40082 0.34309 0.25711 0.31181 0.305   0.28   ] [2.06525001 0.43979    0.18436    0.25079    0.65757    0.22469   ]


NameError: name 'clear_caches' is not defined

In [16]:
lj_gen.overwrite(paramset)

In [8]:
from jax import clear_caches, clear_backends

print(f"{nloop} optimization started")

move_traj(dest_path,picked_pressure, copy_to_path)
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=3)


0 optimization started


In [9]:
traj_dict

{1: {'experiment': {'pressure': 0.010575793184488841,
   'loading': 0.20293157296522996},
  'structure': [[<Topology; 1 chains, 12 residues, 588 atoms, 8 bonds>,
    Quantity(value=[Vec3(x=1.1942000000000002, y=1.0531000000000001, z=0.6486000000000001), Vec3(x=1.7903, y=1.0531000000000001, z=0.40410000000000007), Vec3(x=1.4916, y=0.12240000000000001, z=0.5263), Vec3(x=1.4922000000000002, y=0.9307000000000001, z=0.5265), Vec3(x=1.2876, y=0.062200000000000005, z=0.8459), Vec3(x=1.2876, y=0.9910000000000001, z=0.2069), Vec3(x=0.9892, y=0.31980000000000003, z=0.46420000000000006), Vec3(x=0.9896000000000001, y=0.7336, z=0.5883), Vec3(x=0.8016000000000001, y=0.7335, z=0.46440000000000003), Vec3(x=0.8015000000000001, y=0.31970000000000004, z=0.5884), Vec3(x=0.5027, y=0.0623, z=0.2068), Vec3(x=1.6969000000000003, y=0.9911, z=0.8459), Vec3(x=1.0449, y=0.9855, z=0.459), Vec3(x=1.0441, y=0.0674, z=0.5938), Vec3(x=1.9395, y=0.9855, z=0.5937), Vec3(x=0.7463000000000001, y=0.06730000000000001, z=0.4

In [15]:
traj_dict

{1: {'experiment': {'pressure': 0.010575793184488841,
   'loading': 0.20293157296522996},
  'structure': [[<Topology; 1 chains, 12 residues, 588 atoms, 8 bonds>,
    Quantity(value=[Vec3(x=1.1942000000000002, y=1.0531000000000001, z=0.6486000000000001), Vec3(x=1.7903, y=1.0531000000000001, z=0.40410000000000007), Vec3(x=1.4916, y=0.12240000000000001, z=0.5263), Vec3(x=1.4922000000000002, y=0.9307000000000001, z=0.5265), Vec3(x=1.2876, y=0.062200000000000005, z=0.8459), Vec3(x=1.2876, y=0.9910000000000001, z=0.2069), Vec3(x=0.9892, y=0.31980000000000003, z=0.46420000000000006), Vec3(x=0.9896000000000001, y=0.7336, z=0.5883), Vec3(x=0.8016000000000001, y=0.7335, z=0.46440000000000003), Vec3(x=0.8015000000000001, y=0.31970000000000004, z=0.5884), Vec3(x=0.5027, y=0.0623, z=0.2068), Vec3(x=1.6969000000000003, y=0.9911, z=0.8459), Vec3(x=1.0449, y=0.9855, z=0.459), Vec3(x=1.0441, y=0.0674, z=0.5938), Vec3(x=1.9395, y=0.9855, z=0.5937), Vec3(x=0.7463000000000001, y=0.06730000000000001, z=0.4

In [13]:
def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        print(jnp.average(reweight_loading), traj_dict[idx]['experiment']['loading'])
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

In [14]:
loss(paramset)

0.38095238095238093 0.20293157296522996


Array(0.17802081, dtype=float64)

In [None]:
def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
update_ff(paramset, ff_path)
lj_gen.overwrite(paramset)
sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
clear_caches()
clear_backends()  

In [7]:

from jax import clear_caches, clear_backends
os.system(f"cp /home/yutao/project/aiida/applications/UFF.json {ff_path}")
for nloop in range(100):
    print(f"{nloop} optimization started")
    if nloop != 0:
        sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=3)
    
    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ",paramset.parameters['LennardJonesForce']['sigma'], paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  

0 optimization started


KeyboardInterrupt: 

In [15]:
traj_dict[1].keys()

dict_keys(['experiment', 'structure', 'refer_energy', 'loading', 'estimator'])

In [22]:
topo, pos, num = simple_merge(Scaled_frame_path,"./traj2/1/1.pdb")
ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, atomic_number,cutoff)
print(ener_lj)

75018829462604.22


In [34]:
topo, pos, num = simple_merge(Scaled_frame_path,"test_one_gas.pdb")
app.PDBFile.writeFile(topo, pos, open("nott.pdb", 'w'))
ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, atomic_number,cutoff)
print(ener_lj)
topo, pos, num = simple_merge(Scaled_frame_path,"test_gas.pdb")
app.PDBFile.writeFile(topo, pos, open("nott.pdb", 'w'))
ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, atomic_number,cutoff)
print(ener_lj)

frame = app.PDBFile(Scaled_frame_path)
ener_lj = compute_binding_energy(paramset,frame.topology, frame.getPositions(), lj_gen, atomic_number,cutoff)
print(ener_lj)
frame = app.PDBFile("test_gas.pdb")
ener_lj = compute_binding_energy(paramset,frame.topology, frame.getPositions(), lj_gen, atomic_number,cutoff)
print(ener_lj)

11.0127645287102
75018829462604.22
0.0
0.0


In [27]:
frame = app.PDBFile("./traj2/1/1.pdb")

In [21]:
topo, pos, num = simple_merge("scaled_frame.pdb","./traj1/1/1.pdb")
ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, 720,cutoff)
print(ener_lj)

-123.03779077652155


In [19]:
atom_names = [atom.name for atom in topo.atoms()]
print(atom_names)


['Al1', 'Al2', 'Al3', 'Al4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'C33', 'C34', 'C35', 'C36', 'C37', 'C38', 'C39', 'C40', 'C41', 'C42', 'C43', 'C44', 'C45', 'C46', 'C47', 'C48', 'C49', 'C50', 'C51', 'C52', 'O53', 'O54', 'O55', 'O56', 'O57', 'O58', 'O59', 'O60', 'O61', 'O62', 'O63', 'O64', 'O65', 'O66', 'O67', 'O68', 'O69', 'O70', 'O71', 'O72', 'Al1', 'Al2', 'Al3', 'Al4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'C33', 'C34', 'C35', 'C36', 'C37', 'C38', 'C39', 'C40', 'C41', 'C42', 'C43', 'C44', 'C45', 'C46', 'C47', 'C48', 'C49', 'C50', 'C51', 'C52', 'O53', 'O54', 'O55', 'O56', 'O57', 'O58', 'O59', 'O60', 'O61', 'O62', 'O63', 'O64', 'O65', 'O66', 'O67', 'O68', 'O69', 'O70', 'O71', 'O72', '

In [12]:
traj_dict[1]['refer_energy']

Array([6.92613964e+13, 8.33633017e+12, 3.49918761e+14, 1.78179294e+13,
       1.00441338e+12, 3.34193331e+11, 1.85831157e+10, 4.08038465e+14,
       1.68965857e+11, 3.28957338e+12, 1.14730637e+09, 5.59556096e+12,
       1.11577749e+09, 1.53134369e+12, 5.19158574e+11, 2.70836980e+17,
       2.71911596e+14], dtype=float64)

In [None]:

# This part is to generate the cife files necessary for the isotherm workflow to optimize local environments

def read_lammps_data(filename):
    data = {}
    data['Bonds'] = []
    data['Bondtype'] = []
    with open(filename, 'r') as f:
        lines = f.readlines()
        bond_output = 0
        bond_type_output = 0

        num_atoms = 0 

        for line in lines:
            if line.startswith('Angles'):
                bond_output = 0
            if line.startswith('Bonds'):               
                bond_output = 1
            if bond_output == 1:
                if len(line.split()) == 4:
                    bond_ls = [int(idx) for idx in line.split()]
                    data['Bonds'].append(bond_ls)
            if line.startswith('Bond Coeffs'):               
                bond_type_output = 1
            if bond_type_output == 1:
                if len(line.split()) == 6:
                    type_ls = line.split()
                    data['Bondtype'].append([int(type_ls[0]), type_ls[-2], type_ls[-1]])
            if line.startswith('Angle Coeffs'):               
                bond_type_output = 0
            if "atoms" in line:
                num_atoms = int(line.split()[0])
    return num_atoms,data

def analysis_cif_file(file_path):
    atom_info = []
    cell_info = []
    with open(file_path, 'r') as f:
        lines = f.readlines()
        atom_data_started = False
        for line in lines:
            if not atom_data_started:
                cell_info.append(line)
            if '_atom_site_charge' in line:
                atom_data_started = True
                continue
            if atom_data_started:
                if line.strip() and len(line.split())==6:
                    atom_info.append(line.split())

    return cell_info, atom_info

def modify_cif_file(cif_path, atom_info, cell_info, data):

    # check the bonds between Al6+3 and O_2
    for type in data['Bondtype']:
        if type[1] == 'Al6+3' and type[2] == 'O_2':
            bond_idx = type[0]
            break
    o_idx_ls = []
    # check the O2 I need to find
    for bond in data['Bonds']:
        if bond[1] == bond_idx:
            o_idx_ls.append(bond[-1]) # o_idx_ls contains the same number of O several times if O connected to two Al
    with open(cif_path, 'w') as f:
        for line in cell_info:
            f.write(line)
        for o_idx, atom in enumerate(atom_info):
            if int(o_idx+1) in o_idx_ls:
                atom[0] = 'Os'
            f.write(' '.join(atom)+'\n')

# Usage
            
data_path = '/home/yutao/project/Al-MOF/MIL-160/data.MIL-160-Al'
cif_path = '/home/yutao/project/Al-MOF/MIL-160/MIL-160-Al.cif'
new_cif = '/home/yutao/project/local/MIL-160-Al.cif'
num_atoms, data = read_lammps_data(data_path)

'''
for key in data.keys():
    print(key)
    print(data[key])
'''

cell_info, atom_info = analysis_cif_file(cif_path)

if num_atoms != len(atom_info):
    raise ValueError("The number of atoms in cif file and lammps data file are not consistent")

modify_cif_file(new_cif, atom_info, cell_info, data)

In [8]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
from jax import clear_caches, clear_backends


"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""

'''

Here is the advanced version to do it. I will add those element I want to refine

def write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature, path = "/home/yutao/project/aiida/applications/config.py"):
    with open(path, 'w') as f:
        f.write(f"structure_folder = '{structure_folder}'\n")
        f.write(f"experiment_path = '{experiment_path}'\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"copy_to_path = '{copy_to_path}'\n")
        f.write(f"ff_path = '{ff_path}'\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"SET_temperature = {SET_temperature}\n")

# Call the function to create the config.py file
write_config(structure_folder, experiment_path, cif_path, dest_path, copy_to_path, ff_path, Transfer_unit, SET_temperature)

# Now import the config module
import config

# Now you can use the variables from config.py
structure_folder = config.structure_folder
experiment_path = config.experiment_path
cif_path = config.cif_path
dest_path = config.dest_path
copy_to_path = config.copy_to_path
ff_path = config.ff_path
Transfer_unit = config.Transfer_unit
SET_temperature = config.SET_temperature

'''

'''
Number_points = 5           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure
SET_temperature = 273
Framework_path = "/home/yutao/project/Al-MOF/nott300/structure.pdb"
Forcefiled_path = "/home/yutao/project/Al-MOF/nott300/forcefield.xml"
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = "/home/yutao/project/Al-MOF/nott300/scaled_frame.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_3.json'
cif_path = "/home/yutao/project/Al-MOF/nott300/RSM0516.cif"
for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)
'''

'''
structure_folder = "/home/yutao/project/Al-MOF/MIL-160/"
experiment_path = os.path.join(structure_folder, "298K_short.csv")
cif_path = os.path.join(structure_folder, "MIL-160-Al.cif")
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"
ff_path = '/home/yutao/project/aiida/applications/ff_2.json'
Transfer_unit = 15.9036500000/5.0184135189 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 298
'''

structure_folder = "/home/yutao/project/local/Al-MOF/mil121-CO"
experiment_path = os.path.join(structure_folder, "273K_short.csv")
cif_path = os.path.join(structure_folder, "RSM0112.cif")
dest_path = "/home/yutao/project/MIL-120/traj10/"
copy_to_path = "./traj10/"
ff_path = '/home/yutao/project/aiida/applications/ff_10.json'
Transfer_unit =  1.5487312500/2.6419895880 #It also depends on different structure, it also contains transfer from STP to mol/Kg
SET_temperature = 273
scaling_factors = (3,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300

"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
# remember to change UFF_local.json and init_local.xml, extra_atoms are after the framework atoms
extra_atoms = ['Os', 'Cs']
suplement_dict = {
"NonbonedForce": [{"epsilon": "0.25079", "sigma": "0.31181", "type": "Oal"},
                  {"epsilon": "0.43979", "sigma": "0.34309", "type": "Cs"}],
"AtomTypes": [{"name": "Os", "class": "Os", "element": "Os", "mass": "0"},
              {"name": "Cs", "class": "Cs", "element": "Cs", "mass": "0"},],
}


Number_points = 3          ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100

cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0


Framework_path = os.path.join(structure_folder,"structure.pdb")
Forcefiled_path = os.path.join(structure_folder,"forcefield.xml")
#In the whole workflow, new files will be written to the dest_path, and the original files will be copied to the copy_to_path
aiida_path = "/home/yutao/project/aiida/applications/"
Scaled_frame_path = os.path.join(structure_folder,"scaled_frame.pdb")

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)



'''

The format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit/22.4 for i in picked_ls]

bar = 10**5

def is_close_to_list(value, value_list):
    for list_value in value_list:
        relative_error = abs((value - list_value) / list_value)
        if relative_error < 0.01:
            return 1
    return 0

def move_traj(dest_path ,picked_pressure, copy_to_path):
    global bar
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters


def compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path):
    global Transfer_energy_unit, Transfer_length_unit, extra_atoms
    element_list = ['Al_', 'C_', 'H_', 'O_']
    extra_atoms_new = [atom_name+"_" for atom_name in extra_atoms]
    if len(extra_atoms) !=0:
        element_list += extra_atoms_new
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path, numframe, cutoff,  interval):

    global Framework_path, Scaled_frame_path, Number_points, picked_pressure, picked_isotherm, scaling_factors, SET_temperature

    traj_dict = {}

    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe,cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)

    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess

def generate_config(cif_path, picked_pressure, Transfer_unit, save_path,ff_path=ff_path, copy_from_remote="Movies/System_0/", dest_path=dest_path, exp_path=experiment_path, Number_points=Number_points, Temperature = SET_temperature,path = "/home/yutao/project/aiida/applications/config.py"):
    with open(os.path.join(save_path, 'config.py'), 'w') as f:
        f.write(f"ff_data = '{ff_path}'\n")
        f.write(f"copy_from_remote = '{copy_from_remote}'\n")
        f.write(f"dest_path = '{dest_path}'\n")
        f.write(f"exp_path = '{exp_path}'\n")
        f.write(f"Number_of_points = {Number_points}\n")
        f.write(f"cif_path = '{cif_path}'\n")
        f.write(f"pressure_list = {picked_pressure}\n")
        f.write(f"Transfer_unit = {Transfer_unit}\n")
        f.write(f"Temperature = {Temperature}\n")

def sample(cif_path, picked_pressure):
    global aiida_path, Transfer_unit
    generate_config(cif_path, picked_pressure, Transfer_unit, aiida_path) 
    command = [os.path.join(aiida_path, "sample_workflow.sh")]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program



"""

Write the necessary files

"""

from utils import write_force_field, write_pdb_file, rename_atoms, read_cif_file, transform_cif_info
from ase.io import read
from openmm import app
# co2 form TraPPE File, O17, C18 are just inherited from the first example: MIL-120 
co2_info = [{"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"}]



atoms = read(cif_path)
atomic_number = len(atoms)*scaling_factors[0]*scaling_factors[1]*scaling_factors[2]

cell_parameters = atoms.get_cell_lengths_and_angles() # Get the cell parameters
carterisian_pos = atoms.get_positions()
cif_info = read_cif_file(cif_path)
transformed_info = transform_cif_info(cif_info)
pos_info = rename_atoms(cif_info, carterisian_pos)

write_force_field(transformed_info, co2_info, Forcefiled_path,suplement_dict)
write_pdb_file(pos_info,cell_parameters, Framework_path)


# Initial Optimized parameters
xmlio = XMLIO()
xmlio.loadXML("data/init_CO.xml")
ffinfo = xmlio.parseXML()
paramset_old = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset_old)

xmlio = XMLIO()
#xmlio.loadXML("data/init.xml")
xmlio.loadXML("data/init_CO.xml")
#xmlio.loadXML("0219.xml")
ffinfo = xmlio.parseXML()
paramset = ParamSet()
lj_gen = LennardJonesGenerator(ffinfo, paramset)


paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[0].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[1].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[2].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[3].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[4].set(0)
paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[5].set(0)

paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[1].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[2].set(0)
paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[3].set(0)

optimizer = optax.adam(0.01)
opt_state = optimizer.init(paramset)






In [7]:
Framework_path

'/home/yutao/project/Al-MOF/mil121/structure.pdb'

In [9]:
move_traj(dest_path,picked_pressure, copy_to_path)
traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=20)

for i in range(1,Number_points+1):
#print(np.average(traj_dict[i]['experiment']['loading']))
#print(np.average(traj_dict[i]['loading']))
    print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

def loss(paramset):
    errors = []
    for idx in range(1, Number_points+1):
        energies = []
        for jdx in range(len(traj_dict[idx]['structure'])):  
            ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
            energies.append(ener.reshape((1,)))
        energies = jnp.concatenate(energies)
        weight = traj_dict[idx]['estimator'].estimate_weight(energies)
        reweight_loading = traj_dict[idx]['loading'] * weight
        #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
        error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
        errors.append(error.reshape((1,)))
        #print(error)
    errors = jnp.concatenate(errors)
    return jnp.sum(errors)

v_and_g = jax.value_and_grad(loss, 0)
v, g = v_and_g(paramset)


Range of energy: -600.8716643945909 -- -371.70043597520566
Range of energy: -621.2578368705344 -- -374.15055918017686
Range of energy: -689.8197207936998 -- -468.4199461546967


In [6]:
paramset.parameters

{'LennardJonesForce': {'epsilon': Array([2.09525, 0.43979, 0.18436, 0.25079, 0.25079, 0.43979, 0.65757,
         0.22469], dtype=float64),
  'epsilon_nbfix': Array([], dtype=float64),
  'sigma': Array([0.40082, 0.34309, 0.25711, 0.31181, 0.31181, 0.34309, 0.305  ,
         0.28   ], dtype=float64),
  'sigma_nbfix': Array([], dtype=float64)}}

In [10]:

print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
#g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
#print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
updates, opt_state = optimizer.update(g, opt_state)
updates.parameters = update_mask(updates.parameters,paramset.mask)
paramset = optax.apply_updates(paramset, updates)
paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
update_ff(paramset, ff_path)
lj_gen.overwrite(paramset)
sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
print(paramset.parameters['LennardJonesForce']['sigma'])
print(paramset.parameters['LennardJonesForce']['epsilon'])

This is before derivative [ 0.43461778  6.82039844  1.19445505  3.39003957  3.79634561  1.52601427
 10.04082475  7.73919585]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
os.system(f"cp /home/yutao/project/aiida/applications/UFF_CO.json {ff_path}")
for nloop in range(100):
    print(f"{nloop} optimization started")
    sample(cif_path, picked_pressure)
    move_traj(dest_path,picked_pressure, copy_to_path)
    traj_dict = analyse_traj(paramset=paramset, lj_gen=lj_gen, dest_path=copy_to_path, numframe=atomic_number, cutoff=cutoff, interval=6)
    
    for i in range(1,Number_points+1):
    #print(np.average(traj_dict[i]['experiment']['loading']))
    #print(np.average(traj_dict[i]['loading']))
        print(f"Range of energy: {min(traj_dict[i]['refer_energy'])} -- {max(traj_dict[i]['refer_energy'])}")

    def loss(paramset):
        errors = []
        for idx in range(1, Number_points+1):
            energies = []
            for jdx in range(len(traj_dict[idx]['structure'])):  
                ener = compute_binding_energy(paramset, traj_dict[idx]['structure'][jdx][0], traj_dict[idx]['structure'][jdx][1], lj_gen, numframe=atomic_number,cutoff=cutoff)
                energies.append(ener.reshape((1,)))
            energies = jnp.concatenate(energies)
            weight = traj_dict[idx]['estimator'].estimate_weight(energies)
            reweight_loading = traj_dict[idx]['loading'] * weight
            #print(f"This is {jdx}th reweight_loading results from dmff code.",jnp.average(traj_dict[idx]['loading']),jnp.average(reweight_loading))
            error = jnp.abs(jnp.average(reweight_loading)-traj_dict[idx]['experiment']['loading'])
            errors.append(error.reshape((1,)))
            #print(error)
        errors = jnp.concatenate(errors)
        return jnp.sum(errors)

    v_and_g = jax.value_and_grad(loss, 0)
    v, g = v_and_g(paramset)

    print("This is before derivative",g.parameters['LennardJonesForce']['epsilon'])
    #g.parameters['LennardJonesForce']['epsilon'] = g.parameters['LennardJonesForce']['epsilon']*scalar_epsilon
    #print("This is scaled derivative",g.parameters['LennardJonesForce']['epsilon'])
    updates, opt_state = optimizer.update(g, opt_state)
    updates.parameters = update_mask(updates.parameters,paramset.mask)
    paramset = optax.apply_updates(paramset, updates)
    paramset = jax.tree_map(lambda x: jnp.clip(x, 0.0, 1e8), paramset)
    update_ff(paramset, ff_path)
    lj_gen.overwrite(paramset)
    sigma_indices, epsilon_indices = detect_parameter_change(paramset, paramset_old,0.9)
    paramset = fix_changed_parameters(paramset, sigma_indices, epsilon_indices)
    print(f"This is {nloop}th time", f" Loss: {v} and Parameters: ")
    print(paramset.parameters['LennardJonesForce']['sigma'])
    print(paramset.parameters['LennardJonesForce']['epsilon'])
    clear_caches()
    clear_backends()  


In [3]:
clear_caches()
clear_backends() 