In [2]:
# this is some self-defined functions for testing the model

# These package is inherited from Lenard-Jones optimization part of DMFF

import openmm.app as app
import openmm as mm
import openmm.unit as unit
import numpy as np
import jax
import jax.numpy as jnp
import dmff
from dmff.api.xmlio import XMLIO
from dmff.api.paramset import ParamSet
from dmff.generators.classical import CoulombGenerator, LennardJonesGenerator
from dmff.api.hamiltonian import Hamiltonian
from dmff.operators import ParmedLennardJonesOperator
from dmff import NeighborListFreud
from dmff.mbar import ReweightEstimator
import mdtraj as md
from tqdm import tqdm, trange
import parmed
import sys
import os
from dmff.api.topology import DMFFTopology
# this is a package I write to solve some IO problems utils.py
from utils import create_supercell, gas_generate,add_loading, simple_merge
from utils import cutoff_topology
import matplotlib.pyplot as plt
import optax
from utils import extract_from_raspa
from IPython.display import display



In [5]:
"""

Superparameters for Lenard-Jone Potential optimization, some parameters need to read aiida workflow and set them

"""
Number_points = 3           ## must be smaller than len(picked_ls)
Trajectory_length = 250#250          #液体pdb文件的个数
loop_time =   100                  #迭代循环次数    推荐50-100
scaling_factors = (2,2,2)          # This is read from aiida workflow 2,2,2 for NOTT-300
cutoff = 0.905     #This value need to check. Because Openmm a little weired to compute the cutoff, for aiida, the cutoff is 12.0
experiment_path = "/home/yutao/project/Al-MOF/nott300/Default_Dataset.csv"
Transfer_unit = 8.0284625000/9.6917060506 #It also depends on different structure

Framework_path = "data/MIL-120.pdb"
Scaled_frame_path = "data/MIL-120-scaled.pdb"
dest_path = "/home/yutao/project/MIL-120/traj2/"
copy_to_path = "./traj2/"

for direct in [dest_path, copy_to_path]:
    if not os.path.exists(direct):
        os.makedirs(direct)
        print("Create directory: ", direct)

In [6]:
'''

Teh format of experimental data: two columns which can be read by np.loadtxt without skiprows

'''

data = np.loadtxt(experiment_path, delimiter=',')
picked_ls = list(range(Number_points))#[0,1,2,3,4,5,6,7,8,9] #[0, 2, 4, 6, 8, 10, 14, 18, 22]#[0, 3, 6, 9, 12, 15, 18]
picked_pressure = [data[i,0] for i in picked_ls]
picked_isotherm = [data[i,1]*Transfer_unit for i in picked_ls]

In [None]:
"""

Some packages will be used in the workflow, I would recommend to put them in a py script

"""

import os
import numpy as np
from utils import scaling_gas, extract_from_raspa, write_scaling_gas
bar = 10**5
def move_traj(dest_path = "/home/yutao/project/MIL-120/traj1/",picked_pressure=picked_pressure, copy_to_path = "./traj1/"):
    traj_ls = os.listdir(dest_path)
    isotherm_data = [[],[]] # the first list is for pressure, the second is for loading
    jdx = 0 
    for traj in extract_from_raspa(traj_ls):
        pdb_file = traj[1]
        if not pdb_file.endswith(".pdb") or 'Movie_framework' not in pdb_file:
            continue
        if not is_close_to_list(float(traj[0])/bar, picked_pressure):
            continue
        isotherm_data[0].append(float(traj[0])/bar)
        pdb_path = os.path.join(dest_path, pdb_file)
        with open(pdb_path) as f:
            lines = f.readlines()
        num_atoms_list = []  # List to store the number of atoms in each structure
        index = 0
        write_idx = 1
        num_atoms = 0  # Variable to store the number of atoms in the current structure
        directory = copy_to_path+f"{jdx+1}"
        jdx += 1
        if not os.path.exists(directory):
            os.makedirs(directory)
            print("Directory created:", directory)
        for line in lines:
            if line.startswith("MODEL"):
                if index>=150:
                    write_scaling_gas(block_coords, "data/gas.pdb", write_idx, dest_path=directory)
                    write_idx += 1
                block_coords = []
                block_Csym = []
                index += 1
                num_atoms_list.append(num_atoms)  # Add the number of atoms to the list
                num_atoms = 0  # Reset the number of atoms for the next structure
            if line.startswith("ATOM"):
                parts = line.split()
                coords = np.array([float(parts[4]), float(parts[5]), float(parts[6])])
                block_coords.append(coords)
                block_Csym.append(parts[-1])
                num_atoms += 1  # Increment the number of atoms
        #num_atoms_list.append(num_atoms)  # Add the number of atoms for the last structure
        isotherm_data[1].append(sum(num_atoms_list)/len(num_atoms_list)/3/3/2/3)
        #print("Number of atoms in each structure for", pdb_file, ":", num_atoms_list)
    return isotherm_data

def update_mask(parameters, mask):
    updated_parameters = parameters.copy()
    
    for force_type, force_params in mask.items():
        if force_type in parameters:
            for param, mask_array in force_params.items():
                if param in parameters[force_type]:
                    # Update values based on the mask
                    updated_parameters[force_type][param] = jnp.where(mask_array == 1, 
                                                                      parameters[force_type][param], 
                                                                      0)
    return updated_parameters

def compute_binding_energy(paramset,topo, pos, lj_gen, numframe=720,cutoff=cutoff):
    topodata = dmff.DMFFTopology(topo)
    # Because dmfftopology does not provide a good entry for open.topology object generated by pdb file, I had to suplement something
    for atom in topodata.atoms():
        if atom.residue.name=="MOL":
            atom.meta['type']=atom.meta['element']
            atom.meta['class']=atom.meta['element']
        elif atom.residue.name=="GAS":
            #print(atom.meta)
            atom.meta['type']=atom.meta['element']+"_co2"
            atom.meta['class']=atom.meta['element']+"_co2"
        #print(atom.meta['element'])
    cov_mat = topodata.buildCovMat()
    lj_force = lj_gen.createPotential(
    topodata, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, args={})
    pos_jnp = jnp.array(pos.value_in_unit(unit.nanometer))
    cell_jnp = jnp.array(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer))
    cov_mat=cov_mat.at[:numframe,:numframe].set(1)
    nblist = NeighborListFreud(topo.getPeriodicBoxVectors().value_in_unit(unit.nanometer), cutoff, cov_mat)
    nblist.allocate(pos_jnp, cell_jnp)
    pairs = jnp.array(nblist.pairs)
    ener = lj_force(pos_jnp,cell_jnp, pairs, paramset)
    return ener

def detect_parameter_change(paramset_new, paramset_old, error_threshold=0.4):
    # Get the initial parameters
    initial_sigma = paramset_old.parameters['LennardJonesForce']['sigma']
    initial_epsilon = paramset_old.parameters['LennardJonesForce']['epsilon']
    
    # Get the updated parameters
    updated_sigma = paramset_new.parameters['LennardJonesForce']['sigma']
    updated_epsilon = paramset_new.parameters['LennardJonesForce']['epsilon']
    
    # Calculate the percentage change for each parameter
    sigma_change = np.abs(updated_sigma - initial_sigma) / initial_sigma
    epsilon_change = np.abs(updated_epsilon - initial_epsilon) / initial_epsilon

    # Find the indices of values that have changed by more than 40%
    sigma_indices = np.where(sigma_change > error_threshold)[0]
    epsilon_indices = np.where(epsilon_change > error_threshold)[0]
    
    return sigma_indices, epsilon_indices

def fix_changed_parameters(paramset_new, sigma_indices, epsilon_indices):
    for idx in sigma_indices:
        paramset.mask['LennardJonesForce']['sigma'] = paramset.mask['LennardJonesForce']['sigma'].at[idx].set(0)
    for idx in epsilon_indices:
        paramset.mask['LennardJonesForce']['epsilon'] = paramset.mask['LennardJonesForce']['epsilon'].at[idx].set(0)
    return paramset_new


import json
Transfer_energy_unit = 254.152/2.11525
Transfer_length_unit = 10
def update_ff(paramset, dest_path='/home/yutao/project/aiida/applications/ff_2.json'):
    global Transfer_energy_unit, Transfer_length_unit
    element_list = ['Al_', 'C_', 'H_', 'O_']
    params = paramset.parameters
    ff_data = {}
    if len(element_list) != params['LennardJonesForce']['sigma'].shape[0]-2:
        raise ValueError("Length of element list and parameter list does not match")
    sigma_list = params['LennardJonesForce']['sigma'].tolist()
    epsilon_list = params['LennardJonesForce']['epsilon'].tolist()
    for idx in range(len(element_list)):
        ff_data[element_list[idx]] = ['lennard-jones', epsilon_list[idx]*Transfer_energy_unit, sigma_list[idx]*Transfer_length_unit]
    with open(dest_path, 'w') as f:
        json.dump(ff_data, f, indent=4)

from jax import clear_backends
def analyse_traj(paramset, lj_gen, dest_path = "./traj1/", interval=3):

    global Framework_path, Scaled_frame_path

    traj_dict = {}
    global Number_points, cutoff
    traj_ls = os.listdir(dest_path)
    create_supercell(Framework_path, scaling_factors, Scaled_frame_path)

    # Filter out file names and keep only directory names
    dir_names = [name for name in traj_ls if os.path.isdir(os.path.join(dest_path, name)) and name.isdigit()]
    dir_names = sorted(map(int, dir_names))
    dir_names = [str(i) for i in dir_names]
    for directory in dir_names[:Number_points]:
        idx = int(directory)
        traj_dict[idx] = {'experiment': {'pressure': picked_pressure[idx-1], 'loading': picked_isotherm[idx-1]}, 'structure': [], 'refer_energy':[], 'loading':[]}
        gas_dir = os.path.join(dest_path, directory)
        for gas_path in os.listdir(gas_dir)[::interval]:
            topo, pos, num = simple_merge(Scaled_frame_path,os.path.join(gas_dir,gas_path))
            ener_lj = compute_binding_energy(paramset,topo, pos, lj_gen, numframe=720,cutoff=cutoff)
            traj_dict[idx]['structure'].append([topo, pos])
            traj_dict[idx]['loading'].append(num/scaling_factors[0]/scaling_factors[1]/scaling_factors[2]/3)
            traj_dict[idx]['refer_energy'].append(ener_lj)
        clear_backends()
    for key in traj_dict.keys():
        traj_dict[key]['refer_energy'] = jnp.array(traj_dict[key]['refer_energy'])
        traj_dict[key]['loading'] = jnp.array(traj_dict[key]['loading'])
        traj_dict[key]['estimator'] = ReweightEstimator(ref_energies=traj_dict[key]['refer_energy'], temperature=SET_temperature)
    return traj_dict

import subprocess
def sample():
    command = ["/home/yutao/project/aiida/applications/sample_long.sh"]
    # Run the script using subprocess
    completed_process = subprocess.run(command, capture_output=True, cwd="/home/yutao/project/aiida/applications",text=True)
    print("As long as it finishes,",completed_process.returncode)
    # Check the return code
    if completed_process.returncode == 0:
        # The script finished successfully
        display("Script finished successfully!")
        # Display the output in the notebook
        display("Script output:")
        display(completed_process.stdout)
        # Continue with your program logic here
    else:
        # The script encountered an error
        display("Script encountered an error:", completed_process.stderr)
        # Handle the error or exit the program


In [12]:
import xml.etree.ElementTree as ET

# Create the root element
forcefield = ET.Element("ForceField")

# Add AtomTypes
atomtypes = ET.SubElement(forcefield, "AtomTypes")
atom_type_data = [
    {"class": "Al", "element": "Al", "mass": "0.0", "name": "Al"},
    {"class": "C", "element": "C", "mass": "0.0", "name": "C"},
    {"class": "H", "element": "H", "mass": "0.0", "name": "H"},
    {"class": "O", "element": "O", "mass": "0.0", "name": "O"},
    {"class": "C1_co2", "element": "C", "mass": "12.010", "name": "C_co2"},
    {"class": "O1_co2", "element": "O", "mass": "15.999", "name": "O_co2"},
]
for atom_type in atom_type_data:
    ET.SubElement(atomtypes, "Type", **atom_type)

# Add Residues
residues = ET.SubElement(forcefield, "Residues")
residue_data = [
    {
        "name": "MOL",
        "atoms": [
            {"name": "Al1", "type": "Al", "charge": "1.65857"},
            {"name": "Al2", "type": "Al", "charge": "1.65856"},
            # Add more atoms here...
        ]
    },
    {
        "name": "GAS",
        "atoms": [
            {"name": "O17", "type": "O_co2", "charge": "-0.35"},
            {"name": "C18", "type": "C_co2", "charge": "0.70"},
            # Add more atoms here...
        ]
    },
]
for residue_info in residue_data:
    residue = ET.SubElement(residues, "Residue", name=residue_info["name"])
    for atom in residue_info["atoms"]:
        ET.SubElement(residue, "Atom", **atom)

# Add HarmonicBondForce
harmonic_bond_force = ET.SubElement(forcefield, "HarmonicBondForce")
ET.SubElement(
    harmonic_bond_force,
    "Bond",
    class1="C1_co2",
    class2="O1_co2",
    length="0.115999",
    k="943153.3808",
    mask="true"
)

# Add HarmonicAngleForce
harmonic_angle_force = ET.SubElement(forcefield, "HarmonicAngleForce")
ET.SubElement(
    harmonic_angle_force,
    "Angle",
    class1="O1_co2",
    class2="C1_co2",
    class3="O1_co2",
    angle="3.141593",
    k="817.5656",
    mask="true"
)

# Add NonbondedForce
nonbonded_force = ET.SubElement(forcefield, "NonbondedForce", coulomb14scale="0.8333333333333334", lj14scale="0.5")
ET.SubElement(nonbonded_force, "UseAttributeFromResidue", name="charge")
atom_data = [
    {"epsilon": "2.11525", "sigma": "0.40082", "type": "Al"},
    {"epsilon": "0.43979", "sigma": "0.34309", "type": "C"},
    {"epsilon": "0.18436", "sigma": "0.25711", "type": "H"},
    {"epsilon": "0.25079", "sigma": "0.31181", "type": "O"},
    {"epsilon": "0.65757", "sigma": "0.305", "type": "O_co2"},
    {"epsilon": "0.22469", "sigma": "0.28", "type": "C_co2"},
]
for atom_info in atom_data:
    ET.SubElement(nonbonded_force, "Atom", **atom_info)

# Function to pretty print XML
def prettify(elem, level=0):
    """Indentation function"""
    indent = "\n" + level * "  "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = indent + "  "
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent
        for subelem in elem:
            prettify(subelem, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = indent

prettify(forcefield)
xml_str = ET.tostring(forcefield, encoding="utf-8")
with open("forcefield.xml", "wb") as f:
    f.write(xml_str)


In [1]:
def read_cif_file(file_path):
    atom_info = []
    with open(file_path, 'r') as f:
        lines = f.readlines()
        atom_data_started = False
        for line in lines:
            if '_atom_site_label' in line:
                atom_data_started = True
                continue
            if atom_data_started:
                if line.strip():
                    atom_info.append(line.split())

    return atom_info

def extract_atom_info(atom_info):
    extracted_info = []
    for i, atom in enumerate(atom_info):
        name = f"Al{i+1}" if atom[1] == 'Al' else f"H{i+1}" if atom[1] == 'H' else f"C{i+1}"
        extracted_info.append({
            "name": name,
            "type": atom[1],
            "charge": atom[5]
        })

    return extracted_info
'''
# Example usage:
file_path = "your_cif_file.cif"  # Provide your CIF file path here
atom_info = read_cif_file(file_path)
extracted_info = extract_atom_info(atom_info)
print(extracted_info)
'''

'\n# Example usage:\nfile_path = "your_cif_file.cif"  # Provide your CIF file path here\natom_info = read_cif_file(file_path)\nextracted_info = extract_atom_info(atom_info)\nprint(extracted_info)\n'

In [4]:
cif_info = read_cif_file("/home/yutao/project/Al-MOF/nott300/RSM0516.cif")

In [5]:
cif_info

[['_atom_site_type_symbol'],
 ['_atom_site_fract_x'],
 ['_atom_site_fract_y'],
 ['_atom_site_fract_z'],
 ['_atom_site_charge'],
 ['Al', 'Al', '0.19208', '-0.00001', '0.61605', '1.68371'],
 ['Al', 'Al', '0.80806', '-0.00003', '0.38385', '1.68246'],
 ['Al', 'Al', '0.94199', '0.11614', '0.49993', '1.68268'],
 ['Al', 'Al', '0.55814', '0.88377', '0.50008', '1.68400'],
 ['H', 'H', '0.64758', '0.05901', '0.80350', '0.12250'],
 ['H', 'H', '0.50993', '0.94105', '0.19655', '0.11944'],
 ['H', 'H', '0.45646', '0.30362', '0.44094', '0.12239'],
 ['H', 'H', '0.20106', '0.69660', '0.55874', '0.12250'],
 ['H', 'H', '0.10244', '0.69649', '0.44106', '0.12348'],
 ['H', 'H', '0.24013', '0.30349', '0.55887', '0.12240'],
 ['H', 'H', '0.29343', '0.05913', '0.19647', '0.12293'],
 ['H', 'H', '0.54908', '0.94112', '0.80345', '0.12327'],
 ['H', 'H', '0.18919', '0.93580', '0.43600', '0.41840'],
 ['H', 'H', '0.56087', '0.06398', '0.56401', '0.41863'],
 ['H', 'H', '0.87507', '0.93581', '0.56395', '0.41877'],
 ['H', 

In [11]:
import xml.etree.ElementTree as ET

def parse_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    atoms = []
    for residue in root.findall(".//Residue"):
        for atom in residue.findall(".//Atom"):
            atoms.append(atom.attrib)
    return atoms

def write_pdb(atoms, pdb_file):
    with open(pdb_file, "w") as f:
        for i, atom in enumerate(atoms):
            # PDB format: https://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
            f.write("ATOM  {:>5d} {:>4s} MOL    1    {:>8.3f}{:>8.3f}{:>8.3f}  1.00  0.00          {}\n".format(
                i+1, atom["name"], 0.0, 0.0, 0.0, atom["charge"]))  # Coordinates set to 0 for simplicity, adjust as needed
        f.write("END\n")

# Parse XML and extract atoms
atoms = parse_xml("forcefield.xml")

# Write atoms to PDB file
write_pdb(atoms, "atoms.pdb")


In [None]:
from pymatgen.io.cif import CifParser
from xml.etree import ElementTree as ET

def parse_cif(cif_file):
    parser = CifParser(cif_file)
    structure = parser.get_structures()[0]  # Assuming there's only one structure in the CIF file
    return structure

def generate_xml_atoms(structure):
    residue = ET.Element("Residue", name="MOL")
    atomic_charges = structure.site_properties.get("charge", [])

    for i, site in enumerate(structure):
        atom = ET.SubElement(residue, "Atom", name=f"{site.species_string}{i+1}", type=site.species_string, charge=str(atomic_charges[i]))
    return residue

def write_xml(residue, xml_file):
    xml_str = ET.tostring(residue, encoding="unicode")
    with open(xml_file, "w") as f:
        f.write(xml_str)

# Parse CIF file
structure = parse_cif("your_cif_file.cif")

# Generate XML atoms
residue = generate_xml_atoms(structure)

# Write XML to file
write_xml(residue, "output.xml")