# Steps to reproduce this environment to be able to use openmm-torch
Disclaimer: All solutions and fixes were suggestions found in github forums adapted for the present problem.

1. Install pytorch 1.13.0 (preferrable from conda-forge channel), using CUDA 11.7 version.
2. Install openmm from conda as well
3. Try to install openmm-torch with conda
4. Fail and die
5. Rise from the ashes like a phoenix and install openmm-torch from source like a pro (steps to install below) (to confirm if we need to install from source)
6. Install caffee from conda
7. Add the following env variables: `export LD_LIBRARY_PATH=/path/to/caffe2/build/lib:$LD_LIBRARY_PATH` and `export LD_LIBRARY_PATH=/usr/local/cuda-9.0/lib64:$LD_LIBRARY_PATH`
8. By this point you will have to recompile openmm-torch
9. In my case (Maybe not needed at all) had to uninstall pytorch and install it again.

Note: You need to import torch before importing openmmtorch, otherwise some features won't work

### Steps to install openmm-torch from source

1. Go to `https://pytorch.org/get-started/locally/` and select the combination: Stable, Linux, Libtorch, C++/Java, CUDA 11.7, and copy the command below the description "Download here (cxx11 ABI):". Then change the version from 2.0.1 to 1.13.0. Or alternatively, just copy the following command in the terminal: `wget https://download.pytorch.org/libtorch/cu117/libtorch-shared-with-deps-1.13.0%2Bcu117.zip`
2. unzip the zipped file
3. Make sure the folder where you unzipped libtorch (The thing downloaded above) is accessible from where you're going to build openmm-torch
4. Clone locally the repo https://github.com/openmm/openmm-torch
5. write in the terminal `ccmake openmm-torch`
6. In the interactive terminal, set `PYTORCH_DIR` to point to the directory where you installed the LibTorch.
7. Set `OPENMM_DIR` to point to the directory where OpenMM is installed. This is needed to locate the OpenMM header files and libraries. If you are unsure of what directory this is, the following script will print it out. Set the `CMAKE_INSTALL_PREFIX` to point to the same directory as the `OPENMM_DIR`.
8. Set the `CUDA_TOOLKIT_ROOT_DIR` because in this case we need it. And make sure `NN_BUILD_CUDA_LIB` is selected. In our case we also selected the flags `NN_BUILD_OPENCL_LIB`, `NN_BUILD_PYTHON_WRAPPERS` and the `CUDNN_STATIC`.
9. If it's the first time configuring the build, you may need to press the "configure" `[c]` option twice. After that the "generate" `[g]` option will appear. If the progress percentage reaches 100%


In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import torch
from openmmtorch import TorchForce
import os

import yaml
from pathlib import Path

import numpy as np
from openmm.app import *
from openmm import *
from openmm.unit import *
# from force_mapper import ForceMapper

# import mdtraj as md

import training_modules as tm
from sys import stdout

from simutils import ForceReporter, ForceModelConvert



In [3]:
current_dir = os.getcwd()
fmartip_dir = os.path.dirname(current_dir)
dataset_dir = os.path.dirname(fmartip_dir)
output_file = os.path.join(dataset_dir, "/storage_common/angiod/chignolin_unfolded/chignolin.data.A.kcal.test.npz")  #fmartip/ff-naive/DatasetsA2A/dataset.A2a.hydrogen.pose0.npz
dataset = dict(np.load(output_file))
# Prepare a simulation system
atomic_numbers = dataset['atom_types']
# output_file = os.path.join(dataset_dir, "sorted.bond.chig.indices.npz")  # sorted.bond.a2a.indices.npz
# bond_indices = np.load(output_file)
# # bond_indices = np.array([[14, 18], [0, 3], [6, 7], [0, 5], [19, 21]])
# dataset['bond_indices'] = np.array(bond_indices['arr_0']).reshape(-1,2)
# config_file = os.path.join(dataset_dir, "config.naive.bonds.yaml")
# conf_bonds: list = yaml.safe_load(Path(config_file).read_text())
# config_file = os.path.join(dataset_dir, "config.naive.angles.yaml")
# conf_angles: list = yaml.safe_load(Path(config_file).read_text())
# config_file = os.path.join(dataset_dir, "config.naive.dihedrals.yaml")
# conf_dihedrals: list = yaml.safe_load(Path(config_file).read_text())

In [4]:
for k, v in dataset.items():
    try:
        print(k, v.shape)
    except:
        print(k)

num_residues ()
resnames (10,)
residcs (10,)
resnumbers (10,)
num_atoms ()
atom_pos (2, 165, 3)
atom_resnames (165,)
atom_names (165,)
atom_types (165,)
atom_residcs (165,)
atom_resnumbers (165,)
ATOM_SEGIDS (165,)
atom_forces (2, 165, 3)
num_beads ()
bead_pos (2, 32, 3)
bead_idnames (32,)
bead_resnames (32,)
bead_names (32,)
bead_types (32,)
bead_residcs (32,)
bead_resnumbers (32,)
bead_segids (32,)
bead_forces (2, 32, 3)
cell (2, 6)
pbc (3,)
bead2atom_idcs (32, 13)
bead2atom_weights (32, 13)


In [5]:
# bead mass calculation
bead_mass_dict = []
for atoms in dataset["bead2atom_idcs"]:
    bead_mass = None
    for j in atoms[atoms > -1]:
        atom_type = dataset['atom_types'][j]
        if bead_mass is None:
            bead_mass = Element._elements_by_atomic_number[atom_type].mass
        else:
            bead_mass += Element._elements_by_atomic_number[atom_type].mass
    bead_mass_dict.append(bead_mass)
dataset['bead_mass'] = bead_mass_dict
bead_mass_dict = {}
for idname, bead_mass in zip(dataset['bead_idnames'], dataset['bead_mass']):
    bead_mass_dict[idname] = bead_mass
bead_mass_dict

{'TYR_BB': Quantity(value=56.043604, unit=dalton),
 'TYR_SC1': Quantity(value=26.037454, unit=dalton),
 'TYR_SC2': Quantity(value=26.037454000000004, unit=dalton),
 'TYR_SC4': Quantity(value=29.018157000000002, unit=dalton),
 'TYR_SC3': Quantity(value=26.037454000000004, unit=dalton),
 'ASP_BB': Quantity(value=56.043604, unit=dalton),
 'ASP_SC1': Quantity(value=59.044261000000006, unit=dalton),
 'PRO_BB': Quantity(value=55.035657, unit=dalton),
 'PRO_SC1': Quantity(value=42.08002200000001, unit=dalton),
 'GLU_BB': Quantity(value=56.043604, unit=dalton),
 'GLU_SC1': Quantity(value=73.070935, unit=dalton),
 'THR_BB': Quantity(value=56.043604, unit=dalton),
 'THR_SC1': Quantity(value=45.060725000000005, unit=dalton),
 'GLY_BB': Quantity(value=57.051551, unit=dalton),
 'TRP_BB': Quantity(value=56.043604, unit=dalton),
 'TRP_SC1': Quantity(value=26.037454, unit=dalton),
 'TRP_SC2': Quantity(value=28.033394, unit=dalton),
 'TRP_SC3': Quantity(value=24.02156, unit=dalton),
 'TRP_SC5': Quantit

In [6]:
pdb_file = os.path.join(dataset_dir, "/home/angiod@usi.ch/CGffap/chignolin.data.test.pdb" ) #'/home/enere@usi.ch/FMartIP/original_CG_A2A.pdb' "ChignCG_unfolded.pdb" "original_CG_A2A.pdb" "chig_CG/original_CG_a2a_Water.pdb" 
pdb = PDBFile(pdb_file) # OpenMM loader

In [7]:
starting_atomic_number = 200
unique_bead_idnames = np.unique(dataset['bead_idnames'])
for index, (atom, bead_idname) in enumerate(zip(pdb.topology.atoms(), np.unique(dataset['bead_idnames']))):
    i = dataset['bead_types'][np.where(unique_bead_idnames == bead_idname)][0]
    mass = bead_mass_dict[bead_idname]
    atomic_number = starting_atomic_number + i
    symbol = str(i)
    try:
        atom.element = Element.getByAtomicNumber(atomic_number)
    except:
        atom.element = Element(
            number=atomic_number,
            name=bead_idname,
            symbol=symbol,
            mass=mass
        )

In [8]:
# --- BUILD FORCE FIELD --- #
pos2unit = AngstromsPerNm
# pos2unit = 1.
energy2unit = KJPerKcal
# energy2unit = 1.

# - Wrap trained module to interface with OpenMM - #
trained_module_filename = os.path.join('Models', 'A.kcal.best.pt')
trained_module = torch.jit.load(trained_module_filename)

# Convert model unit of measure to OpenMM unit of measure #
ff_module = torch.jit.script(ForceModelConvert(trained_module, pos2unit=pos2unit, energy2unit=energy2unit))

p = Path(trained_module_filename)
ff_module_filename = str(Path(p.parent, p.stem + '.ff' + p.suffix))
ff_module.save(ff_module_filename)

# for k,v in trained_module.named_parameters():
#     print(k, v)

In [9]:
# --- LOAD FORCE FIELD --- #

ff = TorchForce(ff_module_filename)

# --- CREATE SIMULATION SYSTEM --- #

system = System()

for atom in pdb.topology.atoms():
    system.addParticle(atom.element.mass)

# boxVectors = pdb.topology.getPeriodicBoxVectors()
# if boxVectors is not None:
#     system.setDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2])
# print(boxVectors)
# system.usesPeriodicBoundaryConditions()

# integrator = LangevinIntegrator(310*kelvin, 1./picoseconds , 0.01*picoseconds)
# integrator = VerletIntegrator(0.01*picoseconds)
integrator = NoseHooverIntegrator(310*kelvin, 1./picosecond, 0.002*picoseconds)

while system.getNumForces() > 0:
    system.removeForce(0)
    
# The system should not contain any additional force and constrains
assert system.getNumConstraints() == 0
assert system.getNumForces() == 0

# Add the NNP to the system
system.addForce(ff)

# This line combines the molecular topology, system, and integrator to begin a new simulation. It creates a Simulation object and assigns it to a variable called simulation. 
# A Simulation object manages all the processes involved in running a simulation, such as advancing time and writing output.
simulation = Simulation(pdb.topology, system, integrator)
simulation.context.setPositions(pdb.getPositions())

# Performs a local energy minimization. It is usually a good idea to do this at the start of a simulation, since the coordinates in the PDB file might produce very large forces.
# simulation.minimizeEnergy()
print("starting Sim")

sim_root = p.parents[1] / 'Sims' / p.stem
os.makedirs(str(sim_root), exist_ok=True)
simulation.reporters.append(PDBReporter(      str(sim_root / 'output.pdb'), 1))
simulation.reporters.append(StateDataReporter(str(sim_root / 'output.dat'), 1, step=True, potentialEnergy=True, kineticEnergy=True, temperature=True, time=True, totalEnergy=True))
simulation.reporters.append(ForceReporter(    str(sim_root / 'outputforces.txt'), 1))

#This line adds another reporter to print out some basic information every 1000 time steps
simulation.step(1)
state = simulation.context.getState(getPositions=True, getEnergy=True, getForces=True)

state = simulation.context.getState(getPositions=True, getEnergy=True, getForces=True)
np.array([[a.x,a.y,a.z]for a in state.getForces()])[25:]

starting Sim


array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [None]:
output_file = os.path.join(dataset_dir, "enere@usi.ch/CGffap/dataset.notVoid.npz")  #fmartip/ff-naive/DatasetsA2A/dataset.A2a.hydrogen.pose0.npz
dataset = dict(np.load(output_file))

In [None]:
np.linalg.norm(dataset['bead_forces'],axis=0).max()

In [None]:
dataset_void = dict(np.load('/home/enere@usi.ch/CGffap/dataset.VoidNoPBC.npz'))
np.linalg.norm(dataset_void['bead_forces'],axis=0).max()

In [None]:
simForces = np.linalg.norm(np.array(simulation.reporters[2].getForces()),axis=-1)
beadForces = np.linalg.norm(dataset['bead_forces'][:len(simForces)],axis=-1)
beadForces_void = np.linalg.norm(dataset_void['bead_forces'][:len(simForces)],axis=-1)

In [None]:
# Plot losses
import matplotlib.pyplot as plt
selected_bead_index = 0
# Plotting dataset 1
plt.figure(figsize=(10, 5))
plt.plot(simForces[:,selected_bead_index], label='simForces')


# Plotting dataset 2
plt.plot(beadForces[:,selected_bead_index], linestyle='--', label='beadForces')

# Plotting dataset 3
# plt.plot(beadForces_void[:,selected_bead_index], label='beadForces void')

# Adding labels and title
plt.xlabel('Index')
plt.ylabel('Force')
plt.title('Comparison of Forces for Dataset 1 and Dataset 2')

# Adding legend
plt.legend()

# Displaying the plot
plt.show()

In [None]:
state = simulation.context.getState(getPositions=True, getEnergy=True, getForces=True)

state.getForces()