In [1]:
import hoomd
import pickle
import gsd.hoomd
import numpy as np
import pandas as pd
import os
import rowan
import warnings
warnings.filterwarnings('ignore')

## Create pps atomistic simulation to collect positions of constituent particles and COM

In [2]:
from flowermd.library import PPS, OPLS_AA_PPS
from flowermd.base import Pack,  Simulation

  from .xtc import XTCTrajectoryFile
  entry_points = metadata.entry_points()["mbuild.plugins"]


In [3]:
# Build a small atomistic PPS system: two molecules of length 1, packed at
# density 0.2.
pps = PPS(num_mols=2, lengths=1)
system = Pack(molecules=pps, density=0.2)

# Parametrize with the OPLS-AA PPS force field. Charges and hydrogens are
# removed, and r_cut=2.5 is the pair cutoff reused by the rigid-body run below.
system.apply_forcefield(
    r_cut=2.5,
    force_field=OPLS_AA_PPS(),
    auto_scale=True,
    remove_charges=True,
    scale_charges=True,
    remove_hydrogens=True,
)

# Atomistic simulation object; later cells read its snapshot and pair-force
# parameters.
pps_aa_simulation = Simulation.from_system(system)

No charged group detected, skipping electrostatics.
Initializing simulation state from a gsd.hoomd.Frame.


  all_scales[index][scaling_interaction_idxes[interaction]] = value
  all_scales[index][scaling_interaction_idxes[interaction]] = value


In [4]:
# Snapshot of the (un-run) atomistic simulation; its particle data is used
# below to compute centers of mass and constituent-particle offsets.
frame = pps_aa_simulation.state.get_snapshot()
positions = frame.particles.position
# indices of constituent particles: two groups of 7 consecutive particles
# (presumably one group per PPS molecule — verify if num_mols/lengths change)
constituent_indx = [[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13]]

In [5]:

# Total mass and mass-weighted center of mass (COM) for each group of
# constituent particles.
# NOTE(review): positions are used as stored in the snapshot; this assumes no
# group is split across a periodic boundary — confirm.
particle_mass = frame.particles.mass
particle_pos = frame.particles.position

com_mass, com_positions = [], []
for group in constituent_indx:
    group_mass = particle_mass[group, np.newaxis]  # column vector for broadcasting
    total_mass = np.sum(particle_mass[group])
    weighted_sum = np.sum(particle_pos[group] * group_mass, axis=0)
    com_mass.append(total_mass)
    com_positions.append(weighted_sum / total_mass)

In [6]:
# Positions of the first group's constituent particles relative to that
# group's center of mass; used as the rigid-body template positions below.
rel_const_pos =  positions[constituent_indx[0]] - com_positions[0]
rel_const_pos

array([[ 0.08849504,  0.07669724,  0.64043651],
       [-0.02556489, -0.26707529,  0.49990807],
       [-0.07843468, -0.31309532,  0.11771188],
       [-0.01726601, -0.01489721, -0.12452878],
       [-0.08479617, -0.07348143, -0.61360397],
       [ 0.09703805,  0.32916023,  0.01655017],
       [ 0.14977624,  0.3746983 ,  0.39882241]])

## Create rigid body simulation

In [7]:
def create_rigid_simulation(
    kT,
    initial_state="assets/pps_rigid_initial_run.gsd",
    log_dir="logs/pps_pair_2",
):
    """Build a HOOMD rigid-body simulation of the PPS pair at temperature ``kT``.

    The rigid-body template (constituent types and relative positions) and the
    LJ parameters are taken from the atomistic setup defined earlier in the
    notebook (``pps_aa_simulation``, ``constituent_indx``, ``rel_const_pos``).

    Parameters
    ----------
    kT : float
        Thermostat temperature, also used to thermalize the initial momenta.
    initial_state : str, optional
        GSD file the simulation state is created from.
    log_dir : str, optional
        Directory that receives ``trajectory_<kT>.gsd``; created if missing.

    Returns
    -------
    hoomd.Simulation
        Simulation with integrator, LJ force, thermodynamic compute and a GSD
        writer (triggered every step) attached. ``run(0)`` has already been
        called; no production steps have been run.
    """
    rigid_simulation = hoomd.Simulation(device=hoomd.device.auto_select(), seed=1)
    rigid_simulation.create_state_from_gsd(filename=initial_state)

    # Read the type names from the atomistic simulation itself rather than the
    # notebook-level `frame` variable: `frame` is rebound by the trajectory
    # loading loop later in the notebook, which would silently yield the wrong
    # constituent types if this function were called again afterwards.
    aa_snapshot = pps_aa_simulation.state.get_snapshot()
    type_names = np.asarray(aa_snapshot.particles.types)
    const_particle_types = list(
        type_names[list(aa_snapshot.particles.typeid)][constituent_indx[0]]
    )

    # Rigid-body template: constituents sit at their COM-relative positions,
    # each with the identity orientation.
    rigid = hoomd.md.constrain.Rigid()
    rigid.body['rigid'] = {
        "constituent_types": const_particle_types,
        "positions": rel_const_pos,
        "orientations": [(1.0, 0.0, 0.0, 0.0)] * len(rel_const_pos),
    }
    integrator = hoomd.md.Integrator(dt=0.005, integrate_rotational_dof=True)
    rigid_simulation.operations.integrator = integrator
    integrator.rigid = rigid

    # Integrate only rigid-body centers and free particles.
    rigid_centers_and_free = hoomd.filter.Rigid(("center", "free"))
    nvt = hoomd.md.methods.ConstantVolume(
        filter=rigid_centers_and_free,
        thermostat=hoomd.md.methods.thermostats.Bussi(kT=kT),
    )
    integrator.methods.append(nvt)

    # Exclude pairs that belong to the same rigid body from the neighbor list.
    cell = hoomd.md.nlist.Cell(buffer=0, exclusions=['body'])
    lj = hoomd.md.pair.LJ(nlist=cell)

    # use aa pps simulation to define lj forces between constituent particles
    # (assumes forces[0] of the atomistic simulation is its LJ pair force)
    for type_pair, pair_params in dict(pps_aa_simulation.forces[0].params).items():
        lj.params[type_pair] = pair_params
        lj.r_cut[type_pair] = 2.5

    # The rigid center particle type does not interact with anything.
    lj.params[('rigid', ['rigid', 'ca', 'sh'])] = dict(epsilon=0, sigma=0)
    lj.r_cut[('rigid', ['rigid', 'ca', 'sh'])] = 0

    integrator.forces.append(lj)
    rigid_simulation.state.thermalize_particle_momenta(
        filter=rigid_centers_and_free, kT=kT
    )

    rigid_simulation.run(0)

    log_quantities = [
        "kinetic_temperature",
        "potential_energy",
        "kinetic_energy",
        "volume",
        "pressure",
        "pressure_tensor",
    ]
    logger = hoomd.logging.Logger(categories=["scalar", "string", "particle"])
    logger.add(rigid_simulation, quantities=["timestep", "tps"])
    thermo_props = hoomd.md.compute.ThermodynamicQuantities(filter=hoomd.filter.All())
    rigid_simulation.operations.computes.append(thermo_props)
    logger.add(thermo_props, quantities=log_quantities)

    # Per-particle quantities logged from the rigid constraint; these are the
    # values read back when the dataset is built.
    logger.add(
        rigid_simulation.operations.integrator.rigid,
        quantities=["torques", "forces", "energies"],
    )

    # Make sure the output directory exists before the writer opens its file.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    gsd_writer = hoomd.write.GSD(
        filename=os.path.join(log_dir, "trajectory_{}.gsd".format(kT)),
        trigger=hoomd.trigger.Periodic(int(1)),
        mode="wb",
        logger=logger,
        filter=hoomd.filter.All(),
        dynamic=["momentum", "property"],
    )

    rigid_simulation.operations.writers.append(gsd_writer)
    return rigid_simulation

    


In [8]:
# Temperatures (kT) to simulate; create_rigid_simulation writes one trajectory
# file per value.
kT_list = [1.]
for kT in kT_list:
    rigid_simulation = create_rigid_simulation(kT)
    rigid_simulation.run(1000)
    # writers[0] is the GSD writer attached by create_rigid_simulation; flush
    # it so the trajectory on disk is complete before it is read back.
    rigid_simulation.operations.writers[0].flush()

In [19]:
# Debug cell: print the neighbor-list entries of local particle index i, using
# the neighbor list of the first force on the integrator (the LJ pair force).
with rigid_simulation.operations.integrator.forces[0].nlist.cpu_local_nlist_arrays as arrays:
     i =0
     print(arrays.nlist[arrays.head_list[i]:arrays.head_list[i]+arrays.n_neigh[i]])
     # Unfinished sketch for iterating over every particle's neighbors:
     # nlist_iter = list(zip(arrays.head_list, arrays.n_neigh))
     # for i in arrays.head_list:
     #    pr

HOOMDArray([8 9])


In [17]:
# Debug cell: print the reverse-tag array (rtag) of the local snapshot.
# Uses the public `state` property, as the rest of the notebook does, instead
# of the private `_state` attribute.
with rigid_simulation.state.cpu_local_snapshot as snap:
    print(snap.particles.rtag)

HOOMDArray([ 3 13  6  7  2  1  0  4  5  8  9 10 14 15 12 11])


In [20]:
rigid_simulation.state.get_snapshot().particles.typeid

array([0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1], dtype=uint32)

## Filter the data: keep only frames with pair separation r <= 2.5

In [39]:
import gsd.hoomd
import numpy as np

# Temperatures whose saved trajectories are combined into one dataset.
kT_list = [1., 1.5, 2., 3., 6.0]

# Per-frame data for the first two particles of each frame (the two rigid-body
# centers, assuming they come first in the particle arrays — TODO confirm).
positions = []
orientations = []
forces = []
torques = []
energies = []
for kT in kT_list:
    # Open each trajectory in a context manager so its file handle is closed
    # once the frames have been read, instead of staying open in the kernel.
    with gsd.hoomd.open("logs/pps_pair/trajectory_{}.gsd".format(kT)) as traj:
        # The loop variable is deliberately named `frame`: a later cell reads
        # the box length from the last frame loaded here.
        for frame in traj:
            positions.append(frame.particles.position[:2])
            orientations.append(frame.particles.orientation[:2])
            forces.append(frame.log['particles/md/constrain/Rigid/forces'][:2])
            torques.append(frame.log['particles/md/constrain/Rigid/torques'][:2])
            energies.append(frame.log['particles/md/constrain/Rigid/energies'][:2])
        

In [40]:
# Box length along x, read from `frame`. When the cells run in order, `frame`
# is the last frame yielded by the trajectory-loading loop above.
# NOTE(review): reusing it for all axes assumes a cubic box — confirm.
L= frame.configuration.box[0]
L

16.986748

In [41]:
# Sanity check: box length (x) of the current rigid simulation state.
print(rigid_simulation.state.get_snapshot().configuration.box[0])
def adjust_periodic_boundary(pos, box_len):
    """Apply the minimum-image convention to a displacement vector.

    Components larger than half the box length are shifted by one full box
    length so that the result is the displacement to the nearest periodic
    image. (The previous version shifted by half a box length, which does not
    correspond to any periodic image and gave wrong distances.)

    Parameters
    ----------
    pos : array_like
        Displacement components (e.g. ``r_i - r_j``); any shape.
    box_len : float
        Length of the (cubic) periodic box.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``pos`` with each component wrapped once.
        Components already within ``[-box_len/2, box_len/2]`` are unchanged.
    """
    pos = np.asarray(pos)
    half_box = box_len / 2
    return np.where(
        pos > half_box,
        pos - box_len,
        np.where(pos < -half_box, pos + box_len, pos),
    )

16.98674774169922


In [42]:
# Indices of the frames in which the two bodies are within the 2.5 cutoff,
# measured after applying adjust_periodic_boundary to their separation.
keep_idx = [
    frame_idx
    for frame_idx, pair in enumerate(positions)
    if np.linalg.norm(adjust_periodic_boundary(pair[0] - pair[1], L)) <= 2.5
]

In [43]:
# Stack each per-frame list into an array and keep only the selected frames.
(
    keep_positions,
    keep_orientations,
    keep_forces,
    keep_torques,
    keep_energies,
) = (
    np.asarray(per_frame)[keep_idx]
    for per_frame in (positions, orientations, forces, torques, energies)
)

In [44]:
len(keep_idx)

1952

## create the dataset

In [83]:
# this is the old way where we separated each particle into a separate row

# particle_pos = []
# neighbor_pos = []
# particle_orient_q = []
# neighbor_orient_q = []
# particle_orient_R = []
# neighbor_orient_R = []
# particle_force = []
# particle_torque = []
# particle_energy = []
# for pos, orient, force, torque, energy in zip(keep_positions, keep_orientations, keep_forces,keep_torques, keep_energies):
#     particle_pos.extend([pos[0], pos[1]])
#     neighbor_pos.extend([pos[1], pos[0]])
# 
#     particle_orient_q.extend([orient[0], orient[1]])
#     neighbor_orient_q.extend([orient[1], orient[0]])
# 
#     particle_orient_R.extend([rowan.to_matrix(orient[0]), rowan.to_matrix(orient[1])])
#     neighbor_orient_R.extend([rowan.to_matrix(orient[1]), rowan.to_matrix(orient[0])])
# 
#     particle_force.extend([force[0], force[1]])
#     particle_torque.extend([torque[0], torque[1]])
#     particle_energy.extend([energy[0], energy[1]])

In [88]:
# This is the old way where we store each particle's data in a separate row
# columns = [
#     "particle_pos",
#     "neighbor_pos",
#     "particle_orient_q",
#     "neighbor_orient_q", 
#     "particle_orient_R",
#     "neighbor_orient_R",
#     "force",
#     "torque",
#     "energy"
# ]
# 
# new_traj_df = pd.DataFrame(columns=columns)
# new_traj_df["particle_pos"] = particle_pos
# new_traj_df["neighbor_pos"] = neighbor_pos
# new_traj_df["particle_orient_q"] = particle_orient_q
# new_traj_df["neighbor_orient_q"] = neighbor_orient_q
# new_traj_df["particle_orient_R"] = particle_orient_R
# new_traj_df["neighbor_orient_R"] = neighbor_orient_R
# new_traj_df["force"] = particle_force
# new_traj_df["torque"] = particle_torque
# new_traj_df["energy"] = particle_energy


# new_traj_df = new_traj_df.sample(frac=1).reset_index(drop=True)# shuffle dataset

## Create the dataset (New version)

In this version, we store the data of all particles in a box at once, meaning that each row of the dataset contains the data of all particles in the box.


In [45]:
# Convert the kept orientation quaternions to rotation matrices.
orientations_R = rowan.to_matrix(keep_orientations)

In [46]:
# One row per kept frame; every cell holds the nested list for all particles
# in that frame.
columns = [
    "position",
    "orientation_q",
    "orientation_R",
    "force",
    "torque",
    "energy",
]

new_traj_df = pd.DataFrame(
    {
        "position": keep_positions.tolist(),
        "orientation_q": keep_orientations.tolist(),
        "orientation_R": orientations_R.tolist(),
        "force": keep_forces.tolist(),
        "torque": keep_torques.tolist(),
        "energy": keep_energies.tolist(),
    },
    columns=columns,
)

In [47]:
new_traj_df.head()

Unnamed: 0,position,orientation_q,orientation_R,force,torque,energy
0,"[[-1.7079815864562988, -1.6376734972000122, -4...","[[0.5738012790679932, -0.515727698802948, -0.1...","[[[0.1904458999633789, -0.5457628965377808, -0...","[[0.25468502590109915, 0.08594023537412374, 0....","[[-0.028348362909006726, -0.03091515559681838,...","[-0.0702367409284437, -0.0702367409284437]"
1,"[[-1.7059344053268433, -1.6380993127822876, -4...","[[0.5762675404548645, -0.514443039894104, -0.1...","[[[0.19347184896469116, -0.546608567237854, -0...","[[0.25544852213073665, 0.08649609495266264, 0....","[[-0.028608590314447905, -0.0302685583990233, ...","[-0.07048534777606755, -0.07048534777606755]"
2,"[[-1.7003545761108398, -1.6392571926116943, -4...","[[0.5791864395141602, -0.5129172801971436, -0....","[[[0.19708222150802612, -0.5475848317146301, -...","[[0.25854305740081085, 0.08826095240826068, 0....","[[-0.029188435830747723, -0.029553917539921794...","[-0.07141713796509878, -0.07141713796509877]"
3,"[[-1.6967456340789795, -1.6400054693222046, -4...","[[0.5843008756637573, -0.5102300047874451, -0....","[[[0.20348435640335083, -0.5492297410964966, -...","[[0.259715931368709, 0.08922912930238508, 0.25...","[[-0.02969929557475994, -0.028124960039773203,...","[-0.071812430239214, -0.071812430239214]"
4,"[[-1.692319393157959, -1.640921950340271, -4.2...","[[0.5888551473617554, -0.5078221559524536, -0....","[[[0.20926731824874878, -0.5506226420402527, -...","[[0.26161642459771406, 0.09053840007701844, 0....","[[-0.030272703855402094, -0.026829365138599334...","[-0.0724135951557511, -0.07241359515575109]"


In [48]:
# Shuffle the dataset. A fixed random_state makes the shuffle, and therefore
# the train/val/test split below, reproducible under Restart & Run All.
new_traj_df = new_traj_df.sample(frac=1, random_state=0).reset_index(drop=True)

In [49]:
new_traj_df.head()

Unnamed: 0,position,orientation_q,orientation_R,force,torque,energy
0,"[[-0.0526466891169548, -1.6042687892913818, -3...","[[-0.719586968421936, -0.4931466281414032, 0.4...","[[[0.5219979882240295, -0.7815905809402466, -0...","[[-3.080746774457362, 1.7370417092434314, 1.09...","[[-0.30241506814446195, -0.3432940915308098, -...","[-0.5877426947441102, -0.5877426947441101]"
1,"[[-0.8666112422943115, -1.6951040029525757, -4...","[[0.5993897318840027, -0.08189769089221954, 0....","[[[-0.2680494785308838, 0.8574913144111633, 0....","[[2.478134199109598, -19.235788925586057, -22....","[[12.84767100016488, 0.9154931248577944, 0.592...","[-0.21466537505549455, -0.2146653750554946]"
2,"[[-1.3337652683258057, -1.6981642246246338, -4...","[[0.971187949180603, -0.1854988932609558, -0.1...","[[[0.9552318453788757, 0.04020068421959877, -0...","[[0.8859464258188247, 0.7072198571451592, 1.67...","[[0.12918127694004797, 0.46880956971786375, -0...","[-0.3223274315275283, -0.3223274315275283]"
3,"[[-0.9799020886421204, -1.764159083366394, -4....","[[0.7632633447647095, -0.04681701958179474, 0....","[[[0.16952550411224365, 0.9343368411064148, 0....","[[-0.0022683081421735196, 1.8276773152527133, ...","[[-0.852502581260281, -0.14018170038850994, 0....","[-0.41822330854308737, -0.41822330854308737]"
4,"[[-0.5956459641456604, -1.6828162670135498, -4...","[[-0.04163462668657303, -0.3112438917160034, 0...","[[[-0.8027876615524292, -0.2854381501674652, 0...","[[-1.7120832617311148, 1.1190903472811884, 2.6...","[[-0.03868431124427686, -0.318369235736899, 0....","[-0.5407088020548707, -0.5407088020548707]"


In [50]:
# Output directory for the dataset. It defaults to the original location and
# can be redirected on another machine with the PPS_PAIR_DATASET_DIR
# environment variable, without editing the notebook.
target_datapath = os.environ.get(
    "PPS_PAIR_DATASET_DIR",
    "/home/marjan/Documents/code-base/ml_datasets/pps_pair",
)
# Create the directory up front so the to_pickle calls do not fail on a fresh
# checkout.
os.makedirs(target_datapath, exist_ok=True)

# Save the full (shuffled) dataset before splitting.
new_traj_df.to_pickle(os.path.join(target_datapath, "raw.pkl"))

# Fractions of the dataset reserved for the test and validation splits.
test_frac = 0.1
val_frac = 0.1

dataset_len = new_traj_df.shape[0]
test_len = int(dataset_len * test_frac)
val_len = int(dataset_len * val_frac)

# Contiguous, non-overlapping slices: [test | val | train].
test_df = new_traj_df.iloc[:test_len]
val_df = new_traj_df.iloc[test_len: test_len + val_len]
train_df = new_traj_df.iloc[test_len + val_len:]

# Every row must land in exactly one split.
assert len(test_df) + len(val_df) + len(train_df) == dataset_len


In [51]:
# Write each split to its own pickle file inside the dataset directory.
for split_file, split_df in (
    ('train.pkl', train_df),
    ('val.pkl', val_df),
    ('test.pkl', test_df),
):
    split_df.to_pickle(os.path.join(target_datapath, split_file))

In [52]:
print(test_df.shape)

(195, 6)


In [53]:
print(val_df.shape)

(195, 6)


In [54]:
print(train_df.shape)

(1562, 6)
