In [94]:
import hoomd
import pickle
import gsd.hoomd
import numpy as np
import pandas as pd
import os
import rowan
import warnings
warnings.filterwarnings('ignore')

## Create a PPS atomistic simulation to collect the positions of the constituent particles and the centers of mass (COM)

In [2]:
from flowermd.library import PPS, OPLS_AA_PPS
from flowermd.base import Pack,  Simulation

  from .xtc import XTCTrajectoryFile
  entry_points = metadata.entry_points()["mbuild.plugins"]


In [3]:
# Atomistic reference system: PPS(num_mols=2, lengths=1) packed at density 0.2.
pps = PPS(num_mols=2, lengths=1)
system = Pack(molecules=pps, density=0.2)

# Forcefield settings, one keyword per line so they are easy to scan.
system.apply_forcefield(
    force_field=OPLS_AA_PPS(),
    r_cut=2.5,
    auto_scale=True,
    scale_charges=True,
    remove_charges=True,
    remove_hydrogens=True,
)

# HOOMD-backed simulation object built from the parametrized system.
pps_aa_simulation = Simulation.from_system(system)

No charged group detected, skipping electrostatics.
Initializing simulation state from a gsd.hoomd.Frame.


  all_scales[index][scaling_interaction_idxes[interaction]] = value
  all_scales[index][scaling_interaction_idxes[interaction]] = value


In [4]:
# Snapshot of the atomistic state and the particle positions stored in it.
frame = pps_aa_simulation.state.get_snapshot()
positions = frame.particles.position

# Indices of the constituent particles, one list per group: 0-6 and 7-13.
constituent_indx = [list(range(0, 7)), list(range(7, 14))]

In [5]:

com_mass, com_positions = [], []

# For every index group: total mass and mass-weighted mean position (COM).
# NOTE(review): positions are used exactly as stored in the snapshot; nothing
# here unwraps a group that straddles a periodic boundary — confirm none does.
for group_indx in constituent_indx:
    group_mass = frame.particles.mass[group_indx]
    group_pos = frame.particles.position[group_indx]

    total_mass = np.sum(group_mass)
    com_mass.append(total_mass)

    mass_weighted_sum = np.sum(group_pos * group_mass[:, np.newaxis], axis=0)
    com_positions.append(mass_weighted_sum / total_mass)

In [6]:
# Positions of the first group's constituent particles relative to that
# group's center of mass; displayed below as the cell output.
rel_const_pos =  positions[constituent_indx[0]] - com_positions[0]
rel_const_pos

array([[ 0.08849504,  0.07669724,  0.64043651],
       [-0.02556489, -0.26707529,  0.49990807],
       [-0.07843468, -0.31309532,  0.11771188],
       [-0.01726601, -0.01489721, -0.12452878],
       [-0.08479617, -0.07348143, -0.61360397],
       [ 0.09703805,  0.32916023,  0.01655017],
       [ 0.14977624,  0.3746983 ,  0.39882241]])

## Create rigid body simulation

In [7]:
def create_rigid_simulation(kT):
    """Build a HOOMD rigid-body simulation thermostatted at ``kT``.

    The initial state is read from ``assets/pps_rigid_initial_run.gsd``. A
    single rigid body type named ``'rigid'`` is defined from the notebook-level
    ``rel_const_pos`` and ``constituent_indx``, and the LJ parameters are copied
    from ``pps_aa_simulation.forces[0]``. A GSD writer that records every
    timestep to ``logs/pps_pair/trajectory_<kT>.gsd`` is attached before the
    simulation is returned; the output directory is created if it is missing.

    Parameters
    ----------
    kT : float
        Value handed to the Bussi thermostat and to the momentum
        thermalization. It is also formatted into the trajectory filename.

    Returns
    -------
    hoomd.Simulation
        The configured simulation. ``run(0)`` has been called on it; no
        further steps have been run.
    """
    rigid_simulation = hoomd.Simulation(device=hoomd.device.auto_select(), seed=1)
    rigid_simulation.create_state_from_gsd(filename='assets/pps_rigid_initial_run.gsd')

    # Take the type names from a fresh atomistic snapshot so the result does
    # not depend on whatever `frame` currently refers to in the notebook
    # namespace.
    aa_snapshot = pps_aa_simulation.state.get_snapshot()
    const_particle_types = list(
        np.asarray(aa_snapshot.particles.types)[list(aa_snapshot.particles.typeid)][constituent_indx[0]]
    )

    rigid = hoomd.md.constrain.Rigid()
    rigid.body['rigid'] = {
        "constituent_types": const_particle_types,
        "positions": rel_const_pos,
        # Identity quaternion for every constituent particle.
        "orientations": [(1.0, 0.0, 0.0, 0.0)] * len(rel_const_pos),
    }
    integrator = hoomd.md.Integrator(dt=0.005, integrate_rotational_dof=True)
    rigid_simulation.operations.integrator = integrator
    integrator.rigid = rigid

    # Only rigid-body centers and free particles are integrated/thermostatted.
    rigid_centers_and_free = hoomd.filter.Rigid(("center", "free"))
    nvt = hoomd.md.methods.ConstantVolume(
        filter=rigid_centers_and_free,
        thermostat=hoomd.md.methods.thermostats.Bussi(kT=kT))
    integrator.methods.append(nvt)

    # 'body' exclusions: see the HOOMD neighbor-list docs; intended to skip
    # pairs that belong to the same rigid body.
    cell = hoomd.md.nlist.Cell(buffer=0, exclusions=['body'])

    lj = hoomd.md.pair.LJ(nlist=cell)

    # Use the atomistic PPS simulation to define the LJ parameters between
    # constituent particle types; every copied pair gets r_cut = 2.5.
    for k, v in dict(pps_aa_simulation.forces[0].params).items():
        lj.params[k] = v
        lj.r_cut[k] = 2.5

    # The 'rigid' center type has zero LJ interaction with 'rigid', 'ca', 'sh'.
    lj.params[('rigid', ['rigid', 'ca', 'sh'])] = dict(epsilon=0, sigma=0)
    lj.r_cut[('rigid', ['rigid', 'ca', 'sh'])] = 0

    integrator.forces.append(lj)
    rigid_simulation.state.thermalize_particle_momenta(filter=rigid_centers_and_free,
                                                       kT=kT)

    # Zero-step run before the logger and writer are set up.
    # NOTE(review): presumably done so the operations are attached first — confirm.
    rigid_simulation.run(0)

    log_quantities = [
        "kinetic_temperature",
        "potential_energy",
        "kinetic_energy",
        "volume",
        "pressure",
        "pressure_tensor",
    ]
    # NOTE(review): "pressure_tensor" is not a scalar quantity; confirm whether
    # the category list below actually lets it into the log.
    logger = hoomd.logging.Logger(categories=["scalar", "string", "particle"])
    logger.add(rigid_simulation, quantities=["timestep", "tps"])
    thermo_props = hoomd.md.compute.ThermodynamicQuantities(filter=hoomd.filter.All())
    rigid_simulation.operations.computes.append(thermo_props)
    logger.add(thermo_props, quantities=log_quantities)

    # Per-particle torques/forces/energies reported by the rigid constraint;
    # these are the quantities read back when the dataset is assembled.
    logger.add(rigid_simulation.operations.integrator.rigid, quantities=["torques", "forces", "energies"])

    # Make sure the output directory exists so a fresh checkout can run this.
    trajectory_path = "logs/pps_pair/trajectory_{}.gsd".format(kT)
    os.makedirs(os.path.dirname(trajectory_path), exist_ok=True)

    gsd_writer = hoomd.write.GSD(
        filename=trajectory_path,
        trigger=hoomd.trigger.Periodic(int(1)),  # write every timestep
        mode="wb",
        logger=logger,
        filter=hoomd.filter.All(),
        dynamic=["momentum", "property"]
        )

    rigid_simulation.operations.writers.append(gsd_writer)
    return rigid_simulation

    


In [18]:
# Temperatures to sample; each one gets its own 1000-step rigid-body run.
kT_list = [1.0, 1.5, 2.0, 3.0, 6.0]
for kT in kT_list:
    rigid_simulation = create_rigid_simulation(kT)
    rigid_simulation.run(1000)
    # Flush the first registered writer once the run has finished.
    trajectory_writer = rigid_simulation.operations.writers[0]
    trajectory_writer.flush()

## Keep only frames with pair separation r <= 3.5 (discard r > 3.5)

In [21]:
# Per-frame data for the first two particles of every trajectory, pooled over
# all temperatures in kT_list.
positions = []
orientations = []
forces = []
torques = []
energies = []
for kT in kT_list:
    # The context manager closes the GSD file once its frames have been read.
    with gsd.hoomd.open("logs/pps_pair/trajectory_{}.gsd".format(kT)) as traj:
        # The loop variable is not named `frame`, so the atomistic snapshot
        # stored under that name in the notebook is left intact.
        for traj_frame in traj:
            positions.append(traj_frame.particles.position[:2])
            orientations.append(traj_frame.particles.orientation[:2])
            forces.append(traj_frame.log['particles/md/constrain/Rigid/forces'][:2])
            torques.append(traj_frame.log['particles/md/constrain/Rigid/torques'][:2])
            energies.append(traj_frame.log['particles/md/constrain/Rigid/energies'][:2])
        

In [46]:
# First entry of the box array of the current `rigid_simulation`, used as the
# box length in the distance filter.
# NOTE(review): only box[0] is read, so this assumes a cubic box — confirm.
L = rigid_simulation.state.get_snapshot().configuration.box[0]
def adjust_periodic_boundary(pos, box_len):
    """Apply the minimum-image convention to displacement components.

    Every component is shifted by a whole number of box lengths so that it
    lands within ``[-box_len / 2, box_len / 2]``. Components already inside
    that interval (including exactly on its edges) are returned unchanged.

    Parameters
    ----------
    pos : array_like
        Displacement components, e.g. the difference of two positions.
    box_len : float
        Length of the periodic box along each component; must be non-zero.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``pos`` holding the wrapped components.
    """
    pos = np.asarray(pos)
    # Subtract the nearest whole number of box lengths. The shift must be a
    # full box length: shifting by half a box does not give a periodic image.
    return pos - box_len * np.round(pos / box_len)

In [74]:
# Indices of the frames whose pair separation, computed from the displacement
# returned by adjust_periodic_boundary, is at most 3.5.
keep_idx = [
    frame_idx
    for frame_idx, pair in enumerate(positions)
    if np.linalg.norm(adjust_periodic_boundary(pair[0] - pair[1], L)) <= 3.5
]

In [77]:
# Stack each per-frame list into an array and keep only the selected frames.
(
    keep_positions,
    keep_orientations,
    keep_forces,
    keep_torques,
    keep_energies,
) = (
    np.asarray(per_frame)[keep_idx]
    for per_frame in (positions, orientations, forces, torques, energies)
)

In [85]:
# Number of frames that passed the distance filter.
len(keep_idx)

3277

## Create the dataset

In [83]:

particle_pos, neighbor_pos = [], []
particle_orient_q, neighbor_orient_q = [], []
particle_orient_R, neighbor_orient_R = [], []
particle_force, particle_torque, particle_energy = [], [], []

kept_frames = zip(keep_positions, keep_orientations, keep_forces, keep_torques, keep_energies)
for pos, orient, force, torque, energy in kept_frames:
    # Every kept frame yields two rows: first particle 0 with neighbor 1,
    # then particle 1 with neighbor 0.
    for i, j in ((0, 1), (1, 0)):
        particle_pos.append(pos[i])
        neighbor_pos.append(pos[j])

        # Orientations are stored both as quaternions (q) and as the rotation
        # matrices (R) that rowan derives from them.
        particle_orient_q.append(orient[i])
        neighbor_orient_q.append(orient[j])
        particle_orient_R.append(rowan.to_matrix(orient[i]))
        neighbor_orient_R.append(rowan.to_matrix(orient[j]))

        # Targets belong to the "particle" of the row.
        particle_force.append(force[i])
        particle_torque.append(torque[i])
        particle_energy.append(energy[i])

In [84]:
# Number of dataset rows (two per kept frame).
len(particle_force)

6554

In [88]:

# Column name -> per-row values, listed in the order the columns appear.
column_data = {
    "particle_pos": particle_pos,
    "neighbor_pos": neighbor_pos,
    "particle_orient_q": particle_orient_q,
    "neighbor_orient_q": neighbor_orient_q,
    "particle_orient_R": particle_orient_R,
    "neighbor_orient_R": neighbor_orient_R,
    "force": particle_force,
    "torque": particle_torque,
    "energy": particle_energy,
}
columns = list(column_data)

# Start from an empty frame with the final column layout, then fill it one
# column at a time.
new_traj_df = pd.DataFrame(columns=columns)
for column_name, column_values in column_data.items():
    new_traj_df[column_name] = column_values

In [90]:
# Preview the first rows of the assembled dataset.
new_traj_df.head()

Unnamed: 0,particle_pos,neighbor_pos,particle_orient_q,neighbor_orient_q,particle_orient_R,neighbor_orient_R,force,torque,energy
0,"[-2.5077713, -1.4357663, -4.0184007]","[0.6743287, -1.1467189, -2.599957]","[-0.47221667, -0.55832356, 0.67347145, -0.1082...","[0.85002875, 0.49808922, -0.16974491, -0.02334...","[[0.06942749437257612, -0.8542838096618652, -0...","[[0.9412835575640202, -0.12941205501556396, -0...","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]",0.0
1,"[0.6743287, -1.1467189, -2.599957]","[-2.5077713, -1.4357663, -4.0184007]","[0.85002875, 0.49808922, -0.16974491, -0.02334...","[-0.47221667, -0.55832356, 0.67347145, -0.1082...","[[0.9412835575640202, -0.12941205501556396, -0...","[[0.06942749437257612, -0.8542838096618652, -0...","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]",0.0
2,"[-2.5060034, -1.4362459, -4.018891]","[0.67256075, -1.1462393, -2.5994666]","[-0.47318783, -0.56264377, 0.67033255, -0.1008...","[0.85262376, 0.4931297, -0.17208822, -0.015537...","[[0.08094936609268188, -0.8498004078865051, -0...","[[0.9402884283833759, -0.14322759211063385, -0...","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]",0.0
3,"[0.67256075, -1.1462393, -2.5994666]","[-2.5060034, -1.4362459, -4.018891]","[0.85262376, 0.4931297, -0.17208822, -0.015537...","[-0.47318783, -0.56264377, 0.67033255, -0.1008...","[[0.9402884283833759, -0.14322759211063385, -0...","[[0.08094936609268188, -0.8498004078865051, -0...","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]",0.0
4,"[-2.5033927, -1.4369541, -4.0196147]","[0.6699499, -1.145531, -2.5987427]","[-0.47396696, -0.56639576, 0.66755927, -0.0944...","[0.8548275, 0.48876634, -0.17407463, -0.008680...","[[0.09089773893356323, -0.8457125425338745, -0...","[[0.9392453283071518, -0.15532246232032776, -0...","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]",0.0


In [91]:
# Shuffle the rows with a fixed seed so that the train/val/test split derived
# from this ordering is reproducible after Restart & Run All.
# NOTE(review): every kept frame contributes two rows (particle/neighbor
# swapped) and the trajectories are written every timestep, so a row-level
# shuffle can put near-duplicate samples into different splits — consider
# splitting by frame or by trajectory instead.
SHUFFLE_SEED = 0
new_traj_df = new_traj_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [92]:
# Preview the first rows after shuffling.
new_traj_df.head()

Unnamed: 0,particle_pos,neighbor_pos,particle_orient_q,neighbor_orient_q,particle_orient_R,neighbor_orient_R,force,torque,energy
0,"[-1.2025616, -1.7761111, -4.3536453]","[-0.6308811, -0.806374, -2.2647119]","[0.9409168, -0.0742097, -0.0033562845, -0.3303...","[-0.4053922, 0.61528796, 0.67558146, 0.02583783]","[[0.7816629726685865, 0.6222361922264099, 0.04...","[[0.08584416424299268, 0.852303147315979, -0.5...","[0.17734454028054775, 0.6442765132815007, 0.95...","[-0.19864506075348648, -0.05785907726551297, 0...",-0.195967
1,"[-1.1817758, -0.6965503, -2.177009]","[-0.6516668, -1.885935, -4.441348]","[0.39517584, 0.8568144, 0.0015043953, -0.33121...","[0.13398498, -0.19950251, 0.41186494, -0.8789847]","[[0.7805897444486618, 0.2643536925315857, -0.5...","[[-0.8844935894012451, 0.07120531797409058, 0....","[0.11374025052799142, -0.1122896226466197, -0....","[0.0625343496634099, -0.07642257759990848, 0.0...",-0.056166
2,"[-2.1663728, -1.5281798, -4.1127434]","[0.3329302, -1.0543054, -2.5056143]","[-0.25683436, -0.7616157, 0.31932253, 0.5020067]","[0.76492095, 0.041044734, -0.086903445, 0.6369...","[[0.29204471659926867, -0.2285369634628296, -0...","[[0.1735774278640747, -0.981511116027832, -0.0...","[0.14369590394284582, 0.02833653345978309, 0.0...","[0.006422419569839462, -0.014112954285625152, ...",-0.032263
3,"[-2.1182837, -1.5409714, -4.125632]","[0.2848411, -1.0415138, -2.4927256]","[-0.1753283, -0.75861454, 0.25458083, 0.57354385]","[0.6973533, -0.02013286, -0.029752579, 0.7158267]","[[0.2124720811843872, -0.18514050543308258, -0...","[[-0.026586055755615234, -0.9971702098846436, ...","[0.17508272312290646, 0.03913929011851256, 0.1...","[0.0025744572667161444, -0.029960162407120926,...",-0.039252
4,"[-0.79654485, -0.9440803, -2.5488966]","[-1.0368978, -1.6384048, -4.069461]","[-0.15944633, 0.7977844, 0.53870547, -0.21889117]","[0.7881971, -0.07384217, 0.16621993, -0.58793163]","[[0.32376617193222046, 0.7897387742996216, -0....","[[0.2534146308898926, 0.9022639393806458, 0.34...","[1.6399372749330114, 24.06423819421051, 7.6121...","[15.45295407059379, -1.2129844967406687, -0.66...",-1.011747


In [95]:
# Output directory for the pickled dataset. Set the PPS_PAIR_DATA_DIR
# environment variable to write somewhere else; when it is unset the original
# hard-coded location is used.
target_datapath = os.environ.get(
    "PPS_PAIR_DATA_DIR",
    "/home/marjan/Documents/code-base/ml_datasets/pps_pair",
)
# Create the directory up front so the pickle writes do not fail on a fresh
# machine.
os.makedirs(target_datapath, exist_ok=True)

# Full shuffled dataset, before splitting.
new_traj_df.to_pickle(os.path.join(target_datapath, "raw.pkl"))

# Fractions of the dataset reserved for the test and validation splits.
test_frac = 0.1
val_frac = 0.1

# int() truncates, so any remainder rows end up in the training split.
dataset_len = new_traj_df.shape[0]
test_len = int(dataset_len * test_frac)
val_len = int(dataset_len * val_frac)

# Consecutive row ranges of the shuffled frame: test, then val, then train.
test_df = new_traj_df.iloc[:test_len]

val_df = new_traj_df.iloc[test_len: test_len + val_len]

train_df = new_traj_df.iloc[test_len + val_len:]


In [96]:
# Write each split to its own pickle file inside target_datapath.
for split_filename, split_df in (
    ('train.pkl', train_df),
    ('val.pkl', val_df),
    ('test.pkl', test_df),
):
    split_df.to_pickle(os.path.join(target_datapath, split_filename))

In [97]:
# (rows, columns) of the test split.
print(test_df.shape)

(655, 9)


In [98]:
# (rows, columns) of the validation split.
print(val_df.shape)

(655, 9)


In [99]:
# (rows, columns) of the training split.
print(train_df.shape)

(5244, 9)
