In [3]:
from flowermd.library import PPS, OPLS_AA_PPS
from flowermd.base import Pack,  Simulation
import hoomd

import numpy as np
import gsd.hoomd

import warnings
# Blanket filter: silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [11]:
# Number of PPS molecules; also used below to name the asset and log files.
num_mols = 20

## Initialize the rigid frame (re-run only when `num_mols` changes)

In [12]:
# Pack `num_mols` PPS molecules (lengths=1) and apply the OPLS-AA PPS force field.
pps = PPS(num_mols=num_mols, lengths=1)
system = Pack(molecules=pps, density=0.5, packing_expand_factor=3)

# NOTE(review): remove_charges and scale_charges are both True; with charges
# removed, scale_charges presumably has no effect -- confirm.
forcefield_options = dict(
    r_cut=2.5,
    force_field=OPLS_AA_PPS(),
    auto_scale=True,
    remove_charges=True,
    scale_charges=True,
    remove_hydrogens=True,
)
system.apply_forcefield(**forcefield_options)

# Kept for later: pps_ff[0].params is copied into the rigid-body LJ potential.
pps_ff = system.hoomd_forcefield

No charged group detected, skipping electrostatics.


In [13]:
# Write the packed all-atom system to disk; the next cell reads this file back.
aa_gsd_path = f"assets/pps_aa_N{num_mols}.gsd"
system.to_gsd(aa_gsd_path)

In [14]:

# Read the first frame of the all-atom GSD file and close the file right away;
# the frame's arrays are used after the handle is released.
with gsd.hoomd.open("assets/pps_aa_N{}.gsd".format(num_mols)) as aa_traj:
    aa_frame = aa_traj[0]
positions = aa_frame.particles.position
masses = aa_frame.particles.mass

# Particles per molecule in the all-atom frame.
# NOTE(review): 7 presumably corresponds to one PPS monomer with hydrogens
# removed -- confirm if the molecule definition changes.
atoms_per_mol = 7

# indices of constituent particles, one index array per molecule
constituent_indx = [
    np.arange(atoms_per_mol) + (i * atoms_per_mol) for i in range(num_mols)
]

# Total mass and mass-weighted mean position (center of mass) of each molecule.
# NOTE(review): positions are used exactly as stored in the file; if a molecule
# is wrapped across a periodic boundary its COM would be wrong -- confirm the
# stored molecules are whole.
com_mass = []
com_positions = []
for indx in constituent_indx:
    total_mass = np.sum(masses[indx])
    com_mass.append(total_mass)
    com_positions.append(
        np.sum(positions[indx] * masses[indx, np.newaxis], axis=0) / total_mass
    )

# Constituent positions of molecule 0 relative to its COM; this is reused as
# the template for every rigid body.
rel_const_pos = positions[constituent_indx[0]] - com_positions[0]

In [15]:
# Type name of every particle in the all-atom frame, then the names of the
# particles belonging to molecule 0 (the rigid-body template).
type_names = np.asarray(aa_frame.particles.types)
per_particle_types = type_names[list(aa_frame.particles.typeid)]
const_particle_types = list(per_particle_types[constituent_indx[0]])

In [16]:
# First entry of the packed system's snapshot box; used below as the edge
# length of a cubic box ([L, L, L, 0, 0, 0]).
L = system.hoomd_snapshot.configuration.box[0]
L

16.17887

In [17]:
# Moment of inertia of the first particle in a previously generated rigid
# lattice. This file is not produced by the cells above and must already exist.
# The file handle is closed as soon as the frame has been read.
with gsd.hoomd.open('assets/pps_rigid_lattice_100.gsd') as sample_traj:
    sample_rigid_lattice = sample_traj[0]
I = sample_rigid_lattice.particles.moment_inertia[0]
I

array([0.7980529 , 0.88629776, 0.18916835], dtype=float32)

In [18]:
# Create the initial rigid lattice: one 'rigid' center particle per molecule,
# placed at the molecule's center of mass with the identity orientation.
N_rigid = len(com_mass)
identity_quat = (1, 0, 0, 0)

rigid_frame = gsd.hoomd.Frame()
rigid_frame.configuration.box = [L, L, L, 0, 0, 0]
rigid_frame.particles.types = ['rigid', 'ca', 'sh']
rigid_frame.particles.N = N_rigid
rigid_frame.particles.typeid = [0 for _ in range(N_rigid)]
rigid_frame.particles.position = com_positions
rigid_frame.particles.mass = com_mass
rigid_frame.particles.orientation = [identity_quat for _ in range(N_rigid)]
rigid_frame.particles.moment_inertia = np.tile(I, (N_rigid, 1))

# Body template shared by every 'rigid' center: constituent types and
# COM-relative positions taken from molecule 0.
n_constituents = len(rel_const_pos)
rigid = hoomd.md.constrain.Rigid()
rigid.body['rigid'] = dict(
    constituent_types=const_particle_types,
    positions=rel_const_pos,
    orientations=[(1.0, 0.0, 0.0, 0.0)] * n_constituents,
)

simulation = hoomd.Simulation(device=hoomd.device.CPU(), seed=4)
simulation.create_state_from_snapshot(rigid_frame)
# Let HOOMD create the bodies in the freshly initialized state.
rigid.create_bodies(simulation.state)

integrator = hoomd.md.Integrator(dt=0.005, integrate_rotational_dof=True)
integrator.rigid = rigid
simulation.operations.integrator = integrator
simulation.run(0)

# Save the state (centers plus created bodies) as the starting lattice.
lattice_path = 'assets/pps_rigid_lattice_{}.gsd'.format(num_mols)
hoomd.write.GSD.write(state=simulation.state, mode='wb', filename=lattice_path)

In [19]:
# Display the number of rigid bodies (len(com_mass)).
N_rigid

20

### Run rigid simulations

In [20]:
def create_rigid_simulation(kT, num_mols, dt=0.005, r_cut=2.5, seed=1, write_period=100):
    """Build a rigid-body constant-volume simulation of the PPS lattice.

    The initial state is read from ``assets/pps_rigid_lattice_{num_mols}.gsd``.
    The rigid-body template and the pair parameters come from the notebook
    globals ``aa_frame``, ``constituent_indx``, ``rel_const_pos`` and
    ``pps_ff``, so the cells defining them must have been run first.

    Parameters
    ----------
    kT : float
        Temperature passed to the Bussi thermostat and used to thermalize the
        initial momenta.
    num_mols : int
        Selects the input lattice file and the output directory
        ``logs/pps_{num_mols}/``.
    dt : float, optional
        Integrator time step (default 0.005).
    r_cut : float, optional
        Cutoff assigned to every pair copied from ``pps_ff[0]`` (default 2.5).
    seed : int, optional
        Seed of the HOOMD simulation (default 1).
    write_period : int, optional
        Number of steps between GSD frames (default 100).

    Returns
    -------
    hoomd.Simulation
        Simulation with integrator, LJ force, thermodynamic compute and a GSD
        writer attached. The writer is ``operations.writers[0]`` and writes to
        ``logs/pps_{num_mols}/trajectory_{kT}.gsd``.
    """
    import os  # local import: the notebook only imports os in a later cell

    rigid_simulation = hoomd.Simulation(device=hoomd.device.auto_select(), seed=seed)
    rigid_simulation.create_state_from_gsd(
        filename='assets/pps_rigid_lattice_{}.gsd'.format(num_mols))

    # Type names of the particles of molecule 0 in the all-atom frame.
    const_particle_types = list(
        np.asarray(aa_frame.particles.types)[list(aa_frame.particles.typeid)][constituent_indx[0]])

    # Same body template that was used to create the lattice file.
    rigid = hoomd.md.constrain.Rigid()
    rigid.body['rigid'] = {
        "constituent_types": const_particle_types,
        "positions": rel_const_pos,
        "orientations": [(1.0, 0.0, 0.0, 0.0)] * len(rel_const_pos),
        }
    integrator = hoomd.md.Integrator(dt=dt, integrate_rotational_dof=True)
    rigid_simulation.operations.integrator = integrator
    integrator.rigid = rigid

    # Integrate only rigid centers and free particles.
    rigid_centers_and_free = hoomd.filter.Rigid(("center", "free"))
    nvt = hoomd.md.methods.ConstantVolume(
        filter=rigid_centers_and_free,
        thermostat=hoomd.md.methods.thermostats.Bussi(kT=kT))
    integrator.methods.append(nvt)

    # Pairs within the same body are excluded from the neighbor list.
    cell = hoomd.md.nlist.Cell(buffer=0, exclusions=['body'])

    lj = hoomd.md.pair.LJ(nlist=cell)

    # Use the all-atom PPS force field to define the LJ interactions between
    # constituent particles.
    # NOTE(review): pps_ff[0] is presumably the LJ pair force -- confirm.
    for pair_key, pair_params in dict(pps_ff[0].params).items():
        lj.params[pair_key] = pair_params
        lj.r_cut[pair_key] = r_cut

    # The center particles themselves do not interact with anything.
    lj.params[('rigid', ['rigid', 'ca', 'sh'])] = dict(epsilon=0, sigma=0)
    lj.r_cut[('rigid', ['rigid', 'ca', 'sh'])] = 0

    integrator.forces.append(lj)
    rigid_simulation.state.thermalize_particle_momenta(filter=rigid_centers_and_free,
                                                       kT=kT)

    rigid_simulation.run(0)

    log_quantities = [
        "kinetic_temperature",
        "potential_energy",
        "kinetic_energy",
        "volume",
        "pressure",
        "pressure_tensor",
    ]
    # NOTE(review): pressure_tensor is requested, but the logger only accepts
    # the scalar/string/particle categories -- confirm it is actually written.
    logger = hoomd.logging.Logger(categories=["scalar", "string", "particle"])
    logger.add(rigid_simulation, quantities=["timestep", "tps"])
    thermo_props = hoomd.md.compute.ThermodynamicQuantities(filter=hoomd.filter.All())
    rigid_simulation.operations.computes.append(thermo_props)
    logger.add(thermo_props, quantities=log_quantities)

    # Per-particle net torques, forces and energies reported by the rigid constraint.
    logger.add(rigid_simulation.operations.integrator.rigid,
               quantities=["torques", "forces", "energies"])

    # Make sure the output directory exists before the writer opens its file.
    trajectory_path = "logs/pps_{}/trajectory_{}.gsd".format(num_mols, kT)
    os.makedirs(os.path.dirname(trajectory_path), exist_ok=True)

    gsd_writer = hoomd.write.GSD(
        filename=trajectory_path,
        trigger=hoomd.trigger.Periodic(int(write_period)),
        mode="wb",
        logger=logger,
        filter=hoomd.filter.All(),
        dynamic=["momentum", "property"]
        )

    rigid_simulation.operations.writers.append(gsd_writer)
    return rigid_simulation

    


In [21]:
# Temperatures to sample; each one gets its own simulation and trajectory file.
kT_list = [1.0, 1.5, 2.0, 3.0, 6.0]
n_steps = 2e5
for kT in kT_list:
    rigid_simulation = create_rigid_simulation(kT, num_mols=num_mols)
    rigid_simulation.run(n_steps)
    # writers[0] is the GSD writer attached inside create_rigid_simulation.
    rigid_simulation.operations.writers[0].flush()

## Read trajectories and gather samples

In [22]:
# Collect every frame of every temperature into one flat list. Each trajectory
# file is closed as soon as its frames have been read into memory.
all_frames = []
for kT in kT_list:
    traj_path = "logs/pps_{}/trajectory_{}.gsd".format(num_mols, kT)
    with gsd.hoomd.open(traj_path) as traj:
        all_frames.extend(traj)

In [23]:
# Indices of frames whose logged total potential energy is exactly zero.
zero_energy_frames = [
    frame_idx
    for frame_idx, logged_frame in enumerate(all_frames)
    if logged_frame.log['md/compute/ThermodynamicQuantities/potential_energy'][0] == 0
]

In [30]:
# Display how many frames have exactly zero potential energy.
len(zero_energy_frames)

3

In [31]:
# Rebind L to the first box entry of the first trajectory frame; it is passed
# to find_neighbors below as the cubic box length.
L = all_frames[0].configuration.box[0]
L

16.17887

In [32]:
import freud
def find_neighbors(points, L, NN=10):
    """Return the nearest-neighbor pairs of ``points`` in a cubic periodic box.

    Parameters
    ----------
    points : array-like
        Particle positions; they are used both as the reference points and as
        the query points.
    L : float
        Edge length of the cubic box.
    NN : int, optional
        Number of neighbors requested per point (default 10). A point is never
        paired with itself (``exclude_ii=True``).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_pairs, 2)``; each row is
        ``(query_point_index, point_index)``. The shape stays ``(0, 2)`` when
        no pairs are found.
    """
    box = freud.box.Box.cube(L)
    aq = freud.locality.AABBQuery(box, points)
    query_result = aq.query(points, dict(num_neighbors=NN, exclude_ii=True))
    nlist = query_result.toNeighborList()
    # Stack the two index arrays as columns instead of zipping them in Python.
    return np.column_stack((nlist.query_point_indices, nlist.point_indices))
    
    

## Creating dataset

In [33]:
# Display N_rigid; the next cell uses it to slice the first N_rigid particles of each frame.
N_rigid

20

In [34]:
# One entry per trajectory frame, restricted to the first N_rigid particles
# (presumably the rigid-body centers -- they were the only particles in the
# initial snapshot before bodies were created).
positions, orientations, neighbor_lists = [], [], []
forces, torques, energies = [], [], []

center_slice = slice(N_rigid)
for frame in all_frames:
    frame_log = frame.log
    frame_positions = frame.particles.position[center_slice]

    positions.append(frame_positions)
    orientations.append(frame.particles.orientation[center_slice])
    neighbor_lists.append(find_neighbors(frame_positions, L=L))

    # Per-particle quantities logged from the rigid constraint.
    forces.append(frame_log['particles/md/constrain/Rigid/forces'][center_slice])
    torques.append(frame_log['particles/md/constrain/Rigid/torques'][center_slice])
    energies.append(frame_log['particles/md/constrain/Rigid/energies'][center_slice])

In [35]:
import rowan
# Convert the per-frame orientation quaternions collected above with rowan.to_matrix.
orientations_R = rowan.to_matrix(orientations)

In [36]:
import pandas as pd
# Column name -> per-frame samples. The key order fixes the column order.
column_data = {
    "position": positions,
    "orientation_q": orientations,
    # ndarray converted to nested lists so each row holds one frame's matrices
    "orientation_R": orientations_R.tolist(),
    "force": forces,
    "torque": torques,
    "energy": energies,
    "neighbor_list": neighbor_lists,
}
columns = list(column_data)

# Start from an empty frame and fill it one column at a time (one row per frame).
new_traj_df = pd.DataFrame(columns=columns)
for column_name in columns:
    new_traj_df[column_name] = column_data[column_name]

In [39]:
# Preview the first rows (still in trajectory order at this point).
new_traj_df.head()

Unnamed: 0,position,orientation_q,orientation_R,force,torque,energy,neighbor_list
0,"[[-7.0045786, -1.3044931, 2.724815], [6.178810...","[[0.99994755, -0.0052401405, -0.0041892626, 0....","[[[0.9998452067375183, -0.01542622409760952, -...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.02055119...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.01530792...","[0.0, 0.0, -0.023841660400872017, -1.911377460...","[[0, 1], [0, 2], [0, 4], [0, 5], [0, 8], [0, 1..."
1,"[[-7.0041194, -1.3042572, 2.7254841], [6.17897...","[[0.9999236, -0.0063338433, -0.0050510275, 0.0...","[[[0.9997746348381042, -0.018608393147587776, ...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [-0.0155353...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.01242786...","[0.0, 0.0, -0.03920158068560585, -1.8101502849...","[[0, 1], [0, 2], [0, 4], [0, 5], [0, 8], [0, 1..."
2,"[[-7.0036864, -1.304035, 2.7261145], [6.179132...","[[0.99989617, -0.0073926, -0.005881182, 0.0108...","[[[0.9996939301490784, -0.021676652133464813, ...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [-0.0360869...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.00086050...","[0.0, 0.0, -0.0585577012683685, -3.20102351362...","[[0, 1], [0, 2], [0, 4], [0, 5], [0, 8], [0, 1..."
3,"[[-7.003288, -1.3038301, 2.7266958], [6.179276...","[[0.9998671, -0.008373184, -0.0066464925, 0.01...","[[[0.9996085166931152, -0.02450767159461975, -...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [-0.1056885...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.02728370...","[0.0, 0.0, -0.09599390387622062, -3.0758339830...","[[0, 1], [0, 2], [0, 4], [0, 5], [0, 8], [0, 1..."
4,"[[-7.002903, -1.3036326, 2.7272563], [6.179415...","[[0.99983585, -0.0093147075, -0.007378127, 0.0...","[[[0.9995169043540955, -0.02721627615392208, -...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [-0.3698406...","[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.06062873...","[0.0, 0.0, -0.1641128055353831, -4.12905343394...","[[0, 1], [0, 2], [0, 5], [0, 7], [0, 8], [0, 1..."


In [40]:
# Shuffle the dataset with a fixed seed so the train/val/test split made from
# it is reproducible across kernel restarts.
new_traj_df = new_traj_df.sample(frac=1, random_state=0).reset_index(drop=True)

In [41]:
# Preview the first rows after shuffling.
new_traj_df.head()

Unnamed: 0,position,orientation_q,orientation_R,force,torque,energy,neighbor_list
0,"[[7.854781, 6.167966, -0.7061468], [7.5920525,...","[[0.11804035, -0.60454106, -0.47542, 0.62815],...","[[[-0.24119317531585693, 0.426527738571167, -0...","[[-0.032560537020126984, -0.13845078508830252,...","[[-0.07438335204394497, 0.019590239308743214, ...","[-0.03774833448930359, -0.04221207358538921, -...","[[0, 1], [0, 4], [0, 5], [0, 6], [0, 7], [0, 8..."
1,"[[0.22756346, -4.7955, -3.471893], [-1.4018935...","[[0.32330945, 0.041396655, 0.7658222, -0.55432...","[[[-0.7875146865844727, 0.4218405485153198, 0....","[[0.370507607549505, 0.10323729625758868, -0.2...","[[0.041219509396868165, 0.02397787898171705, 0...","[-0.07743759242525616, 0.0, 0.0, 0.0, 0.0, 0.0...","[[0, 1], [0, 3], [0, 4], [0, 6], [0, 9], [0, 1..."
2,"[[-7.54575, -1.6433389, 1.075051], [1.4806558,...","[[0.16604911, -0.35739535, 0.64561695, -0.6541...","[[[-0.6893925666809082, -0.2442483901977539, 0...","[[0.0, 0.0, 0.0], [0.8571618310187806, 1.71738...","[[0.0, 0.0, 0.0], [0.39588658552552547, 0.8876...","[0.0, -0.728097138442475, 0.0, -0.289011339583...","[[0, 1], [0, 2], [0, 3], [0, 6], [0, 9], [0, 1..."
3,"[[-7.157117, 0.8397648, -5.1161566], [1.615308...","[[0.54380304, -0.21196893, -0.18667746, 0.7902...","[[[-0.31869494915008545, -0.780343770980835, -...","[[-2.326207722273832, -3.626828347039404, 2.19...","[[0.621549248081565, 3.088407610462429, 2.5136...","[-2.0758235497750044, 0.0, -0.5556876245684912...","[[0, 1], [0, 2], [0, 3], [0, 5], [0, 6], [0, 7..."
4,"[[-1.9040401, -6.1461205, 8.032636], [-3.00553...","[[-0.04731683, 0.94606555, 0.30789527, -0.0890...","[[[0.7945578098297119, 0.5741555094718933, -0....","[[-2.025868625900227, 1.1573963471488213, 1.14...","[[-0.19886329238592673, -0.8160485266254588, 0...","[-0.437515274623962, -0.010559791687052166, -1...","[[0, 1], [0, 2], [0, 3], [0, 4], [0, 7], [0, 1..."


In [43]:
import os
# Dataset root: set the ANISO_ML_DATASETS environment variable to relocate it;
# the default keeps the original location. The sub-directory follows num_mols
# (like the asset and log paths) instead of a hard-coded "pps_20".
datasets_root = os.environ.get(
    "ANISO_ML_DATASETS", "/Users/Marjan/Documents/cmelab/aniso_ml_datasets"
)
target_datapath = os.path.join(datasets_root, "pps_{}".format(num_mols))
os.makedirs(target_datapath, exist_ok=True)

# Full (shuffled) dataset.
new_traj_df.to_pickle(os.path.join(target_datapath, "raw.pkl"))

# Fractions of the dataset reserved for testing and validation.
test_frac = 0.1
val_frac = 0.1

dataset_len = new_traj_df.shape[0]
test_len = int(dataset_len * test_frac)
val_len = int(dataset_len * val_frac)

# Consecutive, non-overlapping slices: test first, then validation, rest is training.
test_df = new_traj_df.iloc[:test_len]

val_df = new_traj_df.iloc[test_len: test_len + val_len]

train_df = new_traj_df.iloc[test_len + val_len:]


In [44]:
# Write each split next to raw.pkl, in the order train, val, test.
split_files = {
    'train.pkl': train_df,
    'val.pkl': val_df,
    'test.pkl': test_df,
}
for split_filename, split_df in split_files.items():
    split_df.to_pickle(os.path.join(target_datapath, split_filename))