In [1]:
import signac
import pandas as pd
import numpy as np
import gsd.hoomd
import rowan

In [2]:
# Open the signac project (get_project() is called with no arguments) and
# collect every job returned by find_jobs() into a plain list.
project = signac.get_project()
jobs = [*project.find_jobs()]

In [3]:
# Accumulators for the per-sample records gathered across all jobs.
positions, orientations, energies, net_force, net_torque = [], [], [], [], []

# Column of each job's "raw_data.pkl" frame -> list that collects its values.
# The mapping order matches the order in which the columns are read.
column_sinks = {
    'position': positions,
    'orientation': orientations,
    'net_force': net_force,
    'net_torque': net_torque,
    'energy': energies,
}

for job in jobs:
    # NOTE: unpickling runs arbitrary code from the file; only load job files you trust.
    raw_df = pd.read_pickle(job.fn("raw_data.pkl"))
    for column_name, sink in column_sinks.items():
        sink.extend(raw_df[column_name].tolist())

In [4]:
# Open "lattice_init.gsd" (relative path) with gsd.hoomd; the handle is only
# used below to inspect the box and is never explicitly closed.
lattice = gsd.hoomd.open("lattice_init.gsd")

In [5]:
# Display the box stored in the configuration of the first entry of the lattice file.
lattice[0].configuration.box

array([10., 10., 10.,  0.,  0.,  0.], dtype=float32)

In [6]:
# Convert every accumulated list to a NumPy array (same names, rebound in place)
# so the cells below can use array indexing.
positions, orientations, net_force, net_torque, energies = (
    np.asarray(values)
    for values in (positions, orientations, net_force, net_torque, energies)
)


In [27]:
from functools import reduce

# Keep a sample only if every force component (indices 0-2 on the last axis)
# of its first two entries (indices 0 and 1 on axis 1) lies strictly inside
# (-FORCE_LIMIT, FORCE_LIMIT).
FORCE_LIMIT = 1000

component_masks = []
for particle in range(2):
    for axis in range(3):
        component = net_force[:, particle, axis]
        component_masks.append((component > -FORCE_LIMIT) & (component < FORCE_LIMIT))

# AND the six per-component masks together; np.where returns the surviving
# sample indices in ascending order.
keep_idx = np.where(reduce(np.logical_and, component_masks))[0]

In [28]:
# Number of samples that passed the force filter.
keep_idx.shape

(143538,)

In [29]:
# Restrict every per-sample array to the samples selected by keep_idx.
keep_force, keep_energy, keep_pos, keep_orient, keep_torque = (
    per_sample[keep_idx]
    for per_sample in (net_force, energies, positions, orientations, net_torque)
)

In [30]:
def _expand_pairs(samples, order):
    """Flatten per-sample pairs into one list, emitting ``sample[i]`` for each ``i`` in ``order``."""
    return [sample[i] for sample in samples for i in order]


# Every kept sample contributes two consecutive rows: first with entry 0 as the
# "particle" and entry 1 as its "neighbor", then with the roles swapped.
particle_pos = _expand_pairs(keep_pos, (0, 1))
neighbor_pos = _expand_pairs(keep_pos, (1, 0))
particle_orient = _expand_pairs(keep_orient, (0, 1))
neighbor_orient = _expand_pairs(keep_orient, (1, 0))
particle_force = _expand_pairs(keep_force, (0, 1))
particle_torque = _expand_pairs(keep_torque, (0, 1))

# The sample energy is split evenly between the two rows.
particle_energy = [half for energy in keep_energy for half in (energy / 2, energy / 2)]

In [31]:
# Total row count after expanding each kept sample into two rows.
len(particle_energy)

287076

In [10]:
# Spot-check the first neighbor orientation (4 components in the recorded
# output — presumably a quaternion; confirm against the data source).
neighbor_orient[0]

array([0.11585982, 0.23649178, 0.01719318, 0.96454784])

In [32]:
# Column names of the per-particle dataset, in the order they are stored.
# Every entry needs its own trailing comma: adjacent string literals without a
# comma are silently concatenated by Python (previously "neighbor_orient" and
# "force" merged into the single bogus name "neighbor_orientforce").
columns = [
    "particle_pos",
    "neighbor_pos",
    "particle_orient",
    "neighbor_orient",
    "force",
    "torque",
    "energy",
]

In [33]:
# Assemble the per-particle table: one row per (particle, neighbor) entry.
#
# The frame is built from an explicit name -> values mapping rather than by
# assigning into an empty `pd.DataFrame(columns=columns)`. With that pattern any
# name listed in `columns` that is never assigned stays behind as an all-NaN
# column, and any assigned name missing from `columns` is appended at the end,
# so a typo in the list silently corrupts the saved schema.
new_traj_df = pd.DataFrame(
    {
        "particle_pos": particle_pos,
        "neighbor_pos": neighbor_pos,
        "particle_orient": particle_orient,
        "neighbor_orient": neighbor_orient,
        "force": particle_force,
        "torque": particle_torque,
        "energy": particle_energy,
    }
)

In [34]:
import os

# Directory that receives the raw and train/val/test pickles.
target_datapath = "/home/erjank_project/caesreu/datasets/dimer_Jul27/"

# makedirs also creates missing parent directories, and exist_ok=True makes the
# cell safe to re-run (and avoids the check-then-create race of exists()/mkdir()).
os.makedirs(target_datapath, exist_ok=True)

In [35]:
# Persist the full (unsplit) table next to the train/val/test files.
raw_pickle_path = os.path.join(target_datapath, "raw.pkl")
new_traj_df.to_pickle(raw_pickle_path)

In [36]:
# Shuffle the rows once, with a fixed seed so the split is reproducible.
SPLIT_SEED = 42
new_traj_df = new_traj_df.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)

# Fractions of the dataset held out for testing and validation.
test_frac = 0.1
val_frac = 0.1

dataset_len = new_traj_df.shape[0]
test_len = int(dataset_len * test_frac)
val_len = int(dataset_len * val_frac)

# Positional (.iloc) slices are half-open, so the three splits are disjoint.
# Label-based .loc slices include BOTH endpoints, which previously put row
# `test_len` in test and val, and row `test_len + val_len` in val and train.
test_df = new_traj_df.iloc[:test_len]
val_df = new_traj_df.iloc[test_len:test_len + val_len]
train_df = new_traj_df.iloc[test_len + val_len:]

# The splits must partition the dataset exactly.
assert len(test_df) + len(val_df) + len(train_df) == dataset_len

# NOTE(review): rows are shuffled individually, so the two rows produced from
# the same original sample can end up in different splits — confirm this is
# acceptable for evaluation.

In [37]:
# Write each split to its own pickle inside target_datapath.
split_outputs = (
    ('train.pkl', train_df),
    ('val.pkl', val_df),
    ('test.pkl', test_df),
)
for pickle_name, split_df in split_outputs:
    split_df.to_pickle(os.path.join(target_datapath, pickle_name))