In [1]:
import signac
import pandas as pd
import numpy as np
import gsd.hoomd
import rowan

In [2]:
# Open the signac project (no path is passed) and materialise all of its
# jobs into a list so they can be iterated over by the following cells.
project = signac.get_project()
jobs = [job for job in project.find_jobs()]

In [3]:
# Flat accumulators: every job's "raw_data.pkl" table is appended, column by
# column, onto these lists so that samples from all jobs end up concatenated.
positions, orientations, energies = [], [], []
net_force, net_torque = [], []

for job in jobs:
    # NOTE(review): read_pickle can execute arbitrary code on load; presumably
    # these files were written by this project's own jobs - confirm.
    raw = pd.read_pickle(job.fn("raw_data.pkl"))
    positions += raw['position'].tolist()
    orientations += raw['orientation'].tolist()
    net_force += raw['net_force'].tolist()
    net_torque += raw['net_torque'].tolist()
    energies += raw['energy'].tolist()

In [4]:
# Open the lattice trajectory file; the path is relative to the notebook's
# working directory. The handle is only inspected (box of frame 0) below.
lattice = gsd.hoomd.open("lattice_init.gsd")

In [5]:
# Display the box stored in the first frame of the lattice file
# (recorded output: six values, 10/10/10 followed by three zeros).
lattice[0].configuration.box

array([10., 10., 10.,  0.,  0.,  0.], dtype=float32)

In [6]:
# Convert the accumulated Python lists into numpy arrays so the cells below
# can use array indexing. Names are rebound in place, as before.
positions, orientations, net_force, net_torque, energies = [
    np.asarray(values)
    for values in (positions, orientations, net_force, net_torque, energies)
]


In [7]:
from functools import reduce  # no longer needed here; retained in case other cells use it

# Keep only samples where every force component of both particles lies
# strictly inside (-FORCE_CUTOFF, FORCE_CUTOFF). This replaces six copy-pasted
# per-component filters and their index-set intersection with one mask.
FORCE_CUTOFF = 10

# net_force is indexed as [sample, particle, component]; the slice restricts
# the test to particles 0-1 and components 0-2, exactly the entries the
# original per-component filters inspected.
force_in_range = np.abs(net_force[:, :2, :3]) < FORCE_CUTOFF

# Sorted indices of samples whose six components are all in range (NaN and
# +/-inf fail the comparison and are therefore dropped, as before).
keep_idx = np.where(force_in_range.all(axis=(1, 2)))[0]

In [8]:
# Number of samples that survived the force filter (recorded output: 73400).
keep_idx.shape

(73400,)

In [9]:
# Apply the same surviving-sample index to every per-sample array so that the
# filtered arrays stay aligned with one another.
keep_force, keep_energy, keep_pos, keep_orient, keep_torque = [
    per_sample[keep_idx]
    for per_sample in (net_force, energies, positions, orientations, net_torque)
]

In [10]:
# Lowest energy among the kept samples. Uses the vectorised ndarray reduction
# instead of the Python builtin, which iterates element by element and whose
# result in the presence of NaN depends on element order.
keep_energy.min()

-2.1393925253592956

In [11]:
def _sandwich_product(vec_q, quat):
    """Return ``vec_q * (quat * conj(vec_q))`` using rowan's quaternion product."""
    return rowan.multiply(vec_q, rowan.multiply(quat, rowan.conjugate(vec_q)))


# Per-particle output lists. Each kept two-particle sample contributes two
# consecutive entries: one from the point of view of particle 0 and one from
# the point of view of particle 1.
particle_pos, neighbor_pos = [], []
features = []
particle_force, particle_torque, particle_energy = [], [], []

samples = zip(keep_pos, keep_orient, keep_force, keep_torque, keep_energy)
for pair_pos, pair_orient, pair_force, pair_torque, pair_energy in samples:
    particle_pos += [pair_pos[0], pair_pos[1]]
    neighbor_pos += [pair_pos[1], pair_pos[0]]

    # Separation vectors seen from each particle (computed independently).
    dr_1 = pair_pos[0] - pair_pos[1]
    dr_2 = pair_pos[1] - pair_pos[0]

    q1, q2 = pair_orient[0], pair_orient[1]

    # Embed each separation vector as a quaternion with zero scalar part.
    # NOTE(review): the vector is not normalised before being used in the
    # sandwich products below - confirm this is the intended feature.
    dr_1_q = np.concatenate([[0], dr_1])
    dr_2_q = np.concatenate([[0], dr_2])

    # Relative orientations: q1 * conj(q2) and q2 * conj(q1).
    q1q2 = rowan.multiply(q1, rowan.conjugate(q2))
    q2q1 = rowan.multiply(q2, rowan.conjugate(q1))

    # Feature vector layout: separation (3 values) followed by six
    # quaternions - own orientation, neighbour orientation, the two relative
    # orientations, then own and neighbour orientation sandwiched with the
    # separation quaternion.
    features_1 = np.concatenate([
        dr_1, q1, q2, q1q2, q2q1,
        _sandwich_product(dr_1_q, q1),
        _sandwich_product(dr_1_q, q2),
    ])
    features_2 = np.concatenate([
        dr_2, q2, q1, q2q1, q1q2,
        _sandwich_product(dr_2_q, q2),
        _sandwich_product(dr_2_q, q1),
    ])

    features += [features_1, features_2]
    particle_force += [pair_force[0], pair_force[1]]
    particle_torque += [pair_torque[0], pair_torque[1]]

    # The sample energy is split evenly between the two particles.
    half_energy = pair_energy / 2
    particle_energy += [half_energy, half_energy]

In [12]:
# Length of one feature vector (recorded output: 27). `features_1` is the
# variable left over from the final iteration of the loop above.
features_1.shape

(27,)

In [13]:
# Number of per-particle rows: two per kept sample (recorded output: 146800).
len(particle_energy)

146800

In [14]:
# Column names, in order, of the per-particle table assembled below.
columns = [
    "particle_pos", "neighbor_pos",
    "features",
    "force", "torque", "energy",
]

In [15]:
# Assemble the per-particle table: start from an empty frame with the declared
# column order, then fill each column from its accumulated list.
column_data = {
    "particle_pos": particle_pos,
    "neighbor_pos": neighbor_pos,
    "features": features,
    "force": particle_force,
    "torque": particle_torque,
    "energy": particle_energy,
}
new_traj_df = pd.DataFrame(columns=columns)
for column_name, column_values in column_data.items():
    new_traj_df[column_name] = column_values

In [16]:
import os

# Destination directory for the generated dataset files.
# NOTE(review): hard-coded absolute path; consider moving it to a config cell.
target_datapath = "/home/erjank_project/caesreu/datasets/dimer_Aug16/"

# makedirs also creates missing parent directories and, with exist_ok=True,
# does not fail if the directory already exists (no check-then-create race).
os.makedirs(target_datapath, exist_ok=True)

In [17]:
# Persist the full per-particle table (written before the shuffle/split cell
# below runs) as "raw.pkl" inside the target directory.
new_traj_df.to_pickle(os.path.join(target_datapath, "raw.pkl"))

In [18]:
# Shuffle all rows with a fixed seed so the train/val/test membership is
# reproducible across kernel restarts, then renumber the index 0..N-1.
SPLIT_SEED = 42
new_traj_df = new_traj_df.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)

test_frac = 0.1
val_frac = 0.1

dataset_len = new_traj_df.shape[0]
test_len = int(dataset_len * test_frac)
val_len = int(dataset_len * val_frac)

# Use positional, half-open slices. Label slices with `.loc` include BOTH
# endpoints, which placed row `test_len` in test and val, and row
# `test_len + val_len` in val and train (leakage between the splits).
test_df = new_traj_df.iloc[:test_len]
val_df = new_traj_df.iloc[test_len:test_len + val_len]
train_df = new_traj_df.iloc[test_len + val_len:]

# The three splits must partition the dataset exactly.
assert len(test_df) + len(val_df) + len(train_df) == dataset_len

In [19]:
# Write each split next to the raw table, in the same order as before:
# train, then validation, then test.
for split_df, split_filename in (
    (train_df, 'train.pkl'),
    (val_df, 'val.pkl'),
    (test_df, 'test.pkl'),
):
    split_df.to_pickle(os.path.join(target_datapath, split_filename))

In [20]:
# Rows and columns of the training split (recorded output: (117440, 6)).
train_df.shape

(117440, 6)

In [21]:
# 1% of the number of training rows, derived from the frame itself rather than
# from a literal hand-copied out of the previous cell's output.
train_df.shape[0] * 0.01

1174.4