In [None]:
import os
import sys
import time

# Make modules two directories above the notebook importable
# (presumably where the local `Constants` module lives — confirm).
sys.path.append('../..')
# Hide all CUDA devices so GPU-aware libraries imported below run on the CPU only.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
#import seaborn as sns
import tensorflow as tf

from Constants import EV_TO_KJ, H_TO_KJ, BOHR_TO_ANGSTROM

from ase import Atoms
from ase.io import read
from ase.io.trajectory import Trajectory

from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA
from scipy.constants import Boltzmann, Avogadro
from scipy.stats import linregress

import mdtraj.utils.unit.unit_definitions as u

In [None]:
# Nested dict indexed as SYSTEMS_DATA[system_name][polymorph_key] in the cells below.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load trusted files.
SYSTEMS_DATA = np.load('../../../data/test_sets/DATA_CSP_ranking.npy', allow_pickle=True).item()


def RT(T):
    """Return the molar thermal energy R*T in kJ/mol for a temperature ``T`` in kelvin.

    ``Boltzmann * Avogadro`` is the molar gas constant in J/(mol K); dividing
    by 1000 converts the result from J/mol to kJ/mol.
    """
    return (Boltzmann * Avogadro) * T / 1000


def RMSE(x, y):
    """Root-mean-square of ``x - y``, rounded to one decimal place."""
    return np.round(np.sqrt(np.mean(np.square(np.subtract(x, y)))), 1)


def MAE(x, y):
    """Mean absolute value of ``x - y``, rounded to one decimal place."""
    return np.round(np.mean(np.abs(np.subtract(x, y))), 1)


def ME(x, y):
    """Mean signed value of ``x - y``, rounded to one decimal place."""
    return np.round(np.mean(np.subtract(x, y)), 1)


def STD(x, y):
    """Population standard deviation of ``x - y``, rounded to one decimal place."""
    return np.round(np.std(np.subtract(x, y)), 1)

def average_data(data, start, end):
    """Average a 2-D collection row-wise, then summarise the row means.

    Parameters
    ----------
    data : iterable of equal-length sequences
        Materialised into a 2-D NumPy array.
    start, end : int
        Slice bounds applied to the FIRST axis (rows) of that array.

    Returns
    -------
    (mean, std) : tuple of floats
        Mean and population standard deviation of the per-row means of the
        selected rows.

    NOTE(review): the slice selects rows while the inner mean runs along
    axis 1; if ``start``/``end`` were meant as a time window on rows of
    shape (replica, time) this would need ``[:, start:end]`` — confirm.
    """
    stacked = np.array(list(data))
    per_row = stacked[start:end].mean(axis=1)
    return per_row.mean(), per_row.std()

In [None]:
# Path prefix (trailing slash required) under which the per-system
# trajectory, topology and log files are looked up as
# f'{FOLDER_PATH}{SYSTEM_NAME}/{SYSTEM_NAME}_{POLY_KEY}_{REPLICA}...'.
FOLDER_PATH = 'data_md/'

In [None]:
# Convert every ASE `.traj` file into files mdtraj can read:
#   <stem>.pdb       multi-frame PDB (intermediate, rebuilt from scratch)
#   <stem>.dcd       trajectory with unit-cell vectors attached
#   <stem>_topo.pdb  first frame only, used as topology when loading the DCD
# A trajectory that cannot be converted is skipped (best effort), but the
# reason is reported instead of being silently swallowed.
for SYSTEM_NAME in ['XXVI', 'XXII', 'XXIII']:
    for POLY_KEY in SYSTEMS_DATA[SYSTEM_NAME]:
        for REPLICA in range(1):
            NAME_STEM = f'{FOLDER_PATH}{SYSTEM_NAME}/{SYSTEM_NAME}_{POLY_KEY}_{REPLICA}'
            NAME_TRAJFILE = f'{NAME_STEM}.traj'
            NAME_PDBFILE = f'{NAME_STEM}.pdb'
            try:
                traj_ase = Trajectory(NAME_TRAJFILE)
                try:
                    # Frames are appended below, so a PDB left over from an
                    # earlier run must go first. Only "file does not exist" is
                    # ignored; any other failure (e.g. permissions) would leave
                    # stale frames in place and is therefore reported.
                    try:
                        os.remove(NAME_PDBFILE)
                    except FileNotFoundError:
                        pass
                    cells = []
                    for atoms in traj_ase:
                        atoms.write(NAME_PDBFILE, append=True)
                        cells.append(atoms.get_cell().array)
                finally:
                    # Release the file handle even if writing a frame failed.
                    traj_ase.close()
                cells = np.array(cells)

                trajectory = md.load(NAME_PDBFILE)
                # ASE cells are in Angstrom, mdtraj expects nanometres.
                trajectory.unitcell_vectors = 0.1 * cells
                trajectory.save_dcd(f'{NAME_STEM}.dcd')
                trajectory[0].save_pdb(f'{NAME_STEM}_topo.pdb')
            except Exception as err:
                # `Exception` (not a bare except) lets KeyboardInterrupt through.
                print(f'Skipping {NAME_TRAJFILE}: {type(err).__name__}: {err}')
                continue

In [None]:
# --- Configuration of the single system analysed by the cells below ---
SYSTEM_NAME = 'XXVI'

# Temperature used for kbT: 150 for system 'XXII', 300 for every other system.
T = 150 if SYSTEM_NAME == 'XXII' else 300

# Number of replica runs read per polymorph.
N_REPLICA = 1

# NOTE(review): looks like a 1 bar * Angstrom^3 -> kJ/mol pressure-volume
# conversion factor (1e5 Pa, 1e-30 m^3, 1e-3 kJ/J) — confirm.
PREFACTOR_P = 1e5 * Avogadro * 1e-33

# Molar thermal energy at T, scaled by 1e-3 (J/mol -> kJ/mol).
kbT = Boltzmann * Avogadro * T * 1e-3

# Window applied as [T_start:T_end] to trajectory frames and log rows.
T_start = 0
T_end = 1000

# Atoms per molecule, taken from the monomer symbols of polymorph 'N100'.
MOL_SIZE = len(SYSTEMS_DATA[SYSTEM_NAME]['N100']['monomer_symbols'])

# Previously stored results for this system (a pickled dict) and the
# container that the following cell fills with MD summaries.
RESULTS = np.load(f'data/RESULTS_{SYSTEM_NAME}.npy', allow_pickle=True).item()
DATA = {}

In [None]:
# Set to True to plot the per-replica time series of every polymorph while debugging.
PLOT_DIAGNOSTICS = False

# For each polymorph of SYSTEM_NAME, load all replicas and store in DATA[POLY_KEY]:
#   'V_pot', 'T_sim'          log columns 2 and 4, one row per replica
#                             (presumably potential energy and temperature — confirm);
#                             V_pot is divided by the molecule count and scaled by EV_TO_KJ
#   'density', 'volume'       per-frame values from mdtraj (volume: nm^3 -> A^3 per molecule)
#   'rmsd'                    per-frame RMSD to the first frame of the window
#   'cells'                   unit-cell vectors, nm -> Angstrom
#   '*_mu' / '*_std'          mean / std over replicas of the per-replica time averages
#   'cell_mu', 'n_molecules'  mean cell over replicas and frames; molecules per cell
# A polymorph is left out of DATA when any replica is shorter than T_end frames
# or fails to load; the reason is printed.
for POLY_KEY in SYSTEMS_DATA[SYSTEM_NAME]:
    V_pot, T_sim, rmsd, density, volume, cells = [], [], [], [], [], []
    N_MOLECULES = None
    too_short = False
    try:
        for REPLICA in range(N_REPLICA):
            FILE_NAME = f'{FOLDER_PATH}{SYSTEM_NAME}/{SYSTEM_NAME}_{POLY_KEY}_{REPLICA}'
            traj = md.load(f'{FILE_NAME}.dcd', top=f'{FILE_NAME}_topo.pdb')
            N_MOLECULES = traj.n_atoms // MOL_SIZE
            if len(traj) < T_end:
                # Not enough frames for the analysis window: report the length.
                print(POLY_KEY, '-', len(traj))
                too_short = True
                break
            traj = traj[T_start:T_end]

            V_pot_, T_sim_ = np.loadtxt(f'{FILE_NAME}.log', skiprows=1, usecols=[2, 4], unpack=True)
            V_pot_ = (V_pot_ / N_MOLECULES) * EV_TO_KJ
            V_pot.append(V_pot_[T_start:T_end])
            T_sim.append(T_sim_[T_start:T_end])

            # NOTE(review): mass density is intensive, so dividing it by the
            # molecule count looks unintended; kept as-is so saved data stays
            # comparable with earlier runs — confirm.
            density.append(md.density(traj) / N_MOLECULES)
            volume.append(traj.unitcell_volumes * 1e3 / N_MOLECULES)
            rmsd.append(md.rmsd(traj, traj[0]))
            cells.append(traj.unitcell_vectors * 10)

        if too_short or not V_pot:
            # pop() instead of del: the key may legitimately be absent already.
            DATA.pop(POLY_KEY, None)
            continue

        entry = {
            'V_pot': np.array(V_pot),
            'T_sim': np.array(T_sim),
            'density': np.array(density),
            'volume': np.array(volume),
            'rmsd': np.array(rmsd),
            'cells': np.array(cells),
        }
        # Time-average each replica first, then take mean/std across replicas.
        V_pot_mu = np.mean(entry['V_pot'], axis=-1)
        T_sim_mu = np.mean(entry['T_sim'], axis=-1)
        entry['V_pot_mu'] = np.mean(V_pot_mu)
        entry['V_pot_std'] = np.std(V_pot_mu)
        entry['T_sim_mu'] = np.mean(T_sim_mu)
        entry['T_sim_std'] = np.std(T_sim_mu)
        entry['cell_mu'] = np.mean(entry['cells'], axis=(0, 1))
        entry['n_molecules'] = N_MOLECULES
    except Exception as err:
        # Best effort: drop the polymorph but say why. `Exception` (rather than
        # a bare except) keeps KeyboardInterrupt working.
        DATA.pop(POLY_KEY, None)
        print(POLY_KEY, '-', f'{type(err).__name__}: {err}')
        continue

    DATA[POLY_KEY] = entry

    if PLOT_DIAGNOSTICS:
        for series_key, mean_key in (('V_pot', 'V_pot_mu'), ('T_sim', 'T_sim_mu'), ('density', None)):
            fig, ax = plt.subplots()
            for series in entry[series_key]:
                ax.plot(series)
            if mean_key is not None:
                ax.hlines(entry[mean_key], 0, T_end - T_start, color='red')
            ax.set_title(f'{POLY_KEY} - {series_key}')
            plt.show()
            plt.close(fig)


In [None]:
# Persist the DATA dict for the current SYSTEM_NAME. np.save appends '.npy'
# and pickles the dict into a 0-d object array, so it has to be read back
# with np.load(..., allow_pickle=True).item().
np.save(f'data/MD_DATA_{SYSTEM_NAME}', DATA)

In [None]:
# Print every (system, polymorph) pair whose `<system>_<polymorph>_ediffs.npy`
# file is missing under data_ah/<system>/.
# Lower-case loop variables are used on purpose: iterating with SYSTEM_NAME /
# POLY_KEY would overwrite the notebook-level configuration, so re-running the
# analysis or save cells afterwards would silently target the wrong system.
for system_name, polymorphs in SYSTEMS_DATA.items():
    for poly_key in polymorphs:
        ediffs_path = os.path.join('data_ah/', f'{system_name}/{system_name}_{poly_key}_ediffs.npy')
        if not os.path.isfile(ediffs_path):
            print(system_name, poly_key)