In [1]:
import os, sys, shutil, scipy
import numpy as np
import pandas as pd
from scipy.spatial import distance
from sklearn.decomposition import PCA

# Directory the kernel was started from; later cells build data paths from it.
path = os.getcwd()

# Choose the force-field and initial-condition directories for the current
# machine, decided by whether 'Young_Research' occurs in the working directory.
# The fallback layout has no initial-condition directory configured (empty string).
ffpath, inpath = (
    (
        "/home/rty10/Documents/Experiments/Optimizations/Opt_ForceFields",
        "",
    )
    if 'Young_Research' not in path
    else (
        "C:\\Users\\Young_Research\\Documents\\Rutgers\\Research\\2020_PHY_MSc\\data\\forcefields",
        "C:\\Users\\Young_Research\\Documents\\Rutgers\\Research\\2020_PHY_MSc\\data\\initial_conditions",
    )
)


In [2]:
def gyration(filename):
    """Return the radius of gyration of the points listed in a text file.

    File layout read by this function (0-based line numbers):
      * line 0: its first whitespace-separated token is the point count N;
      * line 5*j + 2, for j = 0 .. N-1: exactly three whitespace-separated
        numbers, the x, y, z coordinates of point j.
    All other lines are ignored (presumably the remaining lines of each
    5-line record hold frame data -- TODO confirm against the file writer).

    The result is sqrt(mean(|r_j - r_mean|^2)) over the N points.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    numpy.float64
        Radius of gyration; NaN when the file declares no points.

    Raises
    ------
    ValueError
        If a coordinate line does not hold exactly three numeric tokens.
    IndexError
        If the file has fewer lines than the declared count requires.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'r') as infile:
        rows = [line.split() for line in infile]

    N = int(rows[0][0])
    if N <= 0:
        # No points: keep returning NaN instead of raising.
        return np.float64(np.nan)

    # Gather all coordinate lines at once rather than filling a DataFrame one
    # row at a time through the scalar accessor `.at`.
    coords = np.array([rows[5*j + 2] for j in range(N)], dtype=float)
    if coords.ndim != 2 or coords.shape[1] != 3:
        raise ValueError(
            "expected three coordinates per point in %r, got array of shape %r"
            % (filename, coords.shape)
        )

    # Squared distance of every point from the centroid, then root of the mean.
    offsets = coords - coords.mean(axis=0)
    return np.sqrt((offsets ** 2).sum(axis=1).mean())

In [5]:
# Value written to the 'tot_bp' column of every row (presumably the number of
# base pairs per minicircle -- TODO confirm).
Ncirc = 150
# Index -> step-parameter name; used to label the diagonal entries of the
# energy matrix read from each optimization log.
ematrix_dict = {0:'tilt', 1:'roll', 2:'twist', 3:'shift', 4:'slide', 5:'rise'}
# Initial-condition and sequence labels (they match the second and first
# underscore-separated fields of the log file names seen in the outputs).
incons = ['pc14','pc15','or15']
inseqs = ['col'+str(i).zfill(2) for i in range(5, 10, 1)]

# One entry per optimization log: the file name without its '.log' suffix.
# Only '*.log' entries are kept and only the final extension is removed, so
# hidden files, sub-directories, or stems containing a dot can no longer yield
# a name for which 'opt_log/<name>.log' does not exist.
circles = [
    os.path.splitext(filename)[0]
    for filename in os.listdir(os.path.join(path, 'opt_log'))
    if filename.endswith('.log')
]


In [6]:
# Build one summary row per optimized circle from three files:
#   opt_log/<name>.log            -> initial/final energy and the energy matrix
#   opt_topology/topo_<name>.txt  -> Wr, Tw, Lk
#   opt_refframe/<name>.dat       -> radius of gyration
# Rows are collected as dicts and turned into a DataFrame once, instead of
# growing the frame one cell at a time. Keys are inserted in the same order the
# columns were previously created, so the saved CSV keeps its column layout.
names = sorted(set(circles))
records = []

for circ in names:
    # File names follow <seq>_<incon>_<forcefield>.
    parts = circ.split('_')
    row = {
        'seq':        parts[0],
        'incon':      parts[1],
        'forcefield': parts[2],
        'tot_bp':     Ncirc,
    }

    # --- optimization log -------------------------------------------------
    # `with` guarantees the handle is closed even if parsing below raises.
    with open('opt_log/'+circ+'.log', 'r') as infile:
        log_lines = [line.rstrip('\n') for line in infile]

    for line in log_lines:
        if 'initial energy:' in line:
            row['eo'] = float( line.split(':')[1] )
        elif 'final energy:' in line:
            row['eopt'] = float( line.split(':')[1] )

    # The six lines just before the last line of the log are parsed as
    # brace-delimited, comma-separated rows of numbers (the energy matrix).
    ematrix = [
        [float(value) for value in line.replace('{', '').replace('}', '').split(',')]
        for line in log_lines[-7:-1]
    ]
    # Diagonal entries are stored per step parameter ...
    for i, matrix_row in enumerate(ematrix):
        row['eopt-'+ematrix_dict[i]] = matrix_row[i]
    # ... plus the single off-diagonal entry at row 1, column 2
    # (roll/twist in ematrix_dict's ordering).
    row['eopt-coupled'] = ematrix[1][2]

    # --- topology summary ---------------------------------------------------
    with open('opt_topology/topo_'+circ+'.txt', 'r') as infile:
        topo_tail = [line.rstrip('\n') for line in infile][-4:]

    for line in topo_tail:
        if 'Wr' in line:
            row['Wr'] = float( line.split('=')[1] )
        elif 'Tw' in line:
            row['Tw'] = float( line.split('=')[1] )
        elif 'Lk ' in line:
            # Still parsed as an integer (a non-integer value raises), but
            # stored as float: cell-wise assignment always upcast this column,
            # and the saved CSV shows it as e.g. 15.0.
            row['Lk'] = float( int( line.split('=')[1] ) )

    # --- radius of gyration ---------------------------------------------------
    row['Rg'] = gyration('opt_refframe/'+circ+'.dat')

    records.append(row)

df = pd.DataFrame(records, index=names)

df['tot_bp'] = df['tot_bp'].astype(int)
df = df.sort_values(by=['incon','seq'])

# No file extension on purpose: the next cell reads this exact name back.
df.to_csv("data_col150_set03")

del df

In [7]:
# Reload the summary table saved under this name by the table-building cell;
# the first CSV column (the circle names) becomes the index.
df = pd.read_csv("data_col150_set03", index_col=0)
df.head()

Unnamed: 0,seq,incon,forcefield,tot_bp,eo,eopt,eopt-tilt,eopt-roll,eopt-twist,eopt-shift,eopt-slide,eopt-rise,eopt-coupled,Wr,Tw,Lk,Rg
col05_or15_bdna98-crt,col05,or15,bdna98-crt,150,255.707516,16.078913,6.088202,14.375129,6.794919,0.000626,0.000625,1e-06,-5.590294,0.039685,14.9603,15.0,79.480113
col05_or15_bdna98-frt,col05,or15,bdna98-frt,150,256.418115,13.53276,4.347794,15.998877,8.842456,0.000414,0.000414,0.0,-7.828597,0.04572,14.9543,15.0,79.448902
col05_or15_ideal-crt,col05,or15,ideal-crt,150,108.880931,24.484078,6.345419,19.020368,18.896761,0.004735,0.004733,3e-06,-9.893971,0.004543,14.9955,15.0,81.148214
col05_or15_ideal-frt,col05,or15,ideal-frt,150,123.527126,20.198586,4.73716,21.722319,19.608236,0.002759,0.002759,4e-06,-12.937326,0.010793,14.9892,15.0,81.114858
col05_or15_kabsch-crt,col05,or15,kabsch-crt,150,226.296391,57.3999,6.4489,39.458551,69.763466,0.020991,0.020931,6.9e-05,-29.156504,0.022724,14.9773,15.0,81.041773


In [8]:
# Show every row whose 'incon' is 'pc14' and whose 'forcefield' is 'ideal-crt'.
df.loc[(df.incon=='pc14')&(df.forcefield=='ideal-crt')]

Unnamed: 0,seq,incon,forcefield,tot_bp,eo,eopt,eopt-tilt,eopt-roll,eopt-twist,eopt-shift,eopt-slide,eopt-rise,eopt-coupled,Wr,Tw,Lk,Rg
col05_pc14_ideal-crt,col05,pc14,ideal-crt,150,351.389126,16.645444,6.253437,14.705188,7.301202,0.000753,0.000753,0.0,-5.807945,0.000944,13.9991,14.0,81.169264
col06_pc14_ideal-crt,col06,pc14,ideal-crt,150,343.200501,16.59641,6.198244,14.78762,7.395109,0.00077,0.000771,1e-06,-5.893052,3.2e-05,14.0,14.0,81.160399
col07_pc14_ideal-crt,col07,pc14,ideal-crt,150,351.389069,16.692348,6.292457,14.593323,7.231192,0.000773,0.000773,1e-06,-5.713086,0.002361,13.9976,14.0,81.121812
col08_pc14_ideal-crt,col08,pc14,ideal-crt,150,343.200636,16.580035,6.250664,14.663364,7.196251,0.000728,0.000729,2e-06,-5.765851,0.000535,13.9995,14.0,81.141889
col09_pc14_ideal-crt,col09,pc14,ideal-crt,150,351.388235,17.196724,6.692662,13.742661,6.524393,0.00115,0.001152,9.8e-05,-4.882696,0.000934,13.9991,14.0,81.163777


In [9]:
# Show every row whose 'incon' is 'pc14' and whose 'forcefield' is 'ideal-frt'.
# NOTE(review): the rows displayed for col05-col09 are numerically identical in
# every column -- confirm whether that is expected for this force field or
# whether the same result files were copied for all five sequences.
df.loc[(df.incon=='pc14')&(df.forcefield=='ideal-frt')]

Unnamed: 0,seq,incon,forcefield,tot_bp,eo,eopt,eopt-tilt,eopt-roll,eopt-twist,eopt-shift,eopt-slide,eopt-rise,eopt-coupled,Wr,Tw,Lk,Rg
col05_pc14_ideal-frt,col05,pc14,ideal-frt,150,351.388717,13.968931,4.476841,16.49235,9.354939,0.000421,0.000421,0.0,-8.17802,0.002581,13.9974,14.0,81.161442
col06_pc14_ideal-frt,col06,pc14,ideal-frt,150,351.388717,13.968931,4.476841,16.49235,9.354939,0.000421,0.000421,0.0,-8.17802,0.002581,13.9974,14.0,81.161442
col07_pc14_ideal-frt,col07,pc14,ideal-frt,150,351.388717,13.968931,4.476841,16.49235,9.354939,0.000421,0.000421,0.0,-8.17802,0.002581,13.9974,14.0,81.161442
col08_pc14_ideal-frt,col08,pc14,ideal-frt,150,351.388717,13.968931,4.476841,16.49235,9.354939,0.000421,0.000421,0.0,-8.17802,0.002581,13.9974,14.0,81.161442
col09_pc14_ideal-frt,col09,pc14,ideal-frt,150,351.388717,13.968931,4.476841,16.49235,9.354939,0.000421,0.000421,0.0,-8.17802,0.002581,13.9974,14.0,81.161442


In [10]:
# Drop the summary table from the kernel namespace.
del df