# Col150 Analysis

## Tabulate Energy + Topology Data + Calculate Radius of Gyration

In [1]:
import os, sys, shutil, subprocess
import scipy
from scipy import ndimage
from scipy.spatial import distance

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

# Current working directory; used as the base for locating the opt_log folder.
path = os.getcwd()

In [2]:
def gyration(filename):
    """Return the radius of gyration of the points stored in a frame file.

    The file is read as whitespace-separated tokens.  The first token of the
    first line is the number of records ``N``.  For record ``j`` (0-based) the
    x, y, z coordinates are the three tokens on line ``5*j + 2`` (0-based),
    i.e. every record spans five lines after a one-line header.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read.

    Returns
    -------
    numpy.float64
        ``sqrt(mean(|r_i - <r>|**2))`` over the ``N`` points, where ``<r>`` is
        their centroid.  ``nan`` when ``N`` is not positive.

    Raises
    ------
    IndexError
        If the file is empty or has fewer lines than ``N`` requires.
    ValueError
        If the count or a coordinate is not numeric, or a coordinate line
        does not hold exactly three values.
    """
    # The context manager closes the handle even when parsing fails below.
    with open(filename, 'r') as infile:
        rows = [line.split() for line in infile]

    n_points = int(rows[0][0])
    if n_points <= 0:
        # Mean over an empty set of points is undefined.
        return np.float64('nan')

    # Build the coordinate table in one step; `.at` only supports scalar
    # access, so filling a DataFrame row by row is avoided altogether.
    coords = np.array([rows[5 * j + 2] for j in range(n_points)], dtype=float)
    if coords.shape != (n_points, 3):
        raise ValueError(
            "expected exactly three coordinates per record in %r" % (filename,)
        )

    centred = coords - coords.mean(axis=0)
    return np.sqrt((centred ** 2).sum(axis=1).mean())

In [3]:
# Value written to the `tot_bp` column of every compiled row.
Ncirc = 150

# Diagonal index of the energy matrix -> name used in the `eopt-<name>` columns.
ematrix_dict = dict(enumerate(('tilt', 'roll', 'twist', 'shift', 'slide', 'rise')))

# Presumably the labels that appear in the `incon` field of the file names -- TODO confirm.
incons = ['oring', 'pcirc']

# Zero-padded sequence labels: col01 ... col09.
inseqs = ['col{:02d}'.format(idx) for idx in range(1, 10)]


In [4]:
# Collect the stems of the files in opt_log, split by circle family.
# The family is decided by the third underscore-separated field of the stem
# (the part of the file name before the first '.').
circles_c15   = []
circles_c30   = []

for filename in os.listdir(os.path.join(path, 'opt_log')):
    name   = filename.split('.')[0]
    fields = name.split('_')

    # Stray entries (hidden files, checkpoints, ...) have no third field;
    # skip them instead of failing with an IndexError.
    if len(fields) < 3:
        continue
    name2 = fields[2]

    if 'c15' in name2:
        circles_c15.append(name)
    elif 'c30' in name2:
        circles_c30.append(name)


In [5]:
# Compile all c15 circular data
#
# One row per run, indexed by the file stem, filled from three folders:
#   opt_log/       name fields, initial/final energy, diagonal of the energy matrix
#   opt_topology/  Wr, Tw, Lk
#   opt_refframe/  radius of gyration (Rg)
# The table is sorted and written to "data_col150-RT_seq-primary_c15".

# List every folder once instead of once per circle; files whose name
# contains 'shift' or 'dom' are ignored throughout.
log_files  = [f for f in os.listdir('opt_log')      if 'shift' not in f and 'dom' not in f]
topo_files = [f for f in os.listdir('opt_topology') if 'shift' not in f and 'dom' not in f]
ref_files  = [f for f in os.listdir('opt_refframe') if 'shift' not in f and 'dom' not in f]

df = pd.DataFrame()
for circ in sorted(circles_c15):
    for filename in log_files:
        if circ not in filename:
            continue
        name = filename.split('.')[0]
        # `with` closes the log even if one of the conversions below raises.
        with open('opt_log/'+filename, 'r') as infile:
            indata = [line.rstrip('\n') for line in infile]

        # Stem layout: <seq>_<incon>_<seq_type><insert_type>_<forcefield>
        parts = name.split('_')
        df.at[name, 'incon']       = parts[1]
        df.at[name, 'seq']         = parts[0]
        df.at[name, 'seq_type']    = parts[2][0:3]
        df.at[name, 'insert_type'] = parts[2][3:]

        df.at[name, 'forcefield'] = parts[3]
        df.at[name, 'tw']         = np.nan

        df.at[name, 'tot_bp']     = Ncirc

        for line in indata:
            if 'initial energy:' in line:
                df.at[name, 'eo'] = float( line.split(':')[1] )
            elif 'final energy:' in line:
                df.at[name, 'eopt'] = float( line.split(':')[1] )

        # The six lines before the last one hold the brace-delimited energy
        # matrix; only its diagonal is stored, one column per entry.
        for i, row in enumerate(indata[-7:-1]):
            values = [float(v) for v in row.replace('{', '').replace('}', '').split(',')]
            df.at[name, 'eopt-'+ematrix_dict[i]] = values[i]

    for filename in topo_files:
        if circ not in filename:
            continue
        # Topology files carry a 'topo_' marker that is not part of the row label.
        name = filename.split('.')[0].replace('topo_', '')
        with open('opt_topology/'+filename, 'r') as infile:
            indata = [line.rstrip('\n') for line in infile]

        # Only the last four lines are searched for the "<key> = <value>" entries.
        for line in indata[-4:]:
            if 'Wr' in line:
                df.at[name, 'Wr'] = float( line.split('=')[1] )
            elif 'Tw' in line:
                df.at[name, 'Tw'] = float( line.split('=')[1] )
            elif 'Lk ' in line:
                df.at[name, 'Lk'] = int( line.split('=')[1] )

    for filename in ref_files:
        if circ in filename:
            name = filename.split('.')[0]
            df.at[name, 'Rg'] = gyration('opt_refframe/'+filename)

df.tot_bp = df.tot_bp.astype(int)
df = df.sort_values(by=['incon','seq','seq_type'])
df.to_csv("data_col150-RT_seq-primary_c15")
del df

In [6]:
# Compile all c30 circular data
#
# One row per run, indexed by the file stem, filled from three folders:
#   opt_log/       name fields, initial/final energy, diagonal of the energy matrix
#   opt_topology/  Wr, Tw, Lk
#   opt_refframe/  radius of gyration (Rg)
# The table is sorted and written to "data_col150-RT_seq-primary_c30".

# List every folder once instead of once per circle; files whose name
# contains 'shift' or 'dom' are ignored throughout.
log_files  = [f for f in os.listdir('opt_log')      if 'shift' not in f and 'dom' not in f]
topo_files = [f for f in os.listdir('opt_topology') if 'shift' not in f and 'dom' not in f]
ref_files  = [f for f in os.listdir('opt_refframe') if 'shift' not in f and 'dom' not in f]

df = pd.DataFrame()
for circ in sorted(circles_c30):
    for filename in log_files:
        if circ not in filename:
            continue
        name = filename.split('.')[0]
        # `with` closes the log even if one of the conversions below raises.
        with open('opt_log/'+filename, 'r') as infile:
            indata = [line.rstrip('\n') for line in infile]

        # Stem layout: <seq>_<incon>_<seq_type><insert_type>_<forcefield>
        parts = name.split('_')
        df.at[name, 'incon']       = parts[1]
        df.at[name, 'seq']         = parts[0]
        # Keep only the first three characters here; the remainder of the
        # field is stored in insert_type, matching the c15 table.
        df.at[name, 'seq_type']    = parts[2][0:3]
        df.at[name, 'insert_type'] = parts[2][3:]

        df.at[name, 'forcefield'] = parts[3]
        df.at[name, 'tw']         = np.nan

        df.at[name, 'tot_bp']     = Ncirc

        for line in indata:
            if 'initial energy:' in line:
                df.at[name, 'eo'] = float( line.split(':')[1] )
            elif 'final energy:' in line:
                df.at[name, 'eopt'] = float( line.split(':')[1] )

        # The six lines before the last one hold the brace-delimited energy
        # matrix; only its diagonal is stored, one column per entry.
        for i, row in enumerate(indata[-7:-1]):
            values = [float(v) for v in row.replace('{', '').replace('}', '').split(',')]
            df.at[name, 'eopt-'+ematrix_dict[i]] = values[i]

    for filename in topo_files:
        if circ not in filename:
            continue
        # Topology files carry a 'topo_' marker that is not part of the row label.
        name = filename.split('.')[0].replace('topo_', '')
        with open('opt_topology/'+filename, 'r') as infile:
            indata = [line.rstrip('\n') for line in infile]

        # Only the last four lines are searched for the "<key> = <value>" entries.
        for line in indata[-4:]:
            if 'Wr' in line:
                df.at[name, 'Wr'] = float( line.split('=')[1] )
            elif 'Tw' in line:
                df.at[name, 'Tw'] = float( line.split('=')[1] )
            elif 'Lk ' in line:
                df.at[name, 'Lk'] = int( line.split('=')[1] )

    for filename in ref_files:
        if circ in filename:
            name = filename.split('.')[0]
            df.at[name, 'Rg'] = gyration('opt_refframe/'+filename)

df.tot_bp = df.tot_bp.astype(int)
df = df.sort_values(by=['incon','seq','seq_type'])
df.to_csv("data_col150-RT_seq-primary_c30")
del df

In [7]:
# Reload the saved c15 table, using the first CSV column as the row index,
# and display it as the cell output.
df = pd.read_csv("data_col150-RT_seq-primary_c15", index_col=0)
df

Unnamed: 0,incon,seq,seq_type,insert_type,forcefield,tw,tot_bp,eo,eopt,eopt-tilt,eopt-roll,eopt-twist,eopt-shift,eopt-slide,eopt-rise,Wr,Tw,Lk,Rg
col01_oring_c15v1_ideal-rt,oring,col01,c15,v1,ideal-rt,,150,99.120116,31.585427,9.223307,9.223072,13.119494,0.009777,0.009777,0.000000,2.241920e-10,15.0000,15.0,81.174909
col01_oring_c15v1_kabsch-rt,oring,col01,c15,v1,kabsch-rt,,150,143.262188,17.902216,8.358328,8.351879,1.190068,0.000926,0.000925,0.000089,2.518200e-02,14.9748,15.0,80.979825
col01_oring_c15v1_olson-rt,oring,col01,c15,v1,olson-rt,,150,220.065902,17.628959,8.331030,8.332439,0.963773,0.000822,0.000822,0.000075,1.768680e-02,14.9823,15.0,78.672683
col01_oring_c15v1_oring-rt,oring,col01,c15,v1,oring-rt,,150,89.260937,18.446400,9.223211,9.223189,0.000000,0.000000,0.000000,0.000000,-4.331470e-11,15.0000,15.0,81.174955
col01_oring_c15v2_ideal-rt,oring,col01,c15,v2,ideal-rt,,150,99.120116,31.585427,9.223307,9.223072,13.119494,0.009777,0.009777,0.000000,2.241920e-10,15.0000,15.0,81.174909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
col09_pcirc_c15v2_oring-rt,pcirc,col09,c15,v2,oring-rt,,150,499.114549,22.756785,8.130308,16.840612,9.902790,0.001554,0.001532,0.001240,-9.441070e-01,14.9441,14.0,54.865869
col09_pcirc_c15v3_ideal-rt,pcirc,col09,c15,v3,ideal-rt,,150,351.389001,17.241678,6.834964,13.426019,6.020413,0.000940,0.000944,0.000126,9.361970e-04,13.9991,14.0,81.162391
col09_pcirc_c15v3_kabsch-rt,pcirc,col09,c15,v3,kabsch-rt,,150,444.122517,17.840761,6.602632,9.800570,6.010336,0.002518,0.002479,0.001478,1.315870e-02,13.9868,14.0,80.790068
col09_pcirc_c15v3_olson-rt,pcirc,col09,c15,v3,olson-rt,,150,957.231742,27.531668,9.107916,19.479838,14.831837,0.007271,0.007196,0.010701,-3.772350e-01,14.3772,14.0,69.917963


In [8]:
# Drop the reference to the reloaded table.
del df