# Col150 Analysis

## Tabulate Energy + Topology Data + Calculate Radius of Gyration

In [None]:
import os, sys, shutil, subprocess
import scipy
from scipy import ndimage
from scipy.spatial import distance

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

# Working directory at start-up; the 'opt_log' listing that collects the
# circle names is resolved against it.
path = os.getcwd()

In [None]:
def gyration(filename):
    """Return the radius of gyration of the points stored in ``filename``.

    The first whitespace-separated token of the first line is read as the
    number of points ``N``.  The coordinates of point ``j`` are taken from
    line ``5*j + 2`` (0-based), which must hold exactly three numeric tokens
    (x, y, z).
    NOTE(review): this presumably corresponds to 5-line records (label,
    origin, three axis vectors) per point -- confirm against the file writer.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    numpy.float64
        ``sqrt(mean(|r_j - <r>|**2))`` over the ``N`` points, or ``nan`` when
        ``N`` is not positive.

    Raises
    ------
    IndexError
        If the file is empty or has fewer lines than ``N`` implies.
    ValueError
        If the count or a coordinate is not numeric, or a coordinate line
        does not hold exactly three tokens.
    """
    # Context manager: the handle is released even if reading fails.
    with open(filename, 'r') as infile:
        rfdata = [line.split() for line in infile]

    N = int(rfdata[0][0])
    if N <= 0:
        # No points: the mean of an empty set is undefined.
        return np.float64(np.nan)

    rows = [rfdata[5 * j + 2] for j in range(N)]
    for j, row in enumerate(rows):
        if len(row) != 3:
            raise ValueError(
                'expected 3 coordinates for point %d in %s, got %d'
                % (j, filename, len(row))
            )

    # Build the (N, 3) coordinate array in one step rather than enlarging a
    # DataFrame row by row through the scalar accessor ``.at``.
    coords = np.array(rows, dtype=float)
    centred = coords - coords.mean(axis=0)
    return np.sqrt((centred ** 2).sum(axis=1).mean())

In [None]:
# Base-pair count recorded for every circle (stored in the 'tot_bp' column).
Ncirc = 150

# Row index of the 6x6 energy matrix -> name used in the 'eopt-<name>' columns.
ematrix_dict = dict(enumerate(['tilt', 'roll', 'twist', 'shift', 'slide', 'rise']))

# NOTE(review): presumably the initial-configuration labels found in the
# second '_' field of the file names -- confirm.
incons = ['oring', 'pcirc']

# Sequence labels 'col01' ... 'col09'.
inseqs = ['col' + format(i, '02d') for i in range(1, 10)]


In [None]:
# Collect the stem (text before the first '.') of every file in opt_log.
circles = []

for filename in os.listdir(path + '/opt_log'):
    name = filename.split('.')[0]
    # Entries whose stem has fewer than three '_'-separated fields (for
    # example hidden files such as '.DS_Store') are not circle logs.  They
    # used to abort the cell with a bare IndexError; skip them instead.
    if len(name.split('_')) < 3:
        continue
    circles.append(name)


In [None]:
# Compile energy, topology and radius-of-gyration data for every circle into
# one table (one row per file stem) and write it to disk.
#
# The order of the df.at assignments below determines the column order of the
# written CSV, so it is kept exactly as it was.

df = pd.DataFrame()
for circ in sorted(circles):
    # --- optimisation logs: labels parsed from the file name + energies ---
    for filename in os.listdir('opt_log'):
        if circ not in filename:
            continue
        name = filename.split('.')[0]
        # Context manager: the handle is closed even if parsing fails.
        with open('opt_log/' + filename, 'r') as infile:
            indata = [line.rstrip('\n') for line in infile]

        fields = name.split('_')
        df.at[name, 'incon'] = fields[1]
        df.at[name, 'seq'] = fields[0]
        df.at[name, 'seq_type'] = fields[2][0:3]
        df.at[name, 'insert_type'] = fields[2][3:]

        df.at[name, 'forcefield'] = fields[3]
        df.at[name, 'tw'] = np.nan

        df.at[name, 'tot_bp'] = Ncirc

        for line in indata:
            if 'initial energy:' in line:
                df.at[name, 'eo'] = float(line.split(':')[1])
            elif 'final energy:' in line:
                df.at[name, 'eopt'] = float(line.split(':')[1])

        # The six lines before the last one hold a brace-delimited,
        # comma-separated matrix; only its diagonal is stored.
        for i, row in enumerate(indata[-7:-1]):
            values = [float(v) for v in row.replace('{', '').replace('}', '').split(',')]
            df.at[name, 'eopt-' + ematrix_dict[i]] = values[i]

    # --- topology files: writhe, twist and linking number ---
    for filename in os.listdir('opt_topology'):
        if circ not in filename:
            continue
        # Drop the 'topo_' tag so the row label matches the log stem.
        name = filename.split('.')[0].replace('topo_', '')
        with open('opt_topology/' + filename, 'r') as infile:
            tail = [line.rstrip('\n') for line in infile][-4:]
        for line in tail:
            if 'Wr' in line:
                df.at[name, 'Wr'] = float(line.split('=')[1])
            elif 'Tw' in line:
                df.at[name, 'Tw'] = float(line.split('=')[1])
            elif 'Lk ' in line:
                df.at[name, 'Lk'] = int(line.split('=')[1])

    # --- reference frames: radius of gyration ---
    for filename in os.listdir('opt_refframe'):
        if circ not in filename:
            continue
        name = filename.split('.')[0]
        df.at[name, 'Rg'] = gyration('opt_refframe/' + filename)

df.tot_bp = df.tot_bp.astype(int)
df = df.sort_values(by=['incon', 'seq', 'seq_type'])
df.to_csv("data_col150-RT_seq-primary_std")
del df

In [None]:
# Reload the compiled table from disk; the first CSV column (the row labels
# written by to_csv) becomes the index.
df = pd.read_csv("data_col150-RT_seq-primary_std", index_col=0)
# Bare expression: the notebook renders the table as the cell output.
df

In [None]:
# Drop the table so the name `df` is unbound again.
del df