# Col150 Analysis

## Tabulate Energy + Topology Data + Calculate Radius of Gyration

In [None]:
import os, sys, shutil, subprocess
import scipy
from scipy import ndimage
from scipy.spatial import distance

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

# Working directory at the time this cell runs; the relative paths used below
# ('in_topology', 'in_refframe') are resolved against it.
path = os.getcwd()

In [None]:
def gyration(filename):
    """Return the radius of gyration of the points stored in *filename*.

    The file is read as whitespace-separated text.  The first token of the
    first line is the number of records ``N``.  Record ``j`` (0-based)
    contributes the three values on line ``5*j + 2`` (0-based), which are
    used as its x, y, z coordinates.  (Presumably these are the origins of
    successive reference frames -- confirm against the file format.)

    The result is ``sqrt(mean_j |r_j - <r>|**2)``, where ``<r>`` is the
    mean position of the ``N`` points.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    float
        Radius of gyration, or NaN when the file declares no records.

    Raises
    ------
    IndexError
        If the file is empty or shorter than its declared record count.
    ValueError
        If a coordinate line does not hold exactly three numeric values.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, 'r') as infile:
        rows = [line.split() for line in infile]

    n_points = int(rows[0][0])
    if n_points <= 0:
        # No points: the mean is undefined.
        return np.nan

    # Build the coordinate table in one step instead of growing a DataFrame
    # one row at a time.
    coords = np.array([rows[5 * j + 2] for j in range(n_points)], dtype=float)
    if coords.ndim != 2 or coords.shape[1] != 3:
        raise ValueError(
            'expected three coordinates per record in %r' % (filename,)
        )

    offsets = coords - coords.mean(axis=0)
    return np.sqrt((offsets ** 2).sum(axis=1).mean())

In [None]:
# Value stored in the 'tot_bp' column for every tabulated structure.
Ncirc = 150

# Index -> name lookup for the six entries tilt, roll, twist, shift, slide, rise.
ematrix_dict = dict(enumerate(['tilt', 'roll', 'twist', 'shift', 'slide', 'rise']))

incons = ['oring', 'pcirc']

# Sequence labels: 'col01' .. 'col09', plus the named repeat sequences.
inseqs1 = ['col{:02d}'.format(k) for k in range(1, 10)]
inseqs2 = ['A150', 'G150', 'A75G75', 'A15G15', 'A1G1', 'A2G2', 'A3G3', 'A4G4', 'A5G5']

# Sequence variants with the colour and marker assigned to each.
seqvars = ['std', 'c05a', 'c15a', 'c20a', 'c30a']
seqvars_colors = dict(zip(seqvars, ['grey', 'blue', 'red', 'gold', 'green']))
seqvars_marks = dict(zip(seqvars, ['s', '<', '^', 'd', 'P']))

# 'N-state' labels, the short codes that map onto them, and their
# description, colour and marker tables.
ff_st = ['{}-state'.format(k) for k in range(1, 5)]
ff_dict = dict(zip(['idt', 'org', 'vic', 'vor'], ff_st))
ff_st_dict = dict(zip(ff_st, ['ideal', 'oring', 'zhurkin', 'zhurkin+oring']))
ff_st_colors = dict(zip(ff_st, ['gold', 'dodgerblue', 'limegreen', 'tomato']))
ff_st_marks = dict.fromkeys(ff_st, '.')

# Second label family with its own colour and marker tables.
ff_ind = ['ideal', 'kabsch', 'olson']
ff_ind_colors = dict(zip(ff_ind, ['darkgreen', 'crimson', 'dodgerblue']))
ff_ind_marks = dict(zip(ff_ind, ['.', '^', 'd']))

# Base-pairs-per-turn series from 9.0 to 11.0 in steps of 0.1.  rs_bpt holds
# the same values as zero-padded three-character labels (value * 10).
rs_bpt             = ['090','091','092','093','094','095','096','097','098','099','100',
                      '101','102','103','104','105','106','107','108','109','110']
rs_bpturn          = [9.0,9.1,9.2,9.3,9.4,9.5,9.6,9.7,9.8,9.9,10.0,
                      10.1,10.2,10.3,10.4,10.5,10.6,10.7,10.8,10.9,11.0]
# 360 / (bp per turn), rounded to two decimals (presumably the twist in
# degrees per base-pair step -- confirm).
rs_tw              = [round(float(360/i), 2) for i in rs_bpturn]

# One gnuplot-colormap colour per series entry, sampled evenly over [0, 1].
colors             = plt.cm.gnuplot(np.linspace(0, 1, len(rs_tw)))

# Pair the k-th key with the k-th colour.  A nested "for i ... for j ..."
# comprehension would overwrite every key with the last colour instead.
rs_tw_colors       = dict(zip(rs_tw, colors))
rs_bpturn_colors   = dict(zip(rs_bpturn, colors))
rs_bpt_colors      = dict(zip(rs_bpt, colors))

In [None]:
# Build one table row per structure and write it to disk:
#   * seq / incon / seq_type come from the topology file name,
#   * Wr / Tw / Lk are read from the last four lines of the topology file,
#   * Rg is computed by gyration() from the file in 'in_refframe'.
df = pd.DataFrame()

# Sorted so the row insertion order does not depend on the file system.
for filename in sorted(os.listdir('in_topology')):
    if filename.startswith('.'):
        # Hidden entries (e.g. .DS_Store) have no <seq>_<incon>_<seq_type>
        # stem and would fail the name parsing below.
        continue

    name = filename.split('.')[0]
    name = name.replace('topo_', '')

    # The stem is expected to read <seq>_<incon>_<seq_type>.
    fields = name.split('_')
    df.at[name, 'seq']      = fields[0]
    df.at[name, 'incon']    = fields[1]
    df.at[name, 'seq_type'] = fields[2]
    df.at[name, 'tot_bp']   = Ncirc

    with open(os.path.join('in_topology', filename), 'r') as infile:
        indata = [line.rstrip('\n') for line in infile]

    # Only the last four lines are searched; each value follows an '=' sign.
    for line in indata[-4:]:
        if 'Wr' in line:
            df.at[name, 'Wr'] = float( line.split('=')[1] )
        elif 'Tw' in line:
            df.at[name, 'Tw'] = float( line.split('=')[1] )
        elif 'Lk ' in line:
            df.at[name, 'Lk'] = int( line.split('=')[1] )

# NOTE(review): no prefix is stripped from these names.  A stem that does not
# match a topology row adds a new row with missing tot_bp, which would make
# the integer cast below fail -- confirm the file naming convention.
for filename in sorted(os.listdir('in_refframe')):
    if filename.startswith('.'):
        continue
    name = filename.split('.')[0]
    radius = gyration(os.path.join('in_refframe', filename))
    df.at[name, 'Rg'] = radius

df.tot_bp = df.tot_bp.astype(int)
df = df.sort_values(by=['seq','incon'])
df.to_csv("initial-data_col150_fullset")
del df