# 2018 Col150 Data Set
## Data and Graphical Analysis

In [None]:
import os, sys, shutil, subprocess
import scipy
from scipy import ndimage
from scipy.spatial import distance

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Setting figure.max_open_warning to 0 turns off matplotlib's "too many open figures" warning.
matplotlib.rcParams.update({'figure.max_open_warning': 0})
# Default font size (in points) for all text drawn in figures.
matplotlib.rcParams.update({'font.size': 12})


%matplotlib inline
# Working directory at the time this cell runs; later cells build data paths relative to it.
path = os.getcwd()

In [None]:
# ---------------------------------------------------------------------------
# Labels, colours and markers shared by the analysis cells below.
# ---------------------------------------------------------------------------

# NOTE(review): presumably the number of base pairs per circle (Col150) - confirm.
Ncirc = 150

# Position -> parameter name lookup (three angular names followed by three translational names).
ematrix_dict = {0:'tilt', 1:'roll', 2:'twist', 3:'shift', 4:'slide', 5:'rise'}

# Tags that are searched for in input file names.
incons   = ['oring','pcirc']

# 'col01' ... 'col09', and a second list of nine labels.
# NOTE(review): inseqs1 and inseqs2 look like parallel lists (same length) - confirm.
inseqs1   = ['col'+str(i).zfill(2) for i in range(1, 10)]
inseqs2   = ['A150','G150','A75G75','A15G15','A1G1','A2G2','A3G3','A4G4','A5G5']

# Sequence-variant tags with the plot colour and marker used for each.
seqvars        = ['std','c05a','c15a','c20a','c30a']
seqvars_colors = {'std':'grey','c05a':'blue','c15a':'red','c20a':'gold','c30a':'green'}
seqvars_marks  = {'std':'s','c05a':'<','c15a':'^','c20a':'d','c30a':'P'}

# Short tag -> 'N-state' label.
ff_dict       = {'idt':'1-state','org':'2-state','vic':'3-state','vor':'4-state'}

# 'N-state' labels with their descriptive name, colour and marker.
ff_st         = [str(i)+'-state' for i in range(1, 5)]
ff_st_dict    = {'1-state':'ideal','2-state':'oring','3-state':'zhurkin','4-state':'zhurkin+oring'}
ff_st_colors  = {'1-state':'gold','2-state':'dodgerblue','3-state':'limegreen','4-state':'tomato'}
ff_st_marks   = {'1-state':'.','2-state':'.','3-state':'.','4-state':'.'}

# Tags with the colour and marker used for each.
ff_ind         = ['ideal','kabsch','olson']
ff_ind_colors  = {'ideal':'darkgreen','kabsch':'crimson','olson':'dodgerblue'}
ff_ind_marks   = {'ideal':'.','kabsch':'^','olson':'d'}

# rs_bpt holds the rs_bpturn values times ten as zero-padded 3-character tags
# ('090' <-> 9.0 ... '110' <-> 11.0); both lists must stay index-aligned.
rs_bpt             = ['090','091','092','093','094','095','096','097','098','099','100',
                      '101','102','103','104','105','106','107','108','109','110']
rs_bpturn          = [9.0,9.1,9.2,9.3,9.4,9.5,9.6,9.7,9.8,9.9,10.0,
                      10.1,10.2,10.3,10.4,10.5,10.6,10.7,10.8,10.9,11.0]
# 360 divided by each rs_bpturn value, rounded to two decimals.
# NOTE(review): presumably twist in degrees per step for a given bp/turn - confirm.
rs_tw              = [round(360/i, 2) for i in rs_bpturn]
# One RGBA row from the gnuplot colormap per entry of rs_tw, sampled evenly over [0, 1].
colors             = plt.cm.gnuplot(np.linspace(0, 1, len(rs_tw)))
# Pair each key with the colour at the same position. zip() is required here:
# a second ``for`` clause inside a dict comprehension is a nested loop, which
# would leave every key mapped to the last colour.
rs_tw_colors       = dict(zip(rs_tw, colors))
rs_bpturn_colors   = dict(zip(rs_bpturn, colors))
rs_bpt_colors      = dict(zip(rs_bpt, colors))

In [None]:
def pca_df(filename):
    """Project the points stored in *filename* onto their principal axes.

    File layout read by this function (0-based line numbers, whitespace
    separated tokens):

    * line 0, first token: the number of points ``N``;
    * line ``5*j + 2``: the three coordinates ``x y z`` of point ``j``.

    All other lines are ignored.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        ``N`` rows and columns ``e1``, ``e2``, ``e3``: the mean-centred
        coordinates expressed along the eigenvectors of their covariance
        matrix, ordered by decreasing eigenvalue.  The sign of each axis is
        arbitrary, as is usual for PCA.

    Raises
    ------
    ValueError
        If ``N < 2`` (the sample covariance is undefined) or a coordinate
        line does not hold exactly three numeric tokens.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(filename, 'r') as infile:
        rows = [line.split() for line in infile]

    N = int(rows[0][0])
    if N < 2:
        raise ValueError("pca_df needs at least 2 points, file declares %d" % N)

    # Build the coordinate table in one step instead of filling a DataFrame
    # cell by cell.
    coords = np.array([rows[5 * j + 2] for j in range(N)], dtype=float)
    if coords.ndim != 2 or coords.shape[1] != 3:
        raise ValueError("each coordinate line must contain exactly 3 values")

    centered = coords - coords.mean(axis=0)

    # Sample covariance of already-centred data: no further mean correction
    # term is needed.
    covar = centered.T.dot(centered) / (N - 1)

    # The covariance matrix is symmetric, so eigh applies; it returns real
    # eigenvalues in ascending order with orthonormal eigenvectors.
    evals, evecs = np.linalg.eigh(covar)
    order = np.argsort(evals)[::-1]
    evecs = evecs[:, order]

    return pd.DataFrame(centered.dot(evecs), columns=['e1', 'e2', 'e3'])

def fig_move(figdirectory):
    """Move every ``.png`` file in *figdirectory* into ``figdirectory/figures``.

    The ``figures`` subdirectory is created inside *figdirectory* if it does
    not exist yet, so the function works regardless of the current working
    directory and can be called repeatedly.

    Parameters
    ----------
    figdirectory : str
        Directory that is scanned (non-recursively) for ``.png`` files.

    Returns
    -------
    None
    """
    target = os.path.join(figdirectory, "figures")
    # Create the destination where the files are actually moved to.
    os.makedirs(target, exist_ok=True)
    for filename in os.listdir(figdirectory):
        if filename.endswith(".png"):
            shutil.move(os.path.join(figdirectory, filename),
                        os.path.join(target, filename))
    return

def optparameterdf(file):
    """Read a whitespace-separated parameter table into a DataFrame.

    File layout read by this function (0-based line numbers): lines 0 and 1
    are skipped, line 2 holds the column names, and every following
    non-blank line is one data row.  The table must contain ``Tilt`` and
    ``Roll`` columns.

    Parameters
    ----------
    file : str
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        One row per data line.  Tokens that parse as numbers are stored as
        floats, all other tokens are kept as strings.  An extra ``Bend``
        column holds ``sqrt(Tilt**2 + Roll**2)`` for every row.

    Raises
    ------
    ValueError
        If a ``Tilt`` or ``Roll`` entry is not numeric.
    """
    def _float_or_text(token):
        # Numeric tokens become floats; labels are left untouched.
        try:
            return float(token)
        except ValueError:
            return token

    # Context manager guarantees the handle is closed even if parsing fails.
    with open(file, 'r') as infile:
        rows = [line.split() for line in infile]

    # Flat list of column names (a nested list would produce nested columns).
    header = rows[2]
    # Blank lines split into empty lists; drop them instead of creating
    # empty rows.
    pars = [[_float_or_text(tok) for tok in row] for row in rows[3:] if row]

    df = pd.DataFrame.from_records(pars, columns=header)

    # Vectorised over all rows at once.
    df['Bend'] = np.hypot(df['Tilt'].astype(float), df['Roll'].astype(float))
    return df

# Load grooves data and analyze

In [None]:
# Directory holding the groove data files, below the notebook's working directory.
pathgroove = path+'/opt_grooves'
# Alphabetically sorted names of every entry in that directory.
lst = sorted(os.listdir(pathgroove))

In [None]:
# Make: minor groove figures
# Three stacked panels sharing both axes, one per file-name tag.
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=1, figsize=(10,10), sharex=True, sharey=True)
#ax1.set_prop_cycle('color',[plt.cm.gist_rainbow(i) for i in np.linspace(0, 1, len(inseqs1))])
#ax2.set_prop_cycle('color',[plt.cm.gist_rainbow(i) for i in np.linspace(0, 1, len(inseqs1))])
#ax3.set_prop_cycle('color',[plt.cm.gist_rainbow(i) for i in np.linspace(0, 1, len(inseqs1))])

# Tag -> panel; the first tag found in a file name decides the panel.
panel_by_tag = (('ideal', ax1), ('kabsch', ax2), ('olson', ax3))

for groove_file in lst:
    # Only files whose name contains both "oring" and "std" are plotted.
    if "oring" not in groove_file or "std" not in groove_file:
        continue
    panel = next((ax for tag, ax in panel_by_tag if tag in groove_file), None)
    if panel is None:
        continue
    # Read and draw each file exactly once: its "W-min" column against the row index.
    pd.read_csv(pathgroove+'/'+groove_file).plot(y="W-min", ax=panel, legend=False)

plt.show()
plt.clf()

In [None]:
# Print the labels stored in inseqs2.
print(inseqs2)