In [None]:
import os, math, sys, scipy
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

# Directory layout: everything is resolved relative to the working directory
# of the running kernel. `datapath` is where the dataset CSVs are read from;
# `optpath` and `inpath` are not referenced in the cells visible here.
path = os.getcwd()
datapath, optpath, inpath = (
    path + subdir for subdir in ('/datasets', '/opt_files', '/incons')
)
# `inpath` previously pointed at:
# "/home/rty10/local_simulations/2022Jan_n150_koo86/attempt_02_25Jan"

In [None]:
# Sequence lists (two named groups; neither is referenced in the cells visible here).
koo86_1 = [
    'A3N7',
    'A4N6',
    'A5C5',
    'A5N5',
    'A6N4',
    'A8N2',
    'A9N1',
]
koo86_2 = [
    'FCT',
    'FGG',
    'IAC',
    'IAG',
    'IAT',
]


### Global structural data

In [None]:
# Load the main dataset (first CSV column becomes the index) and add a
# `jfactor` column computed as exp(-eopt).
maindf = (
    pd.read_csv(datapath+"/circ150_koo86_dataset", index_col=0)
    .assign(jfactor=lambda frame: np.exp(-1 * frame['eopt']))
)

# Optional comparison dataset, currently disabled:
#compdf = pd.read_csv("czapla_jctc_2012.csv", index_col=0)
#compdf['eopt'] = -1*np.log(compdf.jfactor)

# Preview the first ten rows as the cell output.
maindf.head(10)

In [None]:
# Write the selected columns of `maindf` to a CSV file. The target is a
# relative path, so it lands in the process working directory.
maindf.loc[
    :,
    [
        'length',
        'state',
        'sequence',
        'config',
        'forcefield',
        'ff_type',
        'eo',
        'eopt',
        'jfactor',
    ],
].to_csv("koo86_circular_energetics")

In [None]:
# Bar plots of the `lk` column per sequence, coloured by `state`.
# First pass: one figure per force field (sorted) using the 'dim' rows.
# Second pass: one figure per hard-coded force field using the 'tet' rows.
for FF_TYPE, MODELS in (
    ('dim', sorted(maindf.forcefield.unique())),
    ('tet', ['czapla2022', 'young2022']),
):
    for MODEL in MODELS:
        # Boolean indexing already returns a new frame, so no explicit copy is needed.
        selection = (maindf.forcefield == MODEL) & (maindf.ff_type == FF_TYPE)

        fig, ax = plt.subplots(figsize=(10, 3))
        sns.barplot(data=maindf.loc[selection].reset_index(drop=True),
                    x='sequence', y='lk',
                    hue="state", palette=['red', 'green'], alpha=0.50, edgecolor="black",
                    ax=ax)
        ax.set_ylim(13, 16)
        # The legend sits outside the axes and doubles as the force-field label.
        ax.legend(title=MODEL, loc=2, bbox_to_anchor=(1, 1))
        fig.tight_layout()
        plt.show()
        # Close (not merely clear) the figure so figures do not pile up across
        # iterations and no blank figure is left behind.
        plt.close(fig)

In [None]:
# Bar plots of the `wr` column per sequence, coloured by `state`.
# First pass: one figure per force field (sorted) using the 'dim' rows.
# Second pass: one figure per hard-coded force field using the 'tet' rows.
for FF_TYPE, MODELS in (
    ('dim', sorted(maindf.forcefield.unique())),
    ('tet', ['czapla2022', 'young2022']),
):
    for MODEL in MODELS:
        # Boolean indexing already returns a new frame, so no explicit copy is needed.
        selection = (maindf.forcefield == MODEL) & (maindf.ff_type == FF_TYPE)

        fig, ax = plt.subplots(figsize=(10, 3))
        sns.barplot(data=maindf.loc[selection].reset_index(drop=True),
                    x='sequence', y='wr',
                    hue="state", palette=['red', 'green'], alpha=0.50, edgecolor="black",
                    ax=ax)
        ax.set_ylim(-1, 1)
        # The legend sits outside the axes and doubles as the force-field label.
        ax.legend(title=MODEL, loc=2, bbox_to_anchor=(1, 1))
        fig.tight_layout()
        plt.show()
        # Close (not merely clear) the figure so figures do not pile up across
        # iterations and no blank figure is left behind.
        plt.close(fig)

In [None]:
# Bar plots of the `eopt` column per sequence, coloured by `state`.
# First pass: one figure per force field (sorted) using the 'dim' rows, y-axis 0..50.
# Second pass: one figure per hard-coded force field using the 'tet' rows, y-axis 0..100.
for FF_TYPE, MODELS, YLIM in (
    ('dim', sorted(maindf.forcefield.unique()), (0, 50)),
    ('tet', ['czapla2022', 'young2022'], (0, 100)),
):
    for MODEL in MODELS:
        # Boolean indexing already returns a new frame, so no explicit copy is needed.
        selection = (maindf.forcefield == MODEL) & (maindf.ff_type == FF_TYPE)

        fig, ax = plt.subplots(figsize=(15, 3))
        sns.barplot(data=maindf.loc[selection].reset_index(drop=True),
                    x='sequence', y='eopt',
                    hue="state", palette=['red', 'green'], alpha=0.50, edgecolor="black",
                    ax=ax)
        ax.set_ylim(*YLIM)
        # The legend sits outside the axes and doubles as the force-field label.
        ax.legend(title=MODEL, loc=2, bbox_to_anchor=(1, 1))
        fig.tight_layout()
        plt.show()
        # Close (not merely clear) the figure so figures do not pile up across
        # iterations and no blank figure is left behind.
        plt.close(fig)

In [None]:
# `eopt` per sequence for the "czapla2022" force field: one figure per
# `ff_type` value found in `maindf`, bars coloured by `state`, no error bars.
for TYPE in maindf.ff_type.unique():
    # Boolean indexing already returns a new frame, so no explicit copy is needed.
    selection = (maindf.forcefield == "czapla2022") & (maindf.ff_type == TYPE)

    fig, ax = plt.subplots(figsize=(8, 3))
    # NOTE(review): newer seaborn releases deprecate `ci=` in favour of
    # `errorbar=None` - confirm the installed version before switching.
    sns.barplot(data=maindf.loc[selection].reset_index(drop=True),
                x='sequence', y='eopt', ci=None,
                hue="state", palette=['red', 'green'], alpha=0.50, edgecolor="black",
                ax=ax)
    ax.set_ylim(0, 100)
    # The legend sits outside the axes; its title names the force-field type.
    ax.legend(title=TYPE, loc=2, bbox_to_anchor=(1, 1))
    fig.tight_layout()
    plt.show()
    # Close (not merely clear) the figure so figures do not pile up across
    # iterations and no blank figure is left behind.
    plt.close(fig)

In [None]:
# `eopt` per sequence for the "young2022" force field: one figure per
# `ff_type` value found in `maindf`, bars coloured by `state`, no error bars.
for TYPE in maindf.ff_type.unique():
    # Boolean indexing already returns a new frame, so no explicit copy is needed.
    selection = (maindf.forcefield == "young2022") & (maindf.ff_type == TYPE)

    fig, ax = plt.subplots(figsize=(8, 3))
    # NOTE(review): newer seaborn releases deprecate `ci=` in favour of
    # `errorbar=None` - confirm the installed version before switching.
    sns.barplot(data=maindf.loc[selection].reset_index(drop=True),
                x='sequence', y='eopt', ci=None,
                hue="state", palette=['red', 'green'], alpha=0.50, edgecolor="black",
                ax=ax)
    ax.set_ylim(0, 100)
    # The legend sits outside the axes; its title names the force-field type.
    ax.legend(title=TYPE, loc=2, bbox_to_anchor=(1, 1))
    fig.tight_layout()
    plt.show()
    # Close (not merely clear) the figure so figures do not pile up across
    # iterations and no blank figure is left behind.
    plt.close(fig)

### Parametric data

In [None]:
# Twist dataset profiles for sequence 'GA4T4C' with the 'young2022' force field.
# One 2x3 figure is drawn per state ('st01', then 'st02'):
#   rows    -> force-field type ('dim' on top, 'tet' below)
#   columns -> 'free' rows, 'circ' rows, and their difference (free - circ)
# Each dataset row is drawn as one line across the columns labelled '1'..'150'.
STEP_COLS = [str(i) for i in range(1, 150 + 1)]

# The file is identical for both states, so it is read once.
pardf = pd.read_csv(datapath+"/koo86_par-twist_dataset", index_col=0)

for STATE in ('st01', 'st02'):
    fig, axes = plt.subplots(2, 3, figsize=(20, 5), sharex=True)

    pltdf = pardf[(pardf.sequence == 'GA4T4C')
                  & (pardf.forcefield == 'young2022')
                  & (pardf.state == STATE)]

    for row, (FF_TYPE, ylabel) in enumerate((('dim', "Dimeric Forcefield"),
                                             ('tet', "Tetrameric Forcefield"))):
        typed = pltdf[pltdf.ff_type == FF_TYPE]
        freedf = typed[typed.config == 'free'].reset_index(drop=True)
        circdf = typed[typed.config == 'circ'].reset_index(drop=True)
        # Both indexes were reset, so the subtraction pairs rows by position.
        compdf = freedf[STEP_COLS] - circdf[STEP_COLS]

        # Transpose so that each original row becomes one line.
        freedf[STEP_COLS].T.plot(legend=False, color='black', ax=axes[row, 0])
        circdf[STEP_COLS].T.plot(legend=False, color='blue', ax=axes[row, 1])
        compdf.T.plot(legend=False, color='red', ax=axes[row, 2])
        axes[row, 0].set_ylabel(ylabel)

    # Label the panels so the two per-state figures can be told apart.
    for col, label in enumerate(('free', 'circ', 'free - circ')):
        axes[0, col].set_title(f"twist, {STATE}: {label}")

    # The x axis is shared (sharex=True), so every panel ends up with this range.
    # NOTE(review): the transposed index holds the string labels '1'..'150'; if
    # pandas draws those at positions 0..149 this range clips the first point - confirm.
    for ax in axes.flat:
        ax.set_xlim(1, 150)

    fig.tight_layout()
    plt.show()
    # Close (not merely clear) the figure so it is released before the next one.
    plt.close(fig)

del pardf, pltdf, freedf, circdf, compdf

In [None]:
# Roll dataset profiles for sequence 'GA4T4C' with the 'young2022' force field.
# One 2x3 figure is drawn per state ('st01', then 'st02'):
#   rows    -> force-field type ('dim' on top, 'tet' below)
#   columns -> 'free' rows, 'circ' rows, and their difference (free - circ)
# Each dataset row is drawn as one line across the columns labelled '1'..'150'.
#
# Fix: the 'st02' figure used to reload the *twist* dataset, which made it a
# duplicate of the twist cell's output. Both states now use the roll dataset.
STEP_COLS = [str(i) for i in range(1, 150 + 1)]

# The file is identical for both states, so it is read once.
pardf = pd.read_csv(datapath+"/koo86_par-roll_dataset", index_col=0)

for STATE in ('st01', 'st02'):
    fig, axes = plt.subplots(2, 3, figsize=(20, 5), sharex=True)

    pltdf = pardf[(pardf.sequence == 'GA4T4C')
                  & (pardf.forcefield == 'young2022')
                  & (pardf.state == STATE)]

    for row, (FF_TYPE, ylabel) in enumerate((('dim', "Dimeric Forcefield"),
                                             ('tet', "Tetrameric Forcefield"))):
        typed = pltdf[pltdf.ff_type == FF_TYPE]
        freedf = typed[typed.config == 'free'].reset_index(drop=True)
        circdf = typed[typed.config == 'circ'].reset_index(drop=True)
        # Both indexes were reset, so the subtraction pairs rows by position.
        compdf = freedf[STEP_COLS] - circdf[STEP_COLS]

        # Transpose so that each original row becomes one line.
        freedf[STEP_COLS].T.plot(legend=False, color='black', ax=axes[row, 0])
        circdf[STEP_COLS].T.plot(legend=False, color='blue', ax=axes[row, 1])
        compdf.T.plot(legend=False, color='red', ax=axes[row, 2])
        axes[row, 0].set_ylabel(ylabel)

    # Label the panels so the two per-state figures can be told apart.
    for col, label in enumerate(('free', 'circ', 'free - circ')):
        axes[0, col].set_title(f"roll, {STATE}: {label}")

    # The x axis is shared (sharex=True), so every panel ends up with this range.
    # NOTE(review): the transposed index holds the string labels '1'..'150'; if
    # pandas draws those at positions 0..149 this range clips the first point - confirm.
    for ax in axes.flat:
        ax.set_xlim(1, 150)

    fig.tight_layout()
    plt.show()
    # Close (not merely clear) the figure so it is released before the next one.
    plt.close(fig)

del pardf, pltdf, freedf, circdf, compdf