# Input file requirements:
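Each trajectory directory (`trajectories/20ns-trajectory/` and `trajectories/MD-1ZNF/`) must contain:
- `CT-data.dat`
- `N1-Zn-dist.dat`, `N2-Zn-dist.dat`, `S1-Zn-dist.dat`, `S2-Zn-dist.dat`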

In [None]:
import numpy as np
import MDAnalysis as mda
import matplotlib.pyplot as pp
import pandas as pd
import matplotlib as mpl
from matplotlib import gridspec
from MDAnalysis.analysis.rms import RMSD
from scipy import stats
from IPython.display import Markdown


# IPython magic: use matplotlib's interactive "widget" backend in this notebook.
%matplotlib widget
# Show the installed matplotlib version in the cell output.
print(mpl.__version__)

In [None]:
def get_density(vals, *args, **kwargs):
    """Estimate the probability density of ``vals`` with a Gaussian KDE.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``scipy.stats.gaussian_kde`` (for example ``bw_method``).

    Returns a tuple ``(x, y)``: ``x`` holds 1000 evenly spaced points spanning
    ``min(vals)`` to ``max(vals)`` and ``y`` is the estimated density at each
    of those points.
    """
    kde = stats.gaussian_kde(vals, *args, **kwargs)

    lower, upper = min(vals), max(vals)
    grid = np.linspace(lower, upper, num=1000)

    return grid, kde(grid)

## Define trajectories here

In [None]:
# Charge-transfer data file for each simulation, keyed by simulation label.
traj_files = {
    "CT-20ns": "trajectories/20ns-trajectory/CT-data.dat",
    "CT-40ns": "trajectories/MD-1ZNF/CT-data.dat",
}

# Upper limit of the time axis used when plotting each simulation.
SimTimes = {"CT-20ns": 20, "CT-40ns": 40}

# Subplot title for each simulation.
titles = {
    "CT-20ns": r"1 $\times$ 20ns trajectory",
    "CT-40ns": r"40 $\times$ 1ns trajectories",
}

# Per-simulation mapping of atom label -> Zn distance file; one empty inner
# dict per simulation, filled in by the loop below.
dist_files = {label: {} for label in traj_files}

for at in 'N1 N2 S1 S2'.split():
    dist_files["CT-20ns"][at] = f"trajectories/20ns-trajectory/{at}-Zn-dist.dat"
    dist_files["CT-40ns"][at] = f"trajectories/MD-1ZNF/{at}-Zn-dist.dat"
    

## Accumulate all data into a dictionary

In [None]:
# Collect, for every simulation key, two DataFrames:
#   data[key]['df_ct']   - contents of the charge-transfer file plus a 'Time' column
#   data[key]['df_dist'] - distance columns indexed by time, with
#                          (Atom, FF) MultiIndex columns
data = {
    "CT-20ns" : {},
    "CT-40ns" : {}
}

# Time between consecutive rows (500 rows per time unit; the plots below
# label this axis in ns).
dt = 1/500

for key in data:
    # Raw string: '\s' is not a valid Python escape sequence, so the
    # non-raw form triggers a warning on recent Python versions.
    data[key]['df_ct'] = df_ct = pd.read_csv(
        traj_files[key], sep=r'\s+',
    )

    # Row i is at time i*dt. Building the axis from the integer row index
    # guarantees exactly one time value per row without any float end-point
    # guard.
    df_ct['Time'] = np.arange(df_ct.shape[0]) * dt

    inds = []
    for n, (at, dist_file) in enumerate(dist_files[key].items()):
        inds.extend([(at, 'CTPOL'), (at, 'opt-CTPOL')])
        # The file's own first line is skipped and replaced by explicit
        # column names.
        _df = pd.read_csv(
            dist_file, sep=r'\s+',
            skiprows=1,
            names=[f'CTPOL_{at}', f'opt-CTPOL_{at}'],
        ) * 10 #convert to angstroms

        _df.index = np.arange(_df.shape[0]) * dt
        if n == 0:
            df_dist = _df
        else:
            # Inner join on the time index: only times present in every
            # distance file are kept.
            df_dist = pd.merge(df_dist, _df, left_index=True, right_index=True)

    # Replace the flat 'FF_atom' column names with (Atom, FF) tuples; `inds`
    # was built in the same order the columns were merged.
    df_dist.columns = pd.MultiIndex.from_tuples(inds, names=['Atom', 'FF'])
    df_dist.index.name = 'Time'

    data[key]['df_dist'] = df_dist


## Plot both FFs on the same plots: CT, mean S distance, mean N distance, for each simulation (key)

In [None]:
# Figure 1: every non-'Time' column of df_ct against time, one panel per
# simulation, each with a narrow side panel showing the kernel-density
# estimate of the same columns.
fig = pp.figure()
fig.set_size_inches(8,4)
# Three equal columns; simulation n occupies columns n .. 2*n, so the first
# simulation gets one column and the second gets two.
gs0 = gridspec.GridSpec(1, 3)
fig.subplots_adjust(top=0.9)

for n, (key, dat) in enumerate(data.items()):

    df_ct = dat['df_ct']

    # Split this simulation's slot 2:1 between the time series and the
    # density panel.
    gs = gs0[n: 2*n + 1].subgridspec(1,3, hspace=0.01, wspace=0.01)

    ax_left = fig.add_subplot(gs[0, 0:2])
    if n == 0:
        # The first time-series axis owns the shared y scale and its labels.
        ax0 = ax_left
    else:
        ax_left.sharey(ax0)
        ax_left.yaxis.set_tick_params(labelleft=False)

    ax_right = fig.add_subplot(gs[0, 2], sharey=ax0)

    ax_right.yaxis.set_visible(False)
    ax_right.xaxis.set_visible(False)

    df_ct.plot(
        x='Time',
        ax=ax_left,
        legend=False
    )
    # Set the label after plotting: DataFrame.plot labels the x axis with
    # the name of the x column, which would replace a label set beforehand.
    ax_left.set_xlabel(r"Time (ns)", fontsize=16)

    for pol in df_ct:
        if pol == 'Time': continue
        x, y = get_density(df_ct[pol], bw_method=0.1)

        # Density on the horizontal axis so the curve lines up with the
        # shared y axis of the time series.
        ax_right.plot(y, x, label=pol)

    ax_right.set_xlim(left=0)
    ax_left.set_xlim(0, SimTimes[key])

    if n > 0:
        # Thin dashed line at every whole time unit (presumably the joins
        # between the individual 1 ns trajectories named in the title).
        for i in range(1, SimTimes[key]):
            ax_left.axvline(i, linestyle=(0, (5,1)), color='k', alpha=0.5, linewidth=0.5)

    ax_left.set_title(titles[key])

# Only the last density panel carries the legend.
ax_right.legend()
ax0.set_ylabel("Charge transfer", fontsize=16)

fig.tight_layout()
pp.show()

In [None]:
# Write the current `fig` to a PNG file in the working directory.
fig.savefig('ct_vs_time__ctpol_vs_opt.png')

In [None]:
# Figure 2: per simulation, the mean CTPOL distances (left y axis) drawn over
# the CTPOL charge transfer (right y axis, drawn underneath).
fig = pp.figure()
fig.subplots_adjust(top=0.9, bottom=0.15)
fig.set_size_inches(8,4)
# Three equal columns; simulation n occupies columns n .. 2*n.
gs0 = gridspec.GridSpec(1, 3)
gs0.update(hspace=0.01, wspace=0.1)

# Characters stripped from both ends of an atom label to obtain its group
# name (e.g. 'N1' -> 'N').
nums = '0123456789'

for n, (key, dat) in enumerate(data.items()):
    ax = fig.add_subplot(gs0[n: 2*n + 1])
    df_ct = dat['df_ct']
    df_dist = dat['df_dist']

    ax2 = ax.twinx()

    if n == 0:
        # The first panel's axes own the shared y scales.
        ax0 = ax
        ax20 = ax2
    else:
        ax.sharey(ax0)
        ax2.sharey(ax20)

    # Draw the distance axes above the twin axes, and hide their background
    # patch so the charge-transfer curve underneath stays visible.
    ax.set_zorder(ax2.get_zorder()+1)
    ax.patch.set_visible(False)

    df_dist_ctpol = df_dist.xs("CTPOL", level="FF", axis=1)
    df_dist_opt = df_dist.xs("opt-CTPOL", level="FF", axis=1)

    # Average the columns that share a label once digits are stripped.
    # Grouping is done on the transposed frame because
    # DataFrame.groupby(axis=1) is deprecated in pandas.
    mean_df = df_dist_ctpol.T.groupby(
        by=lambda x: x.strip(nums) + " Distance",
    ).mean().T

    df_ct.plot(
        x="Time", y="CTPOL",
        ax=ax2,
        alpha=0.5,
        label="Charge Transfer",
        color='cyan',
        zorder=1,
        legend=False)

    mean_df.plot(ax=ax, color='green orange'.split(), legend=False)

    # Limits and labels are applied after plotting: DataFrame.plot labels the
    # x axis with the index name, which would replace a label set beforehand.
    ax.set_xlim(0, SimTimes[key])
    ax.set_ylabel(r'Distance ($\AA$)', fontsize=16)
    ax.set_xlabel("Time (ns)", fontsize=16)

    if n > 0:
        ax2.yaxis.set_tick_params(labelright=True)
        ax2.set_ylabel('Charge Transfer')

        # Thin dashed line at every whole time unit (presumably the joins
        # between the individual 1 ns trajectories named in the title).
        for i in range(1, SimTimes[key]):
            ax.axvline(i, linestyle=(0, (5,1)), color='k', alpha=0.5, linewidth=0.5)
    else:
        ax2.yaxis.set_tick_params(labelright=False)

    ax.set_title(titles[key])

# One combined legend on the last panel, covering both its y axes.
h1, l1 = ax.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()

leg1 = ax.legend(h1+h2, l1+l2)
for lh1 in h1:
    lh1.set_alpha(1)
    

In [None]:
# Write the current `fig` to a PNG file in the working directory.
fig.savefig('ct_dists_vs_time.png')

In [None]:
# For each whole-unit time window in which the mean N distance rises above 3,
# record the offset inside that window (t % 1) of the first such sample.
# `mean_df` is the frame left behind by the last iteration of the plotting
# loop, i.e. the last key in `data`.
df_outlier = mean_df['N Distance'][mean_df['N Distance'] > 3]

times = df_outlier.index

exceeds_at = []
# Start below the first window index (0): starting at 0 with a strict `>`
# comparison would silently skip any exceedance in the first window.
cur_int = -1
for t in times:
    if int(t) > cur_int:
        cur_int = int(t)
        exceeds_at.append(t % 1)

# Bare expression so the notebook displays the result.
exceeds_at