# Statistics

### Index
0. [Imports](#imports)
1. [System parameters](#parameters)
2. [Number statistics](#stats)
3. [Hydrogen bond statistics](#hbonds)
4. [Path-residues](#respaths)

### Imports <a name="imports"></a>

In [None]:
import sys
sys.path.append('/home/jorge/research/peptides/simulations/peptides')
from mdtools.core import *
from mdtools.statistics import *

### System parameters <a name="parameters"></a>

In [None]:
# Fixed parameters
#
# Layout of the nanotubes in the system:
#   1 2
#   3 4

traj_name = "4t10s_run01"  # Base name shared by the trajectory and topology files
N_tubes = 4  # Number of nanotubes in the system
N_rings = 10  # Number of peptides in a nanotube
N_res = 8  # Number of residues in a peptide

In [None]:
# Build a "resid ..." selection string from the residues that point towards
# the channel, as returned by the project's get_channel_reslist helper.
channel_reslist = get_channel_reslist(
    N_rings,
    N_res,
    [0, 1, 2, 3],
    [[4, 4], [2, 6], [0, 0], [6, 2]],
)
# Each entry is shifted by one and followed by a space (the trailing space is
# kept). NOTE(review): the +1 presumably converts 0-based indices to 1-based
# residue ids -- confirm against get_channel_reslist.
mask = "resid " + "".join(str(res + 1) + " " for res in channel_reslist)
print(mask)

In [None]:
# Load the trajectory together with its topology and centre the coordinates.
trajectory_file = f"{traj_name}_RMSD.nc"
topology_file = f"{traj_name}.parm7"
traj = md.load(trajectory_file, top=topology_file)
traj.center_coordinates()

# Atom selections handed to MyParams along with the system dimensions.
myselections = [
    "resname LYS and name NZ",
    "resname LYN and name NZ",
    "resname TYD and name OH",
]
p = MyParams(traj, N_tubes, N_rings, N_res, myselections)

# NOTE(review): the second assignment overrides the first, so only the TFA
# oxygens end up in p.IONs. Presumably one of the two lines is meant to be
# commented out depending on the counter-ion of the system -- confirm.
p.IONs = traj.top.select("element Cl")
p.IONs = traj.top.select("resname TFA and name OD1 OD2")

In [None]:
# Load data
#
# Read the statistics of the channel and of every lumen, tag each table with
# the region it belongs to in the 'tube' column, and stack them into alldata.

canaldata = pd.read_csv("channel0_stats.csv")
canaldata['tube'] = "channel0"

# DataFrame.append was removed in pandas 2.0: collect the tables and
# concatenate them once instead of growing alldata inside the loop.
frames = [canaldata]
for tubo in range(1):
    tubedata = pd.read_csv("lumen"+str(tubo)+"_stats.csv")
    tubedata['tube'] = "lumen" + str(tubo)
    frames.append(tubedata)
alldata = pd.concat(frames, ignore_index=True)
alldata.tail()

# Per-type hydrogen bond table of the channel.
detail = pd.read_csv("channel0_detail.csv")

# 'residues' starts as "<donor>-<acceptor>"; the regex then keeps only the
# first and third dash-separated fields (capture groups 1 and 2).
# The pattern is a raw string so that \w and \- are not treated as (invalid)
# Python string escapes, and the result is assigned back because an in-place
# replace on the column selection detail['residues'] is chained assignment and
# is not guaranteed to modify `detail`.
detail['residues'] = detail['donor'] + '-' + detail['acceptor']
detail['residues'] = detail['residues'].replace(
    to_replace=r'(\w+)\-\w+\-(\w+\-?)\-\w+\-?', value=r'\1-\2', regex=True)
detail.tail()

### Number statistics <a name="stats"></a>

In [None]:
# Distribution of N_WATs with one curve per value of the 'tube' column.
N_hist(
    alldata,
    x='N_WATs',
    bins='int',
    hue='tube',
    element='step',
    alpha=0.01,
    title='Waters inside the tubes and the channel',
    xlabel='Number of water molecules inside',
)

In [None]:
# Mean and standard deviation of N_WATs, first for the channel, then for the lumen.
# NOTE(review): the division by 3 presumably converts an atom count into a
# number of water molecules (3 atoms each) -- confirm how N_WATs is computed.
print(canaldata['N_WATs'].mean()/3, canaldata['N_WATs'].std()/3)

# The 'tube' column holds string labels ("channel0", "lumen0"); comparing it
# with the integer 1 selected no rows and printed NaN, so select the lumen by
# its label instead.
lumen_wats = alldata.loc[alldata['tube'] == 'lumen0', 'N_WATs']
print(lumen_wats.mean()/3, lumen_wats.std()/3)

In [None]:
# N_WATs against the step, one line per value of the 'tube' column.
evolution(
    alldata,
    x='step',
    y='N_WATs',
    hue='tube',
    title='Evolution of the number of waters inside the tubes and channel',
    ylabel='Number of water molecules inside',
)

In [None]:
# Distribution of N_WATs for the channel table only.
N_hist(
    canaldata,
    x='N_WATs',
    bins='int',
    title='Waters inside the channel',
    xlabel='Number of water molecules inside',
)

In [None]:
# Distribution of N_IONs for the channel table only.
N_hist(
    canaldata,
    x='N_IONs',
    bins='int',
    title='Ions inside the channel',
    xlabel='Number of ions inside',
)

In [None]:
# Water and ion counts against the step, drawn with the project's `evolution` helper.
fig, ax = plt.subplots()
evolution(alldata, x='step', y='N_WATs', label='Water')
evolution(alldata, x='step', y='N_IONs', label='Ions')

# Same comparison for the channel table alone, drawn directly with seaborn.
# The column names follow the ones used on canaldata everywhere else in this
# notebook ('step', 'N_WATs', 'N_IONs'); the previous 'istep', 'N_wats' and
# 'N_cls' do not appear in any other access to this table.
# NOTE(review): confirm against the header of channel0_stats.csv.
fig, ax = plt.subplots()
sns.lineplot(data=canaldata, x="step", y="N_WATs", label="Water")
sns.lineplot(data=canaldata, x="step", y="N_IONs", label="Chloride")
title = 'Evolution of the number of waters and chlorides inside the channel'
xlabel = 'Step'
ylabel = 'Number of waters/chlorides'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, True)

### Hydrogen bond statistics <a name="hbonds"></a>

In [None]:
# Distribution of N_HBonds with one curve per value of the 'tube' column.
N_hist(
    alldata,
    x='N_HBonds',
    bins='int',
    hue='tube',
    element='step',
    alpha=0.01,
    title='Hydrogen bonds inside the tubes and the channel',
    xlabel='Number of hydrogen bonds inside',
)

In [None]:
# Mean of N_HBonds over every row of alldata (all 'tube' labels together).
alldata.loc[:, 'N_HBonds'].mean()

In [None]:
# Rows whose donor is 'HOH-O' and whose acceptor is one of the listed '-O'
# atoms, plotted as one N_HBonds distribution per 'residues' label.
is_water_donor = detail['donor'] == 'HOH-O'
is_listed_acceptor = detail['acceptor'].isin(['LYN-O', 'LYS-O', 'PHD-O'])
aux = detail[is_water_donor & is_listed_acceptor]
N_hist(
    aux,
    x='N_HBonds',
    bins='int',
    hue='residues',
    element='step',
    alpha=0.01,
    title='Hydrogen bonds (TYPE) inside the channel',
    xlabel='Number of hydrogen bonds inside',
)

In [None]:
# Distribution of ave_dist with one curve per value of the 'tube' column.
N_hist(
    alldata,
    x='ave_dist',
    hue='tube',
    element='step',
    alpha=0.01,
    title='Average distance of the hydrogen bonds',
    xlabel='Average distance',
)

In [None]:
# Mean of ave_dist over every row of alldata (all 'tube' labels together).
alldata.loc[:, 'ave_dist'].mean()

In [None]:
# Drop the rows with one of the listed '-N' donors or one of the listed
# acceptors, then plot one N_HBonds distribution per 'residues' label.
excluded_donors = ['LYN-N', 'LYS-N', 'PHD-N']
excluded_acceptors = ['LYN-O', 'LYS-O', 'PHD-O', 'LYS-NZ', 'LYS-N']
keep_rows = ~detail['donor'].isin(excluded_donors) & ~detail['acceptor'].isin(excluded_acceptors)
aux = detail[keep_rows]
N_hist(
    aux,
    x='N_HBonds',
    bins='int',
    hue='residues',
    element='step',
    alpha=0.01,
    title='Hydrogen bonds (TYPE) inside the channel',
    xlabel='Number of hydrogen bonds inside',
)

In [None]:
# Print the mean N_HBonds of every 'residues' label present in aux, in order
# of first appearance.
for res in aux['residues'].unique():
    hbonds_of_type = aux.loc[aux['residues'] == res, 'N_HBonds']
    print(res, hbonds_of_type.mean())

In [None]:
# N_HBonds distribution restricted to the rows labelled 'LYS-Cl-'.
aux = detail.loc[detail['residues'] == 'LYS-Cl-']
N_hist(
    aux,
    x='N_HBonds',
    bins='int',
    hue='residues',
    element='step',
    alpha=0.01,
    title='Hydrogen bonds (TYPE) inside the channel',
    xlabel='Number of hydrogen bonds inside',
)

In [None]:
# N_HBonds distribution restricted to the rows labelled 'LYN-Cl-'.
aux = detail.loc[detail['residues'] == 'LYN-Cl-']
N_hist(
    aux,
    x='N_HBonds',
    bins='int',
    hue='residues',
    element='step',
    alpha=0.01,
    title='Hydrogen bonds (TYPE) inside the channel',
    xlabel='Number of hydrogen bonds inside',
)

In [None]:
# N_HBonds distribution restricted to the rows labelled 'HOH-Cl-'.
aux = detail.loc[detail["residues"] == 'HOH-Cl-']
N_hist(
    aux,
    x='N_HBonds',
    bins='int',
    hue='residues',
    element='step',
    alpha=0.01,
    title='Hydrogen bonds (TYPE) inside the channel',
    xlabel='Number of hydrogen bonds inside',
)

In [None]:
# N_HBonds against the step for the two backbone groups: rows accepted by one
# of the listed '-O' atoms and rows donated by one of the listed '-N' atoms.
# The x column is 'step', the name used for this table elsewhere in the
# notebook; the previous 'istep' is not read from `detail` anywhere else.
# NOTE(review): confirm against the header of channel0_detail.csv.
fig, ax = plt.subplots()
aux = detail[detail["acceptor"].isin(['LYN-O', 'LYS-O', 'PHD-O'])]
sns.lineplot(data=aux, x="step", y="N_HBonds", label="-O")
aux = detail[detail["donor"].isin(['LYN-N', 'LYS-N', 'PHD-N'])]
sns.lineplot(data=aux, x="step", y="N_HBonds", label="N-")
title = 'Hydrogen bonds with the backbone inside the channel'
xlabel = 'Step'
ylabel = 'Number of hydrogen bonds'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, True)

In [None]:
# Compare the 'HOH-HOH' hydrogen bonds of the channel with the lumen rows of alldata.
# .copy() so that adding the 'tube' column writes to an independent table and
# not to a filtered slice of `detail`.
aux = detail[detail["residues"]=="HOH-HOH"].copy()
aux['tube'] = "channel"

# alldata labels its channel rows "channel0", so the old test `!= "channel"`
# matched every row and let the channel totals through as an extra group.
# Exclude any label starting with "channel" instead.
lumen_rows = alldata[~alldata["tube"].str.startswith("channel")]

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
aux = pd.concat([aux, lumen_rows], ignore_index=True)
N_hist(aux, x='N_HBonds', bins='int', hue='tube', element='step', alpha=0.01,
       title='Hydrogen bonds (only WAT-WAT) inside the lumen and the channel', xlabel='Number of hydrogen bonds inside')

In [None]:
# Drop the rows with one of the listed '-N' donors, one of the listed '-O'
# acceptors, or a 'residues' label that involves Cl-.
aux = detail[~detail['donor'].isin(['LYN-N', 'LYS-N', 'PHD-N'])]
aux = aux[~aux['acceptor'].isin(['LYN-O', 'LYS-O', 'PHD-O'])]
aux = aux[~aux["residues"].isin(["HOH-Cl-", "LYN-Cl-", "LYS-Cl-"])]

# Total N_HBonds per step. Steps without any remaining row get 0, as before.
# The range now includes the last step: range(min, max) silently dropped it.
# A single groupby replaces the row-by-row growth of the table.
steps = range(int(aux['step'].min()), int(aux['step'].max()) + 1)
new = (
    aux.groupby('step')['N_HBonds']
    .sum()
    .reindex(steps, fill_value=0)
    .rename_axis('step')
    .reset_index()
)
new['tube'] = "channel"

# alldata labels its channel rows "channel0", so the old test `!= "channel"`
# matched every row; exclude any label starting with "channel" instead.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
lumen_rows = alldata[~alldata["tube"].str.startswith("channel")]
new = pd.concat([new, lumen_rows], ignore_index=True)

N_hist(new, x='N_HBonds', bins='int', hue='tube', element='step', alpha=0.01,
       title='Hydrogen bonds (no Cl) inside the lumen and the channel', xlabel='Number of hydrogen bonds inside')

In [None]:
# Drop the rows with one of the listed '-N' donors or one of the listed
# acceptors, then plot one ave_dist distribution per 'residues' label.
excluded_donors = ['LYN-N', 'LYS-N', 'PHD-N']
excluded_acceptors = ['LYN-O', 'LYS-O', 'PHD-O', 'LYS-NZ', 'LYS-N']
keep_rows = ~detail['donor'].isin(excluded_donors) & ~detail['acceptor'].isin(excluded_acceptors)
aux = detail[keep_rows]
N_hist(
    aux,
    x='ave_dist',
    hue='residues',
    element='step',
    alpha=0.01,
    title='Average distance of the hydrogen bonds by type',
    xlabel='Average distance',
)

### Path-residues <a name="respaths"></a>