# Caso

# Estadísticas (statistics)

## Index
0. [Imports](#Imports)
1. [System parameters](#System-parameters)
2. [Statistics](#Statistics)
3. [Hydrogen bond stability](#Hydrogen-bond-stability)
4. [Paths](#Paths)
5. [Coordination numbers](#Coordination-numbers)
6. [Hydrogen bond coordination and structures](#Hydrogen-bond-coordination-and-structures)

## Imports

In [None]:
import sys
sys.path.insert(1, '/home/jorge/analisis/main/')
from main import *
from estadisticas import *

## System parameters

In [None]:
# Fixed parameters of the system
# (The bare string below is a no-op note, in Spanish, sketching how the
# four tubes are laid out: 1 2 on the top row, 3 4 on the bottom row.)
'''
Los tubos están colocados:
1 2
3 4
'''

# Base name; the trajectory and topology file names are built from it.
traj_name = "4tubes_run01"
N_tubes = 4 # Number of tubes in the system
N_rings = 6 # Number of rings in a tube
N_res = 8 # Number of residues in a ring
# Indices of the residues (LYS and LYN) located in the channel between the 4 nanotubes (the first index is 1):
# 3 15 19 31 35 47 53 61 69 77 85 93 101 109 117 125 133 141 147 159 163 175 179 191
# The same indices, split into four comma-separated strings of six values
# (presumably one string per tube — TODO confirm against Params).
channel_res_1 = "3, 15, 19, 31, 35, 47"
channel_res_2 = "53, 61, 69, 77, 85, 93"
channel_res_3 = "101, 109, 117, 125, 133, 141"
channel_res_4 = "147, 159, 163, 175, 179, 191"

In [None]:
# Both input files are named after `traj_name`.
trajectory_file = f"{traj_name}_RMSD.nc"
topology_file = f"{traj_name}.top"
traj = pt.iterload(trajectory_file, topology_file)

# Bundle the trajectory with the system sizes and the channel residue lists.
p = Params(
    traj,
    N_tubes, N_rings, N_res,
    channel_res_1, channel_res_2, channel_res_3, channel_res_4,
)

## Statistics

In [None]:
# Statistics of the channel, tagged so they can be told apart from the
# per-tube rows once everything is stacked in a single table.
canaldata = pd.read_csv("canal_stats.csv")
canaldata['tube'] = "channel"

# Collect every table first and concatenate once: DataFrame.append was
# removed in pandas 2.0, and a single concat also avoids re-copying the
# accumulated frame on every iteration.
frames = [canaldata]
# NOTE(review): range(1, 2) loads tube 1 only although N_tubes is 4 —
# confirm whether the other tube*_stats.csv files should be included.
for tubo in range(1, 2):
    tubedata = pd.read_csv("tube"+str(tubo)+"_stats.csv")
    tubedata['tube'] = tubo
    frames.append(tubedata)
alldata = pd.concat(frames, ignore_index=True)

# Hydrogen-bond breakdown of the channel by 'residues' label (e.g. "LYS-WAT").
detail = pd.read_csv("canal_hbonds_detail.csv")

In [None]:
# Histogram of the number of waters, one outline per value of 'tube'.
title, xlabel, ylabel = (
    'Waters inside the tubes and the channel',
    'Number of waters inside',
    'Count (number of frames)',
)

# Bin edges sit on half-integers so every unit-wide bin is centred on a
# whole number.
N_wats = alldata["N_wats"]
bins = np.arange(min(N_wats) - 0.5, max(N_wats) + 1)

fig, ax = plt.subplots()
sns.histplot(
    data=alldata, x="N_wats", hue="tube", bins=bins,
    element="step", lw=2, alpha=0.01, ax=ax,
)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Number of waters against the step, one line per value of 'tube'.
title = 'Evolution of the number of waters inside the tubes and channel'
xlabel, ylabel = 'Step', 'Number of waters'

fig, ax = plt.subplots()
sns.lineplot(x="istep", y="N_wats", hue="tube", data=alldata, ax=ax)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of the number of waters, channel rows only.
N_wats = canaldata['N_wats']
lowest, highest = min(N_wats), max(N_wats)
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(lowest - 0.5, highest + 1, 1)

title = 'Waters inside the channel'
xlabel = 'Number of waters inside'
ylabel = 'Count (number of frames)'

fig, ax = plt.subplots()
sns.histplot(x="N_wats", data=canaldata, bins=bins, lw=2, ax=ax)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of the number of chlorides, channel rows only.
title, xlabel, ylabel = (
    'Chlorides inside the channel',
    'Number of chlorides inside',
    'Count (number of frames)',
)

N_cls = canaldata['N_cls']
first_edge = min(N_cls) - 0.5
# Unit-wide bins starting half a unit below the smallest count.
bins = np.arange(first_edge, max(N_cls) + 1, 1)

fig, ax = plt.subplots()
sns.histplot(data=canaldata, x="N_cls", bins=bins, lw=2, ax=ax)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Waters and chlorides of the channel against the step, on shared axes.
fig, ax = plt.subplots()
for column, label in (("N_wats", "Water"), ("N_cls", "Chloride")):
    sns.lineplot(data=canaldata, x="istep", y=column, label=label, ax=ax)

title = 'Evolution of the number of waters and chlorides inside the channel'
xlabel, ylabel = 'Step', 'Number of waters/chlorides'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, True)

In [None]:
# Histogram of the number of hydrogen bonds, one outline per value of 'tube'.
title = 'Hydrogen bonds inside the tubes and the channel'
xlabel = 'Number of hydrogen bonds inside'
ylabel = 'Count (number of frames)'

N_hbonds = alldata["N_hbonds"]
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(min(N_hbonds) - 0.5, max(N_hbonds) + 1)

fig, ax = plt.subplots()
sns.histplot(
    x="N_hbonds", hue="tube", data=alldata,
    bins=bins, element="step", lw=2, alpha=0.01, ax=ax,
)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of 'ave_dist', one outline per value of 'tube'; seaborn picks
# the bins because none are given.
title, xlabel, ylabel = (
    'Average distance of the hydrogen bonds',
    'Average distance',
    'Count (number of frames)',
)

fig, ax = plt.subplots()
sns.histplot(
    data=alldata, x="ave_dist", hue="tube",
    element="step", lw=2, alpha=0.01, ax=ax,
)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Hydrogen bonds of the channel split by 'residues' label, leaving out
# the N-WAT and WAT-O rows.
skipped = detail["residues"].isin(["N-WAT", "WAT-O"])
aux = detail[~skipped]

N_hbonds = aux["N_hbonds"]
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(min(N_hbonds) - 0.5, max(N_hbonds) + 1)

title = 'Type of hydrogen bonds inside the channel'
xlabel = 'Number of hydrogen bonds inside'
ylabel = 'Count (number of frames)'

fig, ax = plt.subplots()
sns.histplot(
    data=aux, x="N_hbonds", hue="residues", bins=bins,
    element="step", lw=2, alpha=0.01, ax=ax,
)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of the hydrogen-bond count for one 'residues' label.
# The label is named once so that the filter and the figure title cannot
# drift apart (the title used to show the unfilled placeholder "TYPE").
hbond_type = "LYS-WAT"
aux = detail[detail["residues"]==hbond_type]
N_hbonds = aux["N_hbonds"]
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(min(N_hbonds)-0.5, max(N_hbonds)+1, 1)

fig, ax = plt.subplots()
sns.histplot(data=aux, x="N_hbonds", hue="residues", bins=bins, element="step", lw=2, alpha=0.01, ax=ax)
title = f'Hydrogen bonds ({hbond_type}) inside the channel'
xlabel = 'Number of hydrogen bonds inside'
ylabel = 'Count (number of frames)'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# WAT-O and N-WAT hydrogen bonds of the channel against the step.
title = 'Hydrogen bonds with the backbone inside the channel'
xlabel, ylabel = 'Step', 'Number of hydrogen bonds'

fig, ax = plt.subplots()
for kind in ("WAT-O", "N-WAT"):
    aux = detail[detail["residues"]==kind]
    sns.lineplot(data=aux, x="istep", y="N_hbonds", label=kind, ax=ax)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, True)

In [None]:
# WAT-WAT hydrogen bonds of the channel next to the tube rows of `alldata`.
# .copy() makes `aux` an independent frame, so adding the 'tube' column
# does not write into a slice of `detail` (SettingWithCopyWarning).
aux = detail[detail["residues"]=="WAT-WAT"].copy()
aux['tube'] = "channel"
# sns.displot(data=aux, x="N_hbonds", kind="kde", bw_adjust=2, lw=2)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
aux = pd.concat([aux, alldata[alldata["tube"]!="channel"]], ignore_index=True)
N_hbonds = aux["N_hbonds"]
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(min(N_hbonds)-0.5, max(N_hbonds)+1, 1)

fig, ax = plt.subplots()
sns.histplot(data=aux, x="N_hbonds", hue="tube", bins=bins, element="step", lw=2, alpha=0.01, ax=ax)
title = 'Hydrogen bonds (only WAT-WAT) inside the tubes and the channel'
xlabel = 'Number of hydrogen bonds inside'
ylabel = 'Count (number of frames)'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Channel hydrogen bonds without the backbone labels (N-WAT, WAT-O) and
# without the chloride labels (WAT-CL, LYN-CL, LYS-CL).
excluded = ["N-WAT", "WAT-O", "WAT-CL", "LYN-CL", "LYS-CL"]
aux = detail[~detail["residues"].isin(excluded)]

# Total number of bonds per step. The step range includes the last step
# (range(min, max) used to drop it), and a step with no matching row still
# gets an explicit 0, as the row-by-row accumulation did.
steps = np.arange(int(aux['istep'].min()), int(aux['istep'].max()) + 1)
totals = aux.groupby('istep')['N_hbonds'].sum().reindex(steps, fill_value=0)
new = totals.rename_axis('istep').reset_index()
new['tube'] = "channel"
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
new = pd.concat([new, alldata[alldata["tube"]!="channel"]], ignore_index=True)

N_hbonds = new["N_hbonds"]
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(min(N_hbonds)-0.5, max(N_hbonds)+1, 1)

fig, ax = plt.subplots()
sns.histplot(data=new, x="N_hbonds", hue="tube", bins=bins, element="step", lw=2, alpha=0.01, ax=ax)
title = 'Hydrogen bonds (no Cl) inside the tubes and the channel'
xlabel = 'Number of hydrogen bonds inside'
ylabel = 'Count (number of frames)'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of 'ave_dist' split by 'residues' label, leaving out the
# N-WAT and WAT-O rows.
keep = (detail["residues"] != "N-WAT") & (detail["residues"] != "WAT-O")
aux = detail[keep]

title, xlabel, ylabel = (
    'Average distance of the hydrogen bonds by type',
    'Average distance',
    'Count (number of frames)',
)

fig, ax = plt.subplots()
sns.histplot(
    data=aux, x="ave_dist", hue="residues",
    element="step", lw=2, alpha=0.01, ax=ax,
)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

## Hydrogen bond stability

In [None]:
# Lengths of the runs of consecutive frames for the channel hydrogen
# bonds, one histogram per 'residues' label.
stab = pd.read_csv("canal_hbonds_stability.csv")
# 'consecutives' arrives as text: a bracketed, comma-separated list of ints.
stab['consecutives'] = stab['consecutives'].apply(
    lambda text: np.array([int(tok) for tok in text[1:-1].split(',')])
)
# Leave out the N-WAT and WAT-O rows.
clean = stab[(stab["residues"] != "N-WAT") & (stab["residues"] != "WAT-O")]
print("Average number of consecutive frames:")

xlabel = 'Number of consecutive frames'
ylabel = 'Count of hydrogen bonds'
for res in clean["residues"].unique():
    aux = clean[clean["residues"] == res]

    # Pool the arrays of every row with this label; the empty float array
    # in front keeps the pooled result a float array.
    consecutives = np.concatenate([np.array([]), *aux["consecutives"]])

    print(res, consecutives.mean())
    # Half-integer edges: one unit-wide bin per whole number.
    bins = np.arange(min(consecutives) - 0.5, max(consecutives) + 1)

    fig, ax = plt.subplots()
    sns.histplot(data=consecutives, bins=bins, lw=2, ax=ax)
    decorate_ax(ax, res, 16, xlabel, ylabel, 14, 12, 2, 4, False)
#     plt.savefig('stab_'+res+'.png')

In [None]:
# Same analysis of consecutive-frame runs, read from the tube 1 file.
def _as_int_array(text):
    """Parse a bracketed, comma-separated string into an array of ints."""
    return np.array([int(tok) for tok in text[1:-1].split(',')])

stab = pd.read_csv("tube1_hbonds_stability.csv")
stab['consecutives'] = stab['consecutives'].apply(_as_int_array)
# Leave out the N-WAT and WAT-O rows.
clean = stab[~stab["residues"].isin(["N-WAT", "WAT-O"])]
print("Average number of consecutive frames:")

for res in clean["residues"].unique():
    aux = clean[clean["residues"] == res]

    # Pool every row's array into one float array.
    pieces = [np.array([])]
    pieces.extend(aux["consecutives"])
    consecutives = np.concatenate(pieces)

    print(res, consecutives.mean())
    lowest, highest = min(consecutives), max(consecutives)
    # Half-integer edges: one unit-wide bin per whole number.
    bins = np.arange(lowest - 0.5, highest + 1, 1)

    xlabel = 'Number of consecutive frames'
    ylabel = 'Count of hydrogen bonds'
    fig, ax = plt.subplots()
    sns.histplot(data=consecutives, bins=bins, lw=2, ax=ax)
    decorate_ax(ax, res, 16, xlabel, ylabel, 14, 12, 2, 4, False)
#     plt.savefig('stab_tube1.png')

In [None]:
# Number of entries in 'steps' for each row of the channel stability
# file, one histogram per 'residues' label.
stab = pd.read_csv("canal_hbonds_stability.csv")
# 'steps' arrives as text: a bracketed, comma-separated list of ints.
stab['steps'] = stab['steps'].apply(
    lambda text: np.array([int(tok) for tok in text[1:-1].split(',')])
)
# Leave out the N-WAT and WAT-O rows.
clean = stab[(stab["residues"] != "N-WAT") & (stab["residues"] != "WAT-O")]
print("Average number of frames:")

xlabel = 'Number of frames'
ylabel = 'Count of hydrogen bonds'
for res in clean["residues"].unique():
    aux = clean[clean["residues"] == res]

    # One value per row: how many steps that row lists (kept as floats).
    frames = np.array([len(steps) for steps in aux["steps"]], dtype=float)

    print(res, frames.mean())
    # Half-integer edges: one unit-wide bin per whole number.
    bins = np.arange(min(frames) - 0.5, max(frames) + 1)

    fig, ax = plt.subplots()
    sns.histplot(data=frames, bins=bins, lw=2, ax=ax)
    decorate_ax(ax, res, 16, xlabel, ylabel, 14, 12, 2, 4, False)
#     plt.savefig('stab_'+res+'.png')

In [None]:
# Same count of 'steps' entries per row, read from the tube 1 file.
def _as_int_array(text):
    """Parse a bracketed, comma-separated string into an array of ints."""
    return np.array([int(tok) for tok in text[1:-1].split(',')])

stab = pd.read_csv("tube1_hbonds_stability.csv")
stab['steps'] = stab['steps'].apply(_as_int_array)
# Leave out the N-WAT and WAT-O rows.
clean = stab[~stab["residues"].isin(["N-WAT", "WAT-O"])]
print("Average number of frames:")

for res in clean["residues"].unique():
    aux = clean[clean["residues"] == res]

    # One value per row: the length of its 'steps' array, as a float.
    frames = np.array([float(len(steps)) for steps in aux["steps"]])

    print(res, frames.mean())
    lowest, highest = min(frames), max(frames)
    # Half-integer edges: one unit-wide bin per whole number.
    bins = np.arange(lowest - 0.5, highest + 1, 1)

    xlabel = 'Number of frames'
    ylabel = 'Count of hydrogen bonds'
    fig, ax = plt.subplots()
    sns.histplot(data=frames, bins=bins, lw=2, ax=ax)
    decorate_ax(ax, res, 16, xlabel, ylabel, 14, 12, 2, 4, False)
#     plt.savefig('stab_'+res+'.png')

## Paths

In [None]:
# Load canal_paths.csv; its list-valued columns come back as text and are
# parsed into Python/NumPy containers here.
def _parse_path(text):
    """Parse a bracketed, comma-separated string into an array of ints."""
    return np.array([int(tok) for tok in text[1:-1].split(',')])


def _parse_labels(text):
    """Parse a bracketed, comma-separated string into a list of labels,
    dropping single quotes and surrounding spaces."""
    return [tok.replace("'", "").strip(" ") for tok in text[1:-1].split(',')]


paths = pd.read_csv("canal_paths.csv")
paths['path'] = paths['path'].apply(_parse_path)
paths['residues'] = paths['residues'].apply(_parse_labels)
paths

In [None]:
# Histogram of the number of entries in each path.
# `aux` is the same object as `paths`, so the 'len' column lands on both.
aux = paths
aux['len'] = aux['path'].apply(len)

lens = aux['len']
# Half-integer edges: one unit-wide bin per whole number.
bins = np.arange(min(lens) - 0.5, max(lens) + 1)

title, xlabel, ylabel = (
    'Number of atoms in the path',
    'Number of atoms',
    'Count (number of frames)',
)

fig, ax = plt.subplots()
sns.histplot(data=aux, x="len", bins=bins, lw=2, ax=ax)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of the 'path_length' column; seaborn picks the bins.
fig, ax = plt.subplots()
sns.histplot(data=paths, x="path_length", element="step", lw=2, alpha=0.01, ax=ax)
title = 'Length of the path'
# Raw string: '\A' is not a valid Python escape (DeprecationWarning, and a
# SyntaxWarning since Python 3.12). The text handed to matplotlib is
# unchanged.
xlabel = r'Length ($\AA$)'
ylabel = 'Count (number of frames)'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of the signed 'dz' column; seaborn picks the bins.
fig, ax = plt.subplots()
sns.histplot(data=paths, x="dz", element="step", lw=2, alpha=0.01, ax=ax)
title = 'Signed (direction) distance (in the Z axis) from start to finish'
# Raw string: '\A' is not a valid Python escape (DeprecationWarning, and a
# SyntaxWarning since Python 3.12). The text handed to matplotlib is
# unchanged.
xlabel = r'Distance ($\AA$)'
ylabel = 'Count (number of frames)'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Histogram of |dz|.
# assign() returns a new frame. Writing aux['dz'] while `aux` was just
# another name for `paths` overwrote the signed values in `paths`, so
# re-running the signed-distance plot afterwards showed absolute values.
aux = paths.assign(dz=paths['dz'].abs())

fig, ax = plt.subplots()
sns.histplot(data=aux, x="dz", element="step", lw=2, alpha=0.01, ax=ax)
title = 'Distance (in the Z axis) from start to finish'
# Raw string: '\A' is not a valid Python escape (DeprecationWarning, and a
# SyntaxWarning since Python 3.12). The text handed to matplotlib is
# unchanged.
xlabel = r'Distance ($\AA$)'
ylabel = 'Count (number of frames)'
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, False)

In [None]:
# Display the table held in `aux` (set by an earlier cell) ordered by 'dz'.
# sort_values returns a sorted copy; `aux` itself keeps its row order.
aux.sort_values('dz')

In [None]:
# Rows of step 0: print every 'residues' list first, then every path.
first_step = aux[aux['istep'] == 0]
for labels in first_step['residues']:
    print(labels)
for atoms in first_step['path']:
    # presumably shifts 0-based atom indices to 1-based numbering — TODO confirm
    print(atoms+1)

In [None]:
# How often each label of the 'residues' lists appears over all paths.
residues = [label for labels in paths['residues'] for label in labels]

fig, ax = plt.subplots()
counts = pd.Series(residues).value_counts(sort=True)
counts.plot(kind='bar', edgecolor='k', linewidth=2, ax=ax)
decorate_ax(ax, "Number of bonds in path", 16, "Type of bond", "Count", 14, 12, 2, 4, False)

In [None]:
# Residue name of every atom listed in the paths, looked up in the
# trajectory topology, then counted.
residues = [
    traj.top.atom(atom).resname
    for atoms in paths['path']
    for atom in atoms
]

fig, ax = plt.subplots()
counts = pd.Series(residues).value_counts(sort=True)
counts.plot(kind='bar', edgecolor='k', linewidth=2, ax=ax)
decorate_ax(ax, "Number of atoms in path", 16, "Type of atom", "Count", 14, 12, 2, 4, False)

In [None]:
# Correlation matrix of the numeric columns of the paths table.
# The derived columns go on a new frame, so `paths` keeps its signed 'dz'
# (writing through the `aux = paths` alias used to overwrite it).
aux = paths.assign(
    dz=paths['dz'].abs(),
    len=paths['path'].apply(len),
)

fig, ax = plt.subplots()
# Only numeric columns are correlated: 'path' and 'residues' hold
# arrays/lists, and pandas >= 2.0 raises on such columns instead of
# silently dropping them as older versions did.
numeric = aux.select_dtypes(include='number')
sns.heatmap(numeric.corr(), annot=True, cmap='coolwarm', ax=ax)
title = 'Correlation'
xlabel = ''
ylabel = ''
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 10, 2, 0, False)

## Coordination numbers

In [None]:
# Load the coordination table of the channel and display it as the cell output.
cn = pd.read_csv("canal_coordination.csv")
cn

In [None]:
# Coordination-number plot for one atom of the "canal" data set; the
# figure comes from the project helper and is only decorated here.
title, xlabel, ylabel = (
    'Coordination numbers of an atom',
    'Coordination number',
    'Count (number of frames)',
)
fig, ax = plot_coordination_atom("canal", 3984)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, True)

In [None]:
# Grid of coordination-number plots for the "canal" data set.
xlabel, ylabel = 'Coordination number', 'Count (number of frames)'

fig, axs = plot_coordinations("canal")
# Axis labels are shared at figure level, so each panel keeps only the
# title it already has.
fig.supxlabel(xlabel, fontsize=14)
fig.supylabel(ylabel, fontsize=14)
for ax in axs.flatten():
    panel_title = ax.get_title()
    decorate_ax(ax, panel_title, 14, '', '', 14, 12, 2, 4, True)

In [None]:
# Grid of average coordination numbers for the "canal" data set.
xlabel, ylabel = 'Atom type', 'Coordination number'

fig, axs = plot_coordination_averages("canal")
# Axis labels are shared at figure level, so each panel keeps only the
# title it already has.
fig.supxlabel(xlabel, fontsize=14)
fig.supylabel(ylabel, fontsize=14)
for ax in axs.flatten():
    panel_title = ax.get_title()
    decorate_ax(ax, panel_title, 14, '', '', 14, 12, 2, 4, False)

## Hydrogen bond coordination and structures

In [None]:
# Load the hydrogen-bond coordination table of the channel and display it
# as the cell output.
cnb = pd.read_csv("canal_coordination_hbonds.csv")
cnb

In [None]:
# Hydrogen-bond count plot for one atom of the "canal" data set; the
# figure comes from the project helper and is only decorated here.
title, xlabel, ylabel = (
    'Hydrogen bonds of an atom',
    'Number of hydrogen bonds',
    'Count (number of frames)',
)
fig, ax = plot_coordinationbonds_atom("canal", 2257)
decorate_ax(ax, title, 16, xlabel, ylabel, 14, 12, 2, 4, True)

In [None]:
# Grid of hydrogen-bond count plots for the "canal" data set.
xlabel, ylabel = 'Number of hydrogen bonds', 'Count (number of frames)'

fig, axs = plot_coordinationsbonds("canal")
# Axis labels are shared at figure level, so each panel keeps only the
# title it already has.
fig.supxlabel(xlabel, fontsize=14)
fig.supylabel(ylabel, fontsize=14)
for ax in axs.flatten():
    panel_title = ax.get_title()
    decorate_ax(ax, panel_title, 14, '', '', 14, 12, 2, 4, True)

In [None]:
# Grid of average hydrogen-bond counts for the "canal" data set.
xlabel, ylabel = 'Bond type', 'Number of hydrogen bonds'

fig, axs = plot_coordinationbonds_averages("canal")
# Axis labels are shared at figure level, so each panel keeps only the
# title it already has.
fig.supxlabel(xlabel, fontsize=14)
fig.supylabel(ylabel, fontsize=14)
for ax in axs.flatten():
    panel_title = ax.get_title()
    decorate_ax(ax, panel_title, 14, '', '', 14, 12, 2, 4, False)

### Cómo leer las configuraciones
Las cifras de las configuraciones indican que la molécula está formando tantos puentes de hidrógeno...<br />
`donando a CL | donando a LYN | donando a WAT | aceptando de LYS | aceptando de LYN | aceptando de WAT`

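Hay que tener en cuenta que si los números de la izquierda (por ejemplo "donando a CL") son 0, estos no aparecen explícitamente en la configuración. Así que para cualquier configuración con menos de 6 cifras hay que imaginar 0s a la izquierda hasta que tenga 6 cifras. Por ejemplo, la configuración `201` debe leerse como `000201`: la molécula no dona ningún puente y acepta 2 de LYS, 0 de LYN y 1 de WAT.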

In [None]:
# Print the configurations of the "canal" data set for WAT
# (presumably in the digit format explained in the markdown above — confirm in the helper).
print_configurations("canal", "WAT")

In [None]:
# Print the configurations of the "canal" data set for CL
# (presumably in the digit format explained in the markdown above — confirm in the helper).
print_configurations("canal", "CL")

In [None]:
# Print the configurations of the "canal" data set for LYS
# (presumably in the digit format explained in the markdown above — confirm in the helper).
print_configurations("canal", "LYS")

In [None]:
# Print the configurations of the "canal" data set for LYN
# (presumably in the digit format explained in the markdown above — confirm in the helper).
print_configurations("canal", "LYN")