# Trajectory Analysis

In [None]:
import ampal
import nglview as nv

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as md

In [None]:
#my_protein = ampal.load_pdb('/home/eva/Documents/analysis/test-onechain-GLH.pdb')

In [None]:
#my_protein.sequences

Loading the trajectory and topology files:

In [None]:
# Input files for this analysis: the trajectory and the matching topology.
trajectory_file = '/home/eva/Documents/gbsa-sims/sims-1/tet_L24E/GLH/dcd/6q5i_GLH_output.dcd'
topology_file = '/home/eva/Documents/structures/tet_L24E/GLH/6q5i_GLH.prmtop'

# The topology is handed to the loader separately through `top`.
traj = md.load(trajectory_file, top=topology_file)

In [None]:
#frame1 = md.load_frame('/home/eva/Documents/gbsa-sims/sims-1/tet_L24E/GLH/dcd/6q5i_GLH_output.dcd', 0, top = '/home/eva/Documents/structures/tet_L24E/GLH/6q5i_GLH.prmtop')

In [None]:
#frame1.save_pdb('test.pdb')

## Structure

In [None]:
def show_ball_and_stick(ampal):
    """Build an nglview widget that draws a structure as ball-and-stick.

    Parameters
    ----------
    ampal : object
        Any object whose ``.pdb`` attribute can be passed to ``nv.show_text``
        (presumably the PDB text of an AMPAL object). Note that this
        parameter name hides the imported ``ampal`` module inside the function.

    Returns
    -------
    The widget returned by ``nv.show_text``, with the ball-and-stick
    representation added and the cartoon representation removed.
    """
    widget = nv.show_text(ampal.pdb)
    widget.add_ball_and_stick()
    widget.remove_cartoon()
    return widget

In [None]:
# NOTE(review): `my_protein` is only assigned by the `ampal.load_pdb(...)` line that is
# currently commented out earlier in this notebook, so on a fresh kernel this call
# raises NameError unless that line is restored first.
show_ball_and_stick(my_protein)

A helper function (not yet implemented) that takes a list and the number of chains as input and splits the list into one sub-list per chain in the structure:

In [None]:
#def assembly_splitter(lst, chain_no)

Separating the amino acids and the ACE-NH2 caps, and creating lists for their indexes:

In [None]:
# Materialise the residues once so they can be inspected again after the loop.
res_obj = list(traj.topology.residues)

# Residue labels (name + number, e.g. as produced by str(residue)).
res_list = []        # amino acids only
caps_positions = []  # ACE / NHE caps only
all_residues = []    # everything, in topology order

# The matching residue numbers for each of the three lists above.
res_idx = []
caps_idx = []
all_residues_idx = []

for res in res_obj:
    res_label = str(res)
    # Take the number straight from the residue object instead of slicing it
    # out of the label: fixed-width slicing ([3:6]) only works for three-letter
    # residue names and truncates residue numbers of 1000 and above.
    res_number = res.resSeq

    all_residues.append(res_label)
    all_residues_idx.append(res_number)

    if res.name in ('ACE', 'NHE'):
        print('excluding the ACE and NH2 caps')
        caps_positions.append(res_label)
        caps_idx.append(res_number)
    else:
        res_list.append(res_label)
        res_idx.append(res_number)

#len(res_list)
#print(caps_positions)
#print(res_list)
#print(caps_idx)
#len(caps_idx)
#print(all_residues_idx)

Separating the individual chains (based on the positions of the caps):

In [None]:
# Assumes each chain contributes exactly two caps (one ACE, one NHE), so the
# chain count is half the number of caps found.
chain_number = len(caps_idx) // 2
print('the number of chains in the structure is', chain_number)

Separating the caps by type:

In [None]:
# Caps are assumed to alternate ACE, NHE, ACE, NHE, ... in topology order
# (TODO confirm), so even positions are the ACE caps and odd positions the NHE caps.
ace = caps_idx[::2]
nh2 = caps_idx[1::2]

In [None]:
# Slice bounds of each chain: everything strictly between its ACE and NHE cap.
# NOTE(review): the cap residue numbers are used directly as list positions,
# which presumably relies on residue numbers being 0-based and contiguous in
# this topology -- verify against chain_list[0].
chain_bounds = [(ace[i] + 1, nh2[i]) for i in range(len(ace))]

# Residue labels and residue numbers, grouped per chain.
chain_list = [all_residues[start:stop] for start, stop in chain_bounds]
chain_idx = [all_residues_idx[start:stop] for start, stop in chain_bounds]

# Residue numbers of all chains in one flat list (caps excluded).
all_idx = [int(name[3:6]) for members in chain_list for name in members]

#print(all_idx)
#print(chain_idx)

We can view the sequence and residue indexes of a single chain with:

In [None]:
#chain_list[0]

In [None]:
#chain_idx[0]

## RMSD calculation

Printing the Python expression that MDTraj generates for the `backbone` atom selection:

In [None]:
# Shown for reference only: the printed text is the selection expression for
# 'backbone', not a list of atom indices. `selection` is not used by later cells.
selection = traj.topology.select_expression('backbone')
print(selection)

Calculating the RMSD from the first frame (initial structure), for the backbone atoms:

In [None]:
# Restrict the calculation to the backbone atoms.
backbone_atom_ids = [atom.index for atom in traj.topology.atoms if atom.is_backbone]

# Reference structure is frame 0 of the same trajectory.
rmsds = md.rmsd(traj, traj, 0, atom_indices=backbone_atom_ids)

Creating the time axis in ns (4 fs timestep, 200 ns simulation time; the array step of 0.004 ns is the 4 ps interval between saved frames):

In [None]:
# Interval between consecutive saved frames, in ns (0.004 ns = 4 ps).
frame_spacing_ns = 0.004

# One time point per trajectory frame. Deriving the axis from the frame count
# (instead of np.arange(0.0, 200.0, 0.004)) guarantees that `t` has the same
# length as the per-frame arrays plotted against it: the hard-coded 200 ns end
# point only fits a trajectory of exactly that length, and np.arange with a
# float step can come out one element too long or too short.
t = np.arange(traj.n_frames) * frame_spacing_ns

Creating the RMSD plot:

In [None]:
# Backbone RMSD as a function of simulation time.
fig, ax = plt.subplots()
ax.plot(t, rmsds)
ax.set_xlabel('t (ns)')
ax.set_ylabel('RMSD (nm)')
ax.set_title('RMSD')
ax.grid()

# Write the figure to disk before showing it.
fig.savefig('rmsd.png')
plt.show()

## RMSF calculation

Printing the Python expression that MDTraj generates for the `name CA` atom selection:

In [None]:
# Shown for reference only: the printed text is the selection expression for
# 'name CA', not a list of atom indices. `selection` is not used by later cells.
selection = traj.topology.select_expression('name CA')
print(selection)

Calculating the RMSF from the first frame (initial structure) for the CA atoms:

In [None]:
# Restrict the calculation to the alpha-carbon atoms (atoms named 'CA').
ca_atom_ids = [atom.index for atom in traj.topology.atoms if atom.name == 'CA']

# Reference structure is frame 0 of the same trajectory.
rmsfs = md.rmsf(traj, traj, 0, atom_indices=ca_atom_ids)

Creating the RMSF plot (per assembly): 

In [None]:
# RMSF of every non-cap residue in the whole assembly.
fig, ax = plt.subplots()
ax.plot(res_idx, rmsfs)
ax.set_xlabel('residue number')
ax.set_ylabel('RMSF (nm)')
ax.set_title('RMSF')
ax.grid()

# Write the figure to disk before showing it.
fig.savefig('rmsf.png')
plt.show()

Creating the RMSF plot per chain (example: first chain)

In [None]:
# Number of residues in the first chain. The per-chain plots below use this as
# the slice width for every chain, i.e. they assume all chains have this length.
chain_length = len(chain_idx[0])

In [None]:
# RMSF of the first chain only: the first `chain_length` entries of `rmsfs`.
fig, ax = plt.subplots()
ax.plot(chain_idx[0], rmsfs[:chain_length])
ax.set_xlabel('residue number')
ax.set_ylabel('RMSF (nm)')
ax.set_title('RMSF')
ax.grid()

# Write the figure to disk before showing it.
fig.savefig('rmsf-per-chain-1.png')
plt.show()

In [None]:
#rmsfs[1*chain_length:2*chain_length] #second chain residues

In [None]:
#rmsfs[2*chain_length:3*chain_length] #third chain, and so on...

#### RMSF - all individual chains in one graph

In [None]:
# Overlay the RMSF of every chain on one set of axes.
# The number of curves follows the number of chains found in the topology
# instead of being fixed at four, so the cell also works for other assemblies.
fig, ax = plt.subplots()

for chain_no in range(len(chain_idx)):
    # Each chain occupies one consecutive block of `chain_length` values in `rmsfs`.
    start = chain_no * chain_length
    stop = start + chain_length
    # All chains are drawn against the residue numbers of the first chain so
    # that equivalent positions line up; labels run A, B, C, ...
    ax.plot(chain_idx[0], rmsfs[start:stop], label="chain " + chr(ord('A') + chain_no))

ax.set(xlabel='residue number', ylabel='RMSF (nm)', title='RMSF')
ax.grid()
ax.legend()

fig.savefig('rmsf-per-chain-multiple.png')
plt.show()

## Secondary structure calculation

In [None]:
# DSSP assignment with the full (non-simplified) set of codes.
# `sec_struct` is only printed here; later cells use the simplified assignment.
sec_struct = md.compute_dssp(traj, simplified = False)
print(sec_struct)

Simplified secondary structure: H = helix, C = coil, E = strand

In [None]:
# DSSP assignment reduced to the simplified codes; this is the array the
# helicity analysis below is built on (iterated frame by frame, indexed by residue).
sec_struct_simple = md.compute_dssp(traj, simplified=True)
print(sec_struct_simple)

In [None]:
# The first axis of the DSSP array is treated as the frame axis; its length is
# used below as the denominator of the helicity percentage.
frames = len(sec_struct_simple)
print('the number of frames is', frames)

Saving the secondary structure of each residue in the struct_list:

In [None]:
# Total number of residues in the assembly (caps included).
aa = len(all_residues)

# Regroup the per-frame DSSP rows by residue: struct_list[r] is the list of
# codes assigned to residue position r, one entry per frame, in frame order.
struct_list = [
    [frame[idx] for frame in sec_struct_simple]
    for idx in range(aa)
]

Getting the secondary structure of the i-th residue with struct_list[i-1] (across the whole simulation):

In [None]:
#print(struct_list[0])
#print(len(struct_list[9]))

Note that this includes the ACE and NH2 caps (sec. struct 'NA').

### Helicity per residue calculation

The res_list contains all residues with their residue number, whereas the res_idx contains just the residue number (ACE and NH2 caps excluded)

In [None]:
#print(res_list)
#print(res_idx)

Calculating the helicity per residue - excluding caps:

In [None]:
# Percentage of frames in which each residue is helical ('H'), caps excluded.
helicity_per_residue = []

for res in struct_list:
    # Residues carrying the 'NA' code (the ACE / NHE caps) are skipped.
    if 'NA' in res:
        print('excluding the ACE and NH2 caps')
        continue

    # Count the helical frames once, then convert to a percentage once per
    # residue (the percentage used to be recomputed for every single snapshot).
    helix_frames = sum(1 for snapshot in res if snapshot == 'H')

    # With no frames there is nothing to average; report 0 instead of dividing by zero.
    helicity = (helix_frames / frames) * 100 if frames else 0
    helicity_per_residue.append(helicity)

In [None]:
#len(helicity_per_residue)
#print(helicity_per_residue)

Creating the %helicity per residue plot:

In [None]:
# Helicity of every non-cap residue in the whole assembly.
fig, ax = plt.subplots()
ax.plot(res_idx, helicity_per_residue)
ax.set_xlabel('residue number')
ax.set_ylabel('%helicity per residue')
ax.set_title('%helicity per residue')
ax.grid()

# Write the figure to disk before showing it.
fig.savefig('hpr.png')
plt.show()

### Helicity per chain calculation

In [None]:
# Helicity of the first chain only: the first `chain_length` entries of the list.
fig, ax = plt.subplots()
ax.plot(chain_idx[0], helicity_per_residue[:chain_length])
ax.set_xlabel('residue number')
ax.set_ylabel('%helicity per residue')
ax.set_title('%helicity per residue')
ax.grid()

# Write the figure to disk before showing it.
fig.savefig('hpr-per-chain-1.png')
plt.show()

#### Helicity per residue plot - all individual chains in one graph

In [None]:
# Overlay the per-residue helicity of every chain on one set of axes.
# The number of curves follows the number of chains found in the topology
# instead of being fixed at four, so the cell also works for other assemblies.
fig, ax = plt.subplots()

for chain_no in range(len(chain_idx)):
    # Each chain occupies one consecutive block of `chain_length` values.
    start = chain_no * chain_length
    stop = start + chain_length
    # All chains are drawn against the residue numbers of the first chain so
    # that equivalent positions line up; labels run A, B, C, ...
    ax.plot(chain_idx[0], helicity_per_residue[start:stop], label="chain " + chr(ord('A') + chain_no))

ax.set(xlabel='residue number', ylabel='%helicity per residue', title='%helicity per residue')
ax.grid()
ax.legend()

fig.savefig('hpr-per-chain-multiple.png')
plt.show()

## Hydrogen bonds calculation

Each row in the resulting array contains the donor atom, the hydrogen atom and the acceptor atom indices.

In [None]:
# Hydrogen bonds identified with MDTraj's Baker-Hubbard routine; all options are
# passed explicitly so the criteria are visible here.
# NOTE(review): distance_cutoff is presumably in nm and angle_cutoff in degrees,
# and freq the minimum fraction of frames a bond must appear in -- confirm
# against the MDTraj documentation.
hbonds = md.baker_hubbard(traj, freq=0.1, exclude_water=True, periodic=False, sidechain_only=False, distance_cutoff=0.25, angle_cutoff=120)

print(hbonds)

In [None]:
def label(hbond):
    """Return a 'donor -- acceptor' text label for one hydrogen bond.

    `hbond` is indexed as (donor, hydrogen, acceptor) atom indices; only the
    first and third entries are looked up in the trajectory topology.
    """
    donor = traj.topology.atom(hbond[0])
    acceptor = traj.topology.atom(hbond[2])
    return '%s -- %s' % (donor, acceptor)

In [None]:
# One line per detected hydrogen bond, formatted as "donor -- acceptor".
for bond in hbonds:
    print(label(bond))

## 