In [None]:
# Import libraries
import nglview, ipymd, MDAnalysis, mdtraj       # trajectory analysis libraries
import pyemma               # Markov model analysis

# import gromacs
# from Bio.PDB import PDBList

import datetime
# Captured once, when this cell runs. now_dir_ts() formats this stored value,
# so the timestamp does not advance until this cell is executed again.
now = datetime.datetime.now()

from sys import argv

# gromacs.config.setup() # UNCOMMENT if not yet setup on your system!
# print("GMX version:",gromacs.release(),"\n")
# help(gromacs.pdb2gmx)

# TRAJECTORY ANALYSIS

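This notebook analyses a molecular dynamics (MD) trajectory. The sections below use MDTraj to load a trajectory, inspect its topology, select atoms, and identify hydrogen bonds with the Baker-Hubbard method.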

In [None]:
# pypdb is only used by the (currently commented-out) PDB download cell further down
import pypdb

# Define the analysis datapath
def now_dir_ts(ts=None):
    """Build the timestamp suffix used to name an analysis directory.

    Args:
        ts: Optional ``datetime.datetime`` to format. When omitted, the
            module-level ``now`` value is used, which is what the original
            zero-argument call did.

    Returns:
        str: ``year_month_day_hour_minute_second`` with the fields joined by
        underscores and not zero-padded (e.g. ``2020_6_24_15_44_5``).
    """
    if ts is None:
        ts = now  # fall back to the notebook-wide timestamp
    fields = (ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second)
    return "_".join(str(field) for field in fields)

# Assemble the analysis directory: <data root><user-chosen sub-directory>/mdana_<timestamp>
sure_data_root = "/Users/jacobnorth/Box/extracurriculars/research/SURE_S2020_fileshare/sure_data/"
sub_dir = input("Please enter a sub-directory of sure_data/:")
ana_dir = sure_data_root + sub_dir + "/mdana_" + now_dir_ts()

print(ana_dir)      # Print the analysis datapath

In [None]:
# id = input("Please enter a PDB ID to simulate:")
# pdbl = PDBList()            # Create a PDBList object 
# pdbl.retrieve_pdb_file(id, file_format='pdb', pdir=ana_dir)       # Retrieve the PDB file in PDB format
# pypdb.get_pdb_file('1oca', filetype='PDB')      # Search and download on PDB

# MDTraj Intro analysis

In [None]:
# Directory holding the output of an earlier simulation run; this replaces
# whatever value ana_dir had before this cell.
ana_dir = '/Users/jacobnorth/Box/extracurriculars/research/SURE_S2020_fileshare/sure_data/1aki/mdsim_2020_6_24_15_44_SHELLSCRIPT/'

import mdtraj as md
# t = md.load('trajectory.xtc', top='trajectory.pdb')
# Load the em.trr trajectory using em.gro from the same directory as its topology.
traj_file = ana_dir + 'em.trr'
top_file = ana_dir + 'em.gro'
t = md.load(traj_file, top=top_file)
print(t)

In [None]:
import numpy as np

# Summarise the loaded trajectory: coordinate array shape, the mean over every
# coordinate value, and the unit-cell lengths of the final frame.
coords = t.xyz
last_cell_lengths = t.unitcell_lengths[-1]
print("Trajectory dimensions:\t", coords.shape)      # Traj dimensions
print("Trajectory mean dims:\t", coords.mean())
print("l_unitcell for last frame:\t", last_cell_lengths)


In [None]:
# Save every second frame of the trajectory back to disk in HDF5 format
# t[::2].save('halftraj.h5')

In [None]:
# If you only wish to keep the c-alpha atoms (CA), ...
# atoms_to_keep = [a.index for a in t.topology.atoms if a.name == 'CA']
# t.restrict_atoms(atoms_to_keep)  # this acts inplace on the trajectory
# t.save('CA-only.h5')

# Atom selection

In [None]:
# Report the atom and residue counts of the loaded trajectory.
for caption, count in (('How many atoms?    ', t.n_atoms),
                       ('How many res+solv? ', t.n_residues)):
    print(caption + str(count))

In [None]:
# Keep a direct handle on the trajectory's topology; the atom-selection cells
# that follow query it through this name.
topology = t.topology
print(topology)

In [None]:
# topology.atom() counts from zero, so index 4 is the fifth atom.
fifth_atom = topology.atom(4)
print('Fifth atom: %s' % fifth_atom)
# Materialise the atom iterator so the whole list is printed.
print('All atoms: %s' % list(topology.atoms))

In [None]:
# Residues are also indexed from zero: index 1 is the second residue.
second_residue = t.topology.residue(1)
print('Second residue: %s' % second_residue)
print('All residues: %s' % list(t.topology.residues))

In [None]:
# Identify an atom of interest and describe it
atom = topology.atom(10)
atom_facts = (atom.index, atom.name, atom.element.name, atom.n_bonds, atom.residue.name)
# The space before each line break is part of the printed text.
print('Hi! I am the %sth atom, and my name is %s. \n'
      'I am a %s atom with %s bonds. \n'
      'I am part of an %s residue.' % atom_facts)

In [None]:
# Print the indices of all side-chain carbon atoms in the structure.
# The element symbol is compared with ==; `is` tests object identity, which
# matches a string literal only by accident of interning.
print([a.index for a in topology.atoms if a.element.symbol == 'C' and a.is_sidechain])

In [None]:
# Print the residues of the first chain whose index is even.
first_chain = topology.chain(0)
even_residues = [res for res in first_chain.residues if res.index % 2 == 0]
print(even_residues)

In [None]:
# Select the atoms belonging to residues 'resid 1 to 2'.
# NOTE(review): MDTraj documents `resid` as a zero-based residue index, which
# would make this the second and third residues rather than the first two.
# Confirm which was intended.
resid_atom_indices = topology.select('resid 1 to 2')
print(resid_atom_indices)

In [None]:
# Print the indices of the backbone atoms named N (not every nitrogen atom:
# the selection string restricts the match to the backbone).
backbone_n_indices = topology.select('name N and backbone')
print(backbone_n_indices)

In [None]:
# Ask MDTraj for the expression it generates for a CA selection and print it,
# to show the code behind the query.
# NOTE(review): `resid` is documented as zero-based, so 'resid 1 to 2' may not
# be "the first two residues". Confirm the intended range.
ca_query = 'name CA and resid 1 to 2'
selection = topology.select_expression(ca_query)
print(selection)

# Baker-Hubbard Hydrogen Bond Identification

In [None]:
from __future__ import print_function
%matplotlib inline
import matplotlib.pyplot as plt
import itertools
import mdtraj as md
import mdtraj.testing

In [None]:
# Fetch PDB entry 1oca from the RCSB file-download service over HTTPS
# (the former http://www.rcsb.org/pdb/files/ address is a legacy endpoint).
# NOTE: this rebinds `t`, replacing the trajectory loaded from disk earlier.
t = md.load_pdb('https://files.rcsb.org/download/1oca.pdb')
print(t)

In [None]:
# Identify hydrogen bonds with the Baker-Hubbard criterion, ignoring periodic images.
hbonds = md.baker_hubbard(t, periodic=False)


def label(hbond):
    """Format one hydrogen-bond row as '<atom at column 0> -- <atom at column 2>'.

    The notebook treats columns 0 and 2 of each row as the donor and the
    acceptor atom index respectively.
    """
    donor_idx, acceptor_idx = hbond[0], hbond[2]
    return '%s -- %s' % (t.topology.atom(donor_idx), t.topology.atom(acceptor_idx))


for hbond in hbonds:
    print(label(hbond))

In [None]:
# Columns 0 and 2 of each hydrogen-bond row form the atom pair whose distance
# is computed for every frame.
donor_acceptor_pairs = hbonds[:, [0, 2]]
da_distances = md.compute_distances(t, donor_acceptor_pairs, periodic=False)

In [None]:
# Overlay distance histograms for hydrogen bonds 2..9 only, cycling through
# four colours so neighbouring histograms can be told apart.
color = itertools.cycle(['r', 'b', 'gold', 'g'])
for i in range(2, 10):
    plt.hist(
        da_distances[:, i],
        color=next(color),
        label=label(hbonds[i]),
        alpha=0.5,
    )
plt.legend()
plt.ylabel('Freq');
plt.xlabel('Donor-acceptor distance [nm]')