TITLE: example.ipynb

PURPOSE: <GOES HERE>

PROJECT: <GOES HERE>

AUTHOR: Jacob Lloyd North

INSTITUTION: Oregon State University

PRECONDITIONS: 

INPUTS: 

OUTPUTS: 

SECTION 1: Include commonly used libraries

In [None]:
# Maths and visualization libraries
import numpy as np          # NumPy
import scipy        # Import all of SciPy
import networkx 
# import pathpy2
import matplotlib.pyplot as plt     # Matplotlib
import umap
import RamachanDraw

In [None]:
%matplotlib inline

In [None]:
# Machine learning libraries
import sklearn
import torch             # Import all of PyTorch
import fastai            # Import all of FastAi
# import pydbm            # boltzmann machines

In [None]:
# MD analysis libraries
import mdtraj           # Import all of MDTraj
import MDAnalysis
# import pydtmc           # discrete-time markov chains

# Normal modes of motion
# import pydmd            # Dynamic mode decomposition
#  import pynamical        # Dynamical systems 
import pyemma
# import pypcazip
# NOTE: ProDy is bound to the alias `pd`, which by convention usually means pandas.
# Every `pd.*` call in the cells below (parsePDB, PCA, ANM, writeNMD, ...) is a ProDy call.
import prody as pd            # Protein dynamics


In [None]:
# Other libraries
import Bio          # Biopython
import pdbtools    # Useful for dealing with pdbs
import wget         # to download pdb files
import biopandas

In [None]:
# PHYSICAL CHEMISTRY

# Quantum chemistry libraries
import quantum_dynamics
import qutip

# Statistical thermodynamics
# import curp         # energy (heat) flow analysis -- ONLY in Python2 currently!

In [None]:
# Structural biology
# import biskit       # BUILD FAILS
# import aleph        # Molecular replacement library

In [None]:
# Cellular biology
import pysces       # 

In [None]:
# DEBUG
# import mdbenchmark  # For optimizing core usage in low-resources machines

SECTION 2: MAIN

In [None]:
# Get PDB: ask for the entry to analyse. Surrounding whitespace is dropped so a
# stray space typed with the ID is not passed on to fetch().
PDB_id = input("Please enter a PDB ID: ").strip()
if not PDB_id:
    # Fail here with a clear message instead of deep inside the fetch call
    raise ValueError("A PDB ID is required (e.g. 1MBN).")
# Design URL string from standard PDB naming convention
# url = 'https://files.rcsb.org/header/' + PDB_id + '.pdb'
# wget.download(url)

# Ramachandran helpers used in this cell
from RamachanDraw import fetch, phi_psi, plot

# Fetch the structure once and reuse the result: the original called
# fetch(PDB_id) separately for the plot and for the angle extraction.
pdb_file = fetch(PDB_id)

# Draw the Ramachandran plot
plot(pdb_file)

# Dictionary of the phi and psi angles, plus the amino-acid residues that were ignored
phi_psi_dict, ignored_res = phi_psi(pdb_file, return_ignored=True)

In [None]:
# ProDy testing: parse the structure identified by '1p38' into `prot`, which the
# statistics cell below reads. (`pd` is the ProDy alias here, not pandas.)
prot = pd.parsePDB('1p38')

In [None]:
# Summarise the parsed structure: one "label value" line per statistic,
# followed by ProDy's rendering of the protein
structure_stats = (
    ("Radius of gyration:", pd.calcGyradius(prot)),
    ("Number of atoms:", prot.numAtoms()),
    ("Number of Coordinate sets:", prot.numCoordsets()),
    ("Number of residues:", prot.numResidues()),
)
for label, value in structure_stats:
    print(label, value)

pd.showProtein(prot)

PRINCIPAL COMPONENT ANALYSIS

In [None]:
# Prepare the ensemble from the '2k39' structure. The `ubi_selection` and
# `ubi_ensemble` objects created here are reused by the PCA, ANM and NMD-export cells.
ubi = pd.parsePDB('2k39', subset='calpha')   # parse with subset='calpha'
ubi_selection = ubi.select('resnum < 71')    # keep only atoms matching 'resnum < 71'
ubi_ensemble = pd.Ensemble(ubi_selection)    # ensemble built from that selection
ubi_ensemble.iterpose()                      # presumably iterative superposition of the coordinate sets -- confirm in ProDy docs

In [None]:
# Run PCA on the ensemble; 'Ubiquitin' is the name passed to the PCA object
pca = pd.PCA('Ubiquitin')
pca.buildCovariance(ubi_ensemble)   # covariance is built from ubi_ensemble (prepared in the previous cell)
pca.calcModes()                     # called without arguments, so the library defaults apply

In [None]:
# Observe the top 6 ranked principal components (the slice below is pca[:6]):
# print each one's fraction of variance, rounded to 2 decimals
for mode in pca[:6]:
    print(pd.calcFractVariance(mode).round(2))
# Save the principal modes
pd.saveModel(pca)

ANISOTROPIC NETWORK MODEL


In [None]:
# Anisotropic network model built on the same atom selection (ubi_selection)
# that the PCA ensemble was created from
anm = pd.ANM('ubi') # instantiate ANM object
anm.buildHessian(ubi_selection) # build Hessian matrix for selected atoms
anm.calcModes() # calculate normal modes
# Disabled: saving the ANM. If re-enabled it needs the `pd.` prefix, as in pd.saveModel(pca).
# saveModel(anm)

In [None]:
# Pull out the first ANM mode and show it, then its eigenvalue rounded to
# 3 decimals, each on its own line
slowest_mode = anm[0]
print(slowest_mode, slowest_mode.getEigval().round(3), sep="\n")

In [None]:
# Confirm mode orthogonality: product of mode 0 with modes 1 and 2,
# rounded to 10 decimals
reference_mode = anm[0]
for partner_index in (1, 2):
    print((reference_mode * anm[partner_index]).round(10))

COMPARING EXPERIMENTAL AND THEORETICAL RESULTS

In [None]:
# Overlap between the first six PCA modes and the first six ANM modes:
# printed as a text table, then shown as a plot
pca_modes, anm_modes = pca[:6], anm[:6]
pd.printOverlapTable(pca_modes, anm_modes)
pd.showOverlapTable(pca_modes, anm_modes)

DATA OUTPUT

In [None]:
# Export the first three PCA modes together with the selected atoms to 'ubi_pca.nmd'
pd.writeNMD('ubi_pca.nmd', pca[:3], ubi_selection)         # NMD format for nm wizard
# Disabled plain-text export; needs the `pd.` prefix if re-enabled:
# writeArray('ubi_pca_modes.txt', pca.getArray(), format='%8.3f')     # text


In [None]:
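# Optional: view the exported modes in VMD. Left commented out; the VMD path below is an
# absolute path specific to one machine and must be changed before enabling these lines.
# pd.pathVMD('/Users/jacobnorth/Applications/VMD\ 1.9.4.app/Contents/MacOS/startup.command')
# pd.viewNMDinVMD('ubi_pca.nmd')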