# Load the module and the needed data for the demonstration

<pre> 
Guillermo Perez-Hernandez  guille.perez@fu-berlin.de 
</pre>
   
In this notebook we will be using the 1 millisecond trajectory of Bovine Pancreatic Trypsin Inhibitor (BPTI) generated by DE Shaw Research on the Anton Supercomputer. The original work is 
 * Shaw DE, Maragakis P, Lindorff-Larsen K, Piana S, Dror RO, Eastwood MP, Bank JA, Jumper JM, Salmon JK, Shan Y, Wriggers W: Atomic-level characterization of the structural dynamics of proteins. Science 330:341-346 (2010).  doi: 10.1126/science.1187409.

In [2]:
import projX
%matplotlib notebook

In [3]:
trajfile = 'data/c-alpha_centered.stride.100.xtc'
top = 'data/bpti-c-alpha_centered.pdb'

In [4]:
# Skip TICA if you already have an Y.npy file
try:
    
    import numpy as np
    Y = np.load('data/Y.npy')
except:
    import pyemma
    feat = pyemma.coordinates.featurizer(top)
    pairs = feat.pairs(range(feat.topology.n_atoms)[::2])
    feat.add_distances(pairs)
    src  = pyemma.coordinates.source(trajfile, features=feat)
    tica = pyemma.coordinates.tica(src, lag=100, dim=3)
    Y = tica.get_output()[0]
    np.save('data/Y.npy', Y)

# Visualize a FES and the molecular structures "behind" it
Execute the following cell and click either on the FES or on the slidebar

In [5]:
ax, fig, iwd, data_sample, geom = projX.visualize.FES(trajfile, top, 'data/Y.npy', 
                                                 nbins=50, xlabel='TICA_0', ylabel='TICA_1'
                                          )
iwd

<IPython.core.display.Javascript object>

# Do "trajectory-sampling" of a given FES
This can be more time consuming and prettly slow, but is a proof of function. The user can sample structures as they occurr in sequence in the actual trajectory

In [6]:
__, __, iwd, __ = projX.visualize.traj(trajfile, top, 
                                       'data/Y.npy', 
                                       plot_FES = True,
                                       max_frames=2000
                          )
iwd

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

  _plt.contourf(-_np.log(h).T, extent=irange)


# Intermediate steps: use projX to generate a regspace sample of the data

In [7]:
data_sample, geoms = projX.generate.sample(trajfile, top, Y, 
                                      n_points=200
                                      #keep_all_samples=True,                                      
                                      #n_geom_samples=200,
                                      #verbose=True
                                    )
data_sample.shape, geoms

22-02-17 18:15:36 pyemma.coordinates.clustering.regspace.RegularSpaceClustering[1] INFO     Presumably finished estimation. Message: Used data for centers: 84.85%




((191, 2),
 <mdtraj.Trajectory with 191 frames, 58 atoms, 58 residues, and unitcells at 0x7fc91c2a4ac8>)

# Link the PDF plot with the sampled structures and visually explore the FES 
Click either on the plot or on the widget slidebar: they're connected! 

In [8]:
# Replot the FES
from matplotlib import pylab as plt
plt.figure(figsize=(10,10))
h, (x,y) = np.histogramdd(Y[:,:2], bins=50)
plt.contourf(x[:-1], y[:-1], -np.log(h.T), alpha=.50)
# Create the linked widget
linked_wdg = projX.visualize.sample(data_sample, 
                              geoms.superpose(geoms[0]),                             
                              plt.gca(), 
                              clear_lines=True,
                              #plot_path=True
                            )
plt.plot(data_sample[:,0], data_sample[:,1],' ok', zorder=0)
# Show it
linked_wdg

<IPython.core.display.Javascript object>



# Instead of sampling the full space, use projX to generate paths along the different projections/axis

In [9]:
paths_dict, idata = projX.generate.projection_paths(trajfile, top, 
                                         'data/Y.npy', 
                                         #Y, # You can also directly give the data here
                                         n_projs=2,
                                         proj_dim = 3, 
                                         verbose=False, 
                                        )

# Link the PDF plot with the sampled paths/structures and visually explore the coordinates (separately). 
Click either on the plot or on the widget slidebar: they're connected! You can change the type of path between min_rmsd or min_disp and you can also change the coordinate sampled (0 or 1)


In [10]:
# Choose the coordinate and the tyep of path
coord = 0
#path_type = 'min_rmsd'
path_type = 'min_disp'
igeom = paths_dict[coord][path_type]["geom"]
ipath = paths_dict[coord][path_type]["proj"]

In [11]:
plt.figure(figsize=(10,10))
h, (x,y) = np.histogramdd(Y[:,:2], bins=50)
plt.contourf(x[:-1], y[:-1], -np.log(h.T), alpha=.50)

linked_wdg = projX.visualize.sample(ipath[:,:2], 
                              igeom.superpose(igeom[0]),                             
                              plt.gca(), 
                              clear_lines=True,
                              plot_path=True
                            )
linked_wdg

<IPython.core.display.Javascript object>

  app.launch_new_instance()


# You can even provide your own ```PyEMMA``` clustering object...

In [12]:
from pyemma.coordinates import cluster_kmeans
clkmeans = cluster_kmeans(Y[:,:2], 10)

22-02-17 18:15:55 pyemma.coordinates.clustering.kmeans.KmeansClustering[5] INFO     Algorithm did not reach convergence criterion of 1e-05 in 10 iterations. Consider increasing max_iter.


In [13]:
data_sample, geoms = projX.generate.sample(trajfile, top, clkmeans, 
                                     n_geom_samples=50, 
                                     #keep_all_samples=True # read the doc for this argument
                                    )

In [14]:
plt.figure(figsize=(7,7))
plt.contourf(x[:-1], y[:-1], -np.log(h.T), alpha=.50)
plt.plot(clkmeans.clustercenters[:,0], clkmeans.clustercenters[:,1],' ok')
iwdg = projX.visualize.sample(data_sample, 
                              geoms.superpose(geoms[0]),                             
                              plt.gca(), 
                              clear_lines=False,
                              #plot_path=True
                            )
iwdg

<IPython.core.display.Javascript object>

  from ipykernel import kernelapp as app


In [15]:
# Check
# https://github.com/arose/nglview/issues/518
# https://github.com/arose/nglview/issues/517