In [1]:
import tempfile
import os
import numpy as np
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.dataset import dataset
from msmbuilder.featurizer import DihedralFeaturizer
from sklearn.decomposition import PCA
import mdtraj as md

## Original MSM, with time information

In [4]:
# Load the Fs peptide example trajectories.
fs_peptide = FsPeptide()
fs_peptide.cache()  # presumably makes sure the dataset is available locally -- see msmbuilder docs

# Switch into a fresh temporary directory; the relative output paths used
# by later cells ('diheds/', 'ticas/') are therefore created there.
scratch_dir = tempfile.mkdtemp()
os.chdir(scratch_dir)

trajectory_glob = fs_peptide.data_dir + "/*.xtc"
topology_path = fs_peptide.data_dir + '/fs-peptide.pdb'
xyz = dataset(trajectory_glob, topology=topology_path, stride=10)
print("{} trajectories".format(len(xyz)))

# msmbuilder does not keep track of units! You must keep track of your
# data's timestep yourself. Here every retained frame (after stride=10)
# is counted as 0.5 ns.
to_ns = 0.5
lengths_ns = set(len(one_traj) * to_ns for one_traj in xyz)
print("with length {} ns".format(lengths_ns))



28 trajectories
with length set([500.0]) ns


In [3]:
# Featurization: describe every frame by its backbone phi/psi dihedrals
# and store the transformed trajectories under 'diheds/'.
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

# Compare the raw coordinate shape with the featurized shape of the
# first trajectory.
first_traj = xyz[0]
first_diheds = diheds[0]
print(first_traj.xyz.shape)
print(first_diheds.shape)

# tICA: keep the two leading components at a lag of 2 frames.
from msmbuilder.decomposition import tICA
# Fitting and transforming can be done in separate steps:
tica_model = diheds.fit_with(tICA(lag_time=2, n_components=2))
tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')
# Stack all per-trajectory projections into a single array.
txx = np.concatenate(tica_trajs)

(1000L, 264L, 3L)
(1000L, 84L)


## PCA on Isomap Analysis

In [15]:
# Perform PCA in the Isomap dimension.
# Raw string: the path contains '\U', which is an invalid escape sequence
# (a SyntaxError) in a normal string literal on Python 3.
data_file = r'C:\Users\minch_000\Documents\TJ_data\fspeptide\X_isomap_40_10_1.0.dat'
X_iso = np.load(data_file)

# Randomly permute the rows. np.random.shuffle works in place and returns
# None, so its result is deliberately not assigned. `idx` holds the
# permutation and is reused later to reorder the raw coordinates the same way.
idx = list(range(X_iso.shape[0]))
np.random.shuffle(idx)
X_iso = X_iso[idx, :]

# Project the (shuffled) Isomap embedding onto its two leading
# principal components.
pca = PCA(n_components=2)
X_ip = pca.fit_transform(X_iso)

In [13]:
# Sample along the 1st principal component.
# Raw strings are used for the Windows paths because '\U' is an invalid
# escape sequence in a normal string literal on Python 3.
X = np.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\raw_XYZ.dat')
X = X[idx, :]  # apply the same row permutation that was applied to X_iso

first_PCA = X_ip[:, 0]
# Frame indices ordered from the largest to the smallest PC1 value.
idx_sort = first_PCA.argsort()[::-1]

# Keep roughly 3000 equally spaced frames. The step is clamped to at least
# 1 so that inputs with fewer than 3000 frames do not produce a zero slice
# step (which raises ValueError).
sample_rate = max(1, X_ip.shape[0] // 3000)
traj = idx_sort[::sample_rate]

# Each row of X is reshaped to (n_atoms, 3). Floor division keeps the atom
# count an integer under both Python 2 and Python 3 division rules.
n_atoms = X.shape[1] // 3
PCA_traj = np.reshape(X[traj, :], (len(traj), n_atoms, 3))

topology = md.load(fs_peptide.data_dir + '/fs-peptide.pdb').topology
md_traj = md.Trajectory(PCA_traj, topology)
md_traj.save_xtc(r'C:\Users\minch_000\Documents\GitHub\Protein_Dynamics_TJ\MSM_Builder_Test\PCA_traj.xtc')

TypeError: __init__() takes at least 3 arguments (2 given)

In [29]:
# Sample along the 1st principal component for calmodulin.
# NOTE(review): the header says calmodulin, but every path below points
# at the fspeptide data directory -- confirm which system this is.
# Raw strings are used for the Windows paths because '\U' is an invalid
# escape sequence in a normal string literal on Python 3.
X = np.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\raw_XYZ_250_.dat')
X_ip = np.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\X_pc_isomap_40_10_250_4.dat')

first_PCA = X_ip[:, 0]
# Frame indices ordered from the largest to the smallest PC1 value.
idx_sort = first_PCA.argsort()[::-1]

# Keep roughly 3000 equally spaced frames. The step is clamped to at least
# 1 so that inputs with fewer than 3000 frames do not produce a zero slice
# step (which raises ValueError).
sample_rate = max(1, X_ip.shape[0] // 3000)
traj = idx_sort[::sample_rate]

# Load an existing trajectory and overwrite its coordinates with the
# sampled frames.
PCA_traj = md.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\trj9508.lh5')
# Floor division keeps the atom count an integer under both Python 2 and
# Python 3 division rules.
n_atoms = X.shape[1] // 3
PCA_traj.xyz = np.reshape(X[traj, :], (len(traj), n_atoms, 3))
# Give the frames consecutive integer time stamps 0..n_frames-1.
PCA_traj.time = np.arange(PCA_traj.xyz.shape[0])
PCA_traj.save_pdb(r'C:\Users\minch_000\Documents\GitHub\Protein_Dynamics_TJ\MSM_Builder_Test\PCA_traj_250.pdb')

In [63]:
# Report every row of the Isomap embedding whose sum is NaN or infinite.
# Raw string: '\U' is an invalid escape sequence in a normal string
# literal on Python 3.
X = np.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\X_isomap_40_10_100.dat')

# A row is flagged when its sum is not finite (NaN, +inf or -inf).
row_sums = np.sum(X, axis=1)
indices = np.flatnonzero(~np.isfinite(row_sums)).tolist()
for i in indices:
    # Function-call form of print, matching the other cells in this file.
    print(i)

In [70]:
# Load the raw coordinate array. Raw string: '\U' is an invalid escape
# sequence in a normal string literal on Python 3.
X_raw = np.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\raw_XYZ_600.dat')

In [74]:
# Load the Isomap embedding. Raw string: '\U' is an invalid escape
# sequence in a normal string literal on Python 3.
X_iso = np.load(r'C:\Users\minch_000\Documents\TJ_data\fspeptide\X_isomap_40_10_600.dat')

In [73]:
# Smallest entry of the raw coordinate array (np.min without an axis
# already reduces over every element).
np.min(X_raw)

0.72294343

In [75]:
# Smallest entry of the Isomap embedding (np.min without an axis already
# reduces over every element).
# NOTE(review): the recorded output (about -1.6e308) is close to the
# float64 limit, which presumably indicates corrupted values -- confirm.
np.min(X_iso)

-1.593663072372757e+308

In [37]:
# Done
# isomap 200
# isomap 100

# no problem
# isomap 300
# isomap 450

nan