In [1]:
import mdtraj as md
import pyemma as pm
import openpathsampling as ops

from pathlib import Path
import os
import pickle
import pandas 
import numpy as np

In [2]:
# --- Reproducibility ---
# One seed drives both the NumPy generator and the k-means initialisation below.
seed = 49587
rng = np.random.default_rng(seed)

# --- System and paths ---
# `protein` names the data sub-directory; every path below is derived from it so
# that changing the system only requires editing this one value.
protein = 'CLN'
data_dir = f'./data/{protein}'

top_path = f'{data_dir}/protein.pdb'
# Sorted so the trajectory order does not depend on filesystem listing order.
traj_paths = sorted(str(x) for x in Path(data_dir).rglob('CLN025-0-protein-*.xtc'))
model_path = f'{data_dir}/'

# --- MSM parameters ---
f_scheme = 'ca'        # `scheme` argument of mdtraj.compute_contacts
tica_lag = 3           # TICA lag time (in trajectory steps)
tica_d = 15            # number of TICA dimensions to keep
cluster_n = 469        # number of k-means cluster centres (microstates)
cluster_stride = 1     # stride applied to the data when fitting k-means
max_iter = 1000        # maximum number of k-means iterations
msm_lag = 31           # MSM lag time (in trajectory steps)

# --- PCCA+ parameters ---
n_sets = 2                     # number of metastable sets requested from PCCA+
core_membership_cutoff = 0.9   # presumably the minimum PCCA+ membership for a core state; not used in the cells shown here - confirm

In [3]:
# MSM & PCCA+
# Pipeline: contact features -> TICA -> k-means discretisation -> MSM -> PCCA+.

# An empty glob result (wrong working directory, missing data) would otherwise
# only surface as an obscure failure inside the TICA estimator; fail early instead.
if not traj_paths:
    raise FileNotFoundError(
        "No trajectory files found: 'traj_paths' is empty. "
        "Check the working directory and the glob pattern in the configuration cell."
    )

# Featurise every trajectory as residue-residue contact distances.
ftrajs = []
for traj_path in traj_paths:
    traj = md.load(traj_path, top=top_path)
    # compute_contacts returns (distances, residue_pairs); only the distances are kept
    ftrajs.append(md.compute_contacts(traj, scheme=f_scheme)[0])

# Dimensionality reduction: project the features onto the leading TICA components.
tica_mod = pm.coordinates.tica(ftrajs, lag=tica_lag, dim=tica_d, kinetic_map=True)
ttrajs = tica_mod.get_output()

# Discretise TICA space into microstates; the seed makes the clustering reproducible.
kmeans_mod = pm.coordinates.cluster_kmeans(ttrajs, k=cluster_n, max_iter=max_iter, stride=cluster_stride, fixed_seed=seed)
dtrajs = kmeans_mod.dtrajs

# Estimate the MSM on the discrete trajectories and coarse-grain it with PCCA+.
msm = pm.msm.estimate_markov_model(dtrajs, lag=msm_lag)
# The return value is discarded; the call is made so the PCCA+ result is computed on `msm`.
_ = msm.pcca(n_sets)

  return caller(func, *(extras + args), **kw)


calculate covariances:   0%|          | 0/54 [00:00<?, ?it/s]

getting output of TICA:   0%|          | 0/54 [00:00<?, ?it/s]

  return caller(func, *(extras + args), **kw)


initialize kmeans++ centers:   0%|          | 0/469 [00:00<?, ?it/s]

kmeans iterations:   0%|          | 0/1000 [00:00<?, ?it/s]

getting output of KmeansClustering:   0%|          | 0/54 [00:00<?, ?it/s]

In [4]:
# Save the fitted TICA, k-means and MSM objects together in a single pickle file

models_file = os.path.join(model_path, 'msm_models')
fitted_models = {'tica_mod': tica_mod, 'kmeans_mod': kmeans_mod, 'msm': msm}
with open(models_file, 'wb') as f:
    pickle.dump(fitted_models, f)