In [None]:
import pickle
import numpy as np
import pyemma
import pyemma.plots as mplt
import matplotlib.pyplot as plt
import msmrd2.tools.trajectoryTools as trajectoryTools
import msmrd2.tools.analysis as analysisTools
import msmrd2.tools.msmTools as msmTools

In [None]:
# Directory that holds the simulation output together with its "parameters" file.
# Alternative data sets kept from earlier runs of this notebook:
#   '../data/dimer/60strength/'
#   '../data/pentamer/dimer/'
parentDirectory = '../data/patchyProtein/benchmark/'
parameterDictionary = analysisTools.readParameters("{}parameters".format(parentDirectory))
# Settings of the original simulation that the cells below need to load and label its files
nfiles, dt, stride, totalTimeSteps = (
    parameterDictionary[key] for key in ('numFiles', 'dt', 'stride', 'timesteps')
)
# Display everything that was read from the parameters file
parameterDictionary

In [None]:
# Settings for the MSM generation
numBoundStates = 6  # alternative kept from the original cell: 8
lagtime = 50  # alternatives kept from the original cell: 75, 150
reversible = True  # alternative: False
stitching = True

# Quantities derived from the simulation parameters loaded above
# Common prefix of the trajectory file names (alternative: parentDirectory + 'simDimer_')
fnamebase = '{}simPatchyProtein_'.format(parentDirectory)
# Product of the time step and the stride; needed to obtain the rate dictionary
dtEffective = stride * dt

In [None]:
# Read each of the `nfiles` discrete trajectories into the list `dtrajs`
fnamesuffix = '_discrete'  # alternatives: '_discrete_test', '_discrete_python'
filetype = 'xyz'  # 'h5' or 'xyz'
dtrajs = []
for fileNumber in range(1, nfiles + 1):
    # Files are indexed from zero, the progress message counts from one
    dtrajs.append(
        trajectoryTools.loadDiscreteTrajectory(fnamebase, fileNumber - 1, fnamesuffix, filetype)
    )
    # The carriage return makes each progress message overwrite the previous one
    print("File ", fileNumber, " of ", nfiles, " done.", end="\r")

In [None]:
# Split the trajectories so that the unbound state (index 0) is removed
unboundStateIndex = 0
slicedDtrajs = trajectoryTools.splitDiscreteTrajs(dtrajs, unboundStateIndex)
# Either keep the sliced pieces as they are or stitch them together
if not stitching:
    finalTrajs = slicedDtrajs
else:
    # Length argument handed to stitchTrajs
    minLength = 5000
    finalTrajs = trajectoryTools.stitchTrajs(slicedDtrajs, minLength)

In [None]:
# Estimate the MSM between transition states and bound states from `finalTrajs`
# (stitched or only sliced, depending on the `stitching` flag set earlier)
mainmsm = pyemma.msm.estimate_markov_model(dtrajs=finalTrajs, lag=lagtime, reversible=reversible)
# The active set maps the indexes used internally by pyemma to the ones
# that describe the states in our model.
activeSet = mainmsm.active_set
print(activeSet)

In [None]:
# Pickle the MSM transition matrix and active set as a dictionary
MSM = {'transition_matrix' : mainmsm.transition_matrix, 'active_set': mainmsm.active_set}
# The file name encodes the total number of time steps, the stride and the lag time.
# For the other systems replace "MSM_patchyProtein" with "MSM_dimer" or "MSM_pentamer".
msmPicklePath = ("../data/pickled_data/MSM_patchyProtein_t" + "{:.2E}".format(totalTimeSteps) +
                 "_s" + "{:d}".format(stride) + "_lagt" + "{:d}".format(lagtime) + ".pickle")
# The context manager closes the file even if pickle.dump raises
with open(msmPicklePath, "wb") as pickle_out:
    pickle.dump(MSM, pickle_out)

## Implied timescales and CK test

In [None]:
# Chapman-Kolmogorov test of the estimated MSM
nsets = 12
# Explicit memberships are not used at the moment; kept for reference:
#sets = [1, 3, 5, 7, 9, 11, 13, 15]
#memberships = np.vstack((range(nsets), sets)).transpose()
cktest = mainmsm.cktest(nsets=nsets, mlags=6)
mplt.plot_cktest(cktest);
# Uncomment to save the figure to disk
#plt.savefig('cktest12.pdf')

In [None]:
# Redraw the Chapman-Kolmogorov test figure from the `cktest` object computed in the previous cell
# (avoids recomputing the test when only the plot is needed)
mplt.plot_cktest(cktest);
# Uncomment to save the figure to disk
#plt.savefig('cktest12.pdf')

In [None]:
# Compute the implied timescales of `finalTrajs`
maxlagtime = 100  # alternative kept from the original cell: 300
its = pyemma.msm.its(finalTrajs, lags=maxlagtime, reversible=reversible)
# Variant with error bars:
#its = pyemma.msm.its(finalTrajs, maxlagtime, reversible=reversible, errors='bayes')

In [None]:
# Implied timescales drawn with a logarithmic y axis
nits = 20
# plot_implied_timescales draws on the current axes and hands them back, so label them directly
itsAxes = mplt.plot_implied_timescales(its, nits=nits, ylog=True, units='steps', linewidth=1, dt=1)
itsAxes.set_ylabel(r"log(timescale/steps)", fontsize=18)
itsAxes.set_xlabel(r"lag time/steps", fontsize=18)
# Optional y limits:
#plt.ylim([10.0,100000])
#plt.ylim([10.0,8000])

In [None]:
# Implied timescales drawn with a linear y axis (reuses `nits` from the previous cell)
itsAxes = mplt.plot_implied_timescales(its, nits=nits, ylog=False, units='steps', linewidth=2, dt=1)
itsAxes.set_ylabel(r"timescale/steps", fontsize=24)
itsAxes.set_xlabel(r"lag time/steps", fontsize=24)

In [None]:
# Publication version of the implied timescales plot (logarithmic y axis)
nits = 20
fig, ax = plt.subplots(figsize=(10, 7))
mplt.plot_implied_timescales(
    its, ax=ax, nits=nits, ylog=True, units='steps', linewidth=2, dt=1,
    show_mean=False, markersize=0, confidence=0.95,
)
# Axis labels in a large font, tick labels in an even larger one
ax.set_ylabel(r"log(timescale/steps)", fontsize=24)
ax.set_xlabel(r"lag time/steps", fontsize=24)
plt.xticks(fontsize=28)
plt.yticks(fontsize=28)
ax.set_ylim([10.0, 100000])
# Uncomment one of the following to save the figure:
#plt.savefig('its_dimer_strength60_final_alt.pdf')
#plt.savefig('its_pentamer_strength80_test.pdf')
#plt.savefig('its_patchyProteinTest.pdf')

In [None]:
# Possible publication version of the implied timescales plot with a linear y axis
nits = 20
fig, ax = plt.subplots(figsize=(12, 7))
mplt.plot_implied_timescales(
    its, ax=ax, nits=nits, ylog=False, units='steps', linewidth=2, dt=1,
    show_mean=False, markersize=0, confidence=0.95,
)
# Axis labels in a large font, tick labels in an even larger one
ax.set_ylabel(r"timescale/steps", fontsize=24)
ax.set_xlabel(r"lag time/steps", fontsize=24)
plt.xticks(fontsize=28)
plt.yticks(fontsize=28)
ax.set_ylim([10.0, 40000])
# Uncomment one of the following to save the figure:
#plt.savefig('its_nolog_dimer_strength60_final_alt.pdf')
#plt.savefig('its_nolog_pentamer_strength80_test.pdf')

In [None]:
# First left eigenvector of the estimated MSM
eig0 = mainmsm.eigenvectors_left()[0]
# Print the indices of the entries whose absolute value exceeds 0.1, then plot every entry
largeEntryMask = np.abs(eig0) > 0.1
print(np.nonzero(largeEntryMask))
plt.plot(eig0)
# Optional zoom on the first entries:
#plt.xlim([0,10])

## Old rate dictionary approach (deprecated)

In [None]:
# Build the rate dictionary from the estimated MSM (deprecated approach).
# NOTE(review): `fullDictionary` was never assigned anywhere in this notebook, so this cell
# raised a NameError after Restart & Run All. False is a placeholder -- confirm the intended
# value against msmTools.MSMtoRateDictionary before relying on the result.
fullDictionary = False
rateDictionary = msmTools.MSMtoRateDictionary(mainmsm, numBoundStates, dtEffective, fullDictionary)
# Alternatively, it can all be done with one function: slice and stitch trajectories then build MSM and extract rates
# rateDictionary = msmrdals.extractRatesMSM(dtrajs, lagtime, numBoundStates, stitching, fullDictionary)

In [None]:
# Export the rate dictionary to a data file with pickle (to be loaded later).
# The file name encodes the total number of time steps, the stride and the lag time.
# NOTE(review): the name says "dimer" although the notebook currently loads the patchyProtein
# data -- confirm this is the intended target before overwriting an existing file.
rateDictionaryPicklePath = ("../data/pickled_data/ratedictionary_dimer_t" + "{:.2E}".format(totalTimeSteps) +
                            "_s" + "{:d}".format(stride) + "_lagt" + "{:d}".format(lagtime) + ".pickle")
# The context manager closes the file even if pickle.dump raises
with open(rateDictionaryPicklePath, "wb") as pickle_out:
    pickle.dump(rateDictionary, pickle_out)

In [None]:
# Display the full rate dictionary computed by msmTools.MSMtoRateDictionary
rateDictionary

In [None]:
# Look up a single entry of the rate dictionary.
# NOTE(review): the key presumably denotes the transition from bound state 2 to bound state 1 --
# confirm the key format against msmTools.MSMtoRateDictionary.
rateDictionary["b2->b1"]