In [47]:
import mdtraj as md
import MDAnalysis as mda
import numpy as np
from scipy.stats import ttest_ind_from_stats

from tqdm import tqdm
from pathlib import Path
import os
from natsort import natsorted
import pandas as pd
from addict import Dict as Adict

import pyemma as pm
import deeptime as dt
import deeptime.markov.msm as msm
import deeptime.markov.hmm as hmm

from deeptime.plots import plot_implied_timescales, plot_energy2d, plot_contour2d_from_xyz
from deeptime.markov.sample import *
from deeptime.markov import TransitionCountEstimator
from deeptime.util import energy2d

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns

from funcs_build_msm import _tica, _kmeans, get_data
from funcs_validate_msm import *
from funcs_sample import *
from funcs_plotting import *
from funcs_characterise import *
from paths import *

In [49]:
# Number of states; the cells below read MS_1.pdb .. MS_{n_states}.pdb from savedir.
n_states = 6

# Hyperparameter study and trial id whose validation directory is analysed.
study_name, hp_id = 'n_clusters', 0
savedir = Path(f'data_egfr/msm/validation/{study_name}_hp{hp_id}_validate')

### Compute the RMSDs within each sampled state

In [50]:
# For every state: load its sampled structures, compute the within-state RMSD
# statistics, and pass the mean to save_example_with_property together with the
# input PDB and an output path next to it.
for state_i in range(n_states):
    state_pdb = savedir/f'MS_{state_i+1}.pdb'
    annotated_pdb = savedir/f'MS_{state_i+1}_rmsd_mean.pdb'
    # Only the mean is written out here; the std is unpacked but not used in this cell.
    rmsd_mean, rmsd_std = cal_within_state_rmsd(md.load(state_pdb))
    # NOTE(review): presumably writes a copy of state_pdb annotated with rmsd_mean — confirm in the helper module.
    save_example_with_property(state_pdb, rmsd_mean, annotated_pdb)

### Compute the difference scores between every pair of states

In [51]:
scores = []

# Load the sampled structures of each state once; samples[k] holds the
# trajectory read from MS_{k+1}.pdb.
samples = []
for state_i in range(n_states):
    samples.append(md.load(savedir/f'MS_{state_i+1}.pdb'))

In [52]:
# Pairwise scores between states, stored as an upper triangle:
# res_scores[i] holds the scores of state i against states i+1 .. n_states-1,
# so res_scores[i][k] belongs to the pair (i, i+k+1) and the last row is empty.

# The within-state statistics depend on one state only, so they are computed
# once per state here instead of being recomputed for every (i, j) pair.
# NOTE(review): assumes cal_within_state_rmsd is deterministic and does not
# depend on the order in which states are processed — confirm in the helper.
within_state_stats = [cal_within_state_rmsd(samples[k]) for k in range(n_states)]

res_scores = []
for i in range(n_states):
    rmsd_mean_i, rmsd_std_i = within_state_stats[i]
    pair_scores_i = []
    # Only j > i is visited: each unordered pair is scored exactly once.
    for j in range(i + 1, n_states):
        rmsd_mean_j, rmsd_std_j = within_state_stats[j]
        rmsd_mean_ij, rmsd_std_ij = cal_between_states_rmsd(samples[i], samples[j])
        # The score combines the between-state mean with both within-state stds.
        pair_scores_i.append(cal_between_states_diff(rmsd_mean_ij, rmsd_std_i, rmsd_std_j))
    res_scores.append(pair_scores_i)

In [53]:
# Write one output PDB per state pair. res_scores[i][j] is the score of state i
# against state i+j+1, hence the 1-based file label MS_{i+1}_{j+i+2}.
for i in range(n_states):
    source_pdb = savedir/f'MS_{i+1}.pdb'
    n_partners = n_states-i-1
    for j in range(n_partners):
        pair_score = res_scores[i][j]
        save_example_with_property(source_pdb, pair_score, savedir/f'MS_{i+1}_{j+i+2}_score.pdb')

In [54]:
# Inspect one pair's score: row 4, first entry is state index 4 vs 5,
# i.e. the pair written out as MS_5_6_score.pdb (file labels are 1-based).
res_scores[4][0]

array([0.04765154, 0.14540334, 0.29164919, 0.35498241, 0.36290987,
       0.26320927, 0.22450082, 0.16554016, 0.28733089, 0.28417404,
       0.30377695, 0.25727489, 0.1820081 , 0.17873304, 0.12497637,
       0.09740564, 0.09242351, 0.05942848, 0.04837592, 0.03339426,
       0.0303492 , 0.05950953, 0.1227501 , 0.22761362, 0.28810782,
       0.28631577, 0.31933474, 0.32558648, 0.28968244, 0.21856643,
       0.17651488, 0.11610397, 0.05468467, 0.06141113, 0.03937182,
       0.0982999 , 0.12796131, 0.24987585, 0.27143079, 0.3678026 ,
       0.59038795, 0.52876054, 0.56202206, 0.35389423, 0.14562079,
       0.05489738, 0.02942193, 0.00181923, 0.        , 0.01086357,
       0.0235109 , 0.01781174, 0.02064098, 0.07862881, 0.11277583,
       0.09924489, 0.11317164, 0.24125443, 0.18057042, 0.26605608,
       0.28688535, 0.17760164, 0.17369878, 0.23045313, 0.27620906,
       0.2086982 , 0.32418448, 0.33544044, 0.43925307, 0.44639956,
       0.62426591, 0.61046725, 0.57687086, 0.47442076, 0.49783

### Inspect samples of DFG states

In [3]:
# Score the DFG-in sample set against the DFG-out sample set with the same
# within-state / between-state RMSD helpers used for the state pairs.
# NOTE(review): dfg_in_samples and dfg_out_samples are not defined in any cell
# visible here, and this cell's execution count (In [3]) is out of sequence with
# the cells above — confirm where they are loaded so a fresh-kernel run works.
dfg_in_mean, dfg_in_std = cal_within_state_rmsd(dfg_in_samples)
dfg_out_mean, dfg_out_std = cal_within_state_rmsd(dfg_out_samples)
in_out_mean, in_out_std = cal_between_states_rmsd(dfg_in_samples, dfg_out_samples)
# Only the between-set mean and the two within-set stds feed the score.
score = cal_between_states_diff(in_out_mean, dfg_in_std, dfg_out_std)

In [4]:
# Pass the DFG in/out score to save_example_with_property, using a structure
# from the dunbrack_studies trial as input. The output name is a bare relative
# path, so it resolves against the current working directory.
save_example_with_property(
    "./data_egfr/msm/dunbrack_studies/lag100_random_trials/61/MS_0.pdb",
    score,
    'rmsd_diff_dfg-in-out.pdb',
)



In [None]:
# NOTE(review): this unexecuted cell (In [None]) repeats the
# save_example_with_property call of the executed cell above with the same
# arguments, so it would write the same output file again — candidate for removal.
save_example_with_property("./data_egfr/msm/dunbrack_studies/lag100_random_trials/61/MS_0.pdb", score, 'rmsd_diff_dfg-in-out.pdb')

In [None]:
# NOTE(review): second unexecuted repeat (In [None]) of the same
# save_example_with_property call with the same arguments; it would write the
# same output file again — candidate for removal.
save_example_with_property("./data_egfr/msm/dunbrack_studies/lag100_random_trials/61/MS_0.pdb", score, 'rmsd_diff_dfg-in-out.pdb')