In [1]:
import pandas as pd
import numpy as np

# Read MSM data

In [2]:
# Open the MSM results archive; the arrays it holds are read by key below.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- keep it only for trusted files, and drop it if none
# of the stored arrays is object-dtype.
msm_data=np.load('../1_MSM/msm_data.npz', allow_pickle=True)

In [4]:
msm_data.files

['raw_Q_G',
 'dtrajs',
 'probability_micro',
 'center',
 'eigenvalues_list',
 'meta_dtrajs',
 'coarse_state_centers',
 'meta_dist',
 'meta_set',
 'meta_samples']

In [5]:
# State labels, one row per trajectory (shape (56, 6665)); these are
# matched against state indices 0..5 when ASA values are grouped by state.
meta_dtrajs = msm_data['meta_dtrajs']

In [7]:
meta_dtrajs.shape

(56, 6665)

In [8]:
# Keep every 50th column of each trajectory's label row.  The ASA files are
# subsampled with the same stride of 50 along their rows, so the two must be
# changed together to keep labels and ASA values aligned.
meta_dtrajs_subsample = meta_dtrajs[:,::50]

In [10]:
meta_dtrajs_subsample.shape

(56, 134)

# Read SASA data and save per-cutsite ASA for all trajectories

In [25]:
# Number of trajectories, taken from the first axis of meta_dtrajs (56 rows)
# instead of a literal, so it cannot drift from the MSM data it must match.
n_trajs = meta_dtrajs.shape[0]

Significant cutsites:
0. R69: 64-75
1. M75: 70-81
2. A78: 73-84
3. I159: 154-165
4. L202: 197-208
5. K204: 199-210
6. E206: 201-212
7. F207: 202-213
8. A254: 249-260
9. R255: 250-261
10. L258: 253-264
11. Q260: 255-266
12. A261: 256-267

In [23]:
# Column window for each cutsite, keyed by residue label and stored as
# [start, stop].  Every window runs from 5 below the residue number in the
# label to 6 above it, so the table is generated from the labels.
cutsite_range = {
    label: [int(label[1:]) - 5, int(label[1:]) + 6]
    for label in (
        "R69", "M75", "A78", "I159", "L202", "K204", "E206",
        "F207", "A254", "R255", "L258", "Q260", "A261",
    )
}

In [41]:
# Build one (n_trajs, 134) array per cutsite holding the ASA summed over that
# cutsite's column window, then save each array under sim/ASA_cutsite/.
#
# Each trajectory file is parsed once and reused for every cutsite: the file
# contents do not depend on the cutsite, so loading inside the cutsite loop
# repeated the same np.loadtxt call once per cutsite.
# NOTE(review): the slice first_number:second_number is half-open (column
# `second_number` is excluded) and 0-indexed -- confirm this matches the
# residue numbering and column layout of the asa_traj*.dat files.
asa_by_cutsite = {key: np.zeros((n_trajs, 134)) for key in cutsite_range}

for traj in range(n_trajs):
    data = np.loadtxt(f'../cg_refold_sims/traj_asa/asa_traj{traj}.dat')
    for key, (first_number, second_number) in cutsite_range.items():
        # Every 50th row, the same stride used to build meta_dtrajs_subsample.
        asa_by_cutsite[key][traj, :] = np.sum(
            data[::50, first_number:second_number], axis=1
        )

for key, (first_number, second_number) in cutsite_range.items():
    print(f"{key}: First number = {first_number}, Second number = {second_number}")
    # Keep the ASA_cutsite name bound, as before, to the array being saved.
    ASA_cutsite = asa_by_cutsite[key]
    np.save(f'sim/ASA_cutsite/{key}_ASA.npy', ASA_cutsite)
    

R69: First number = 64, Second number = 75
M75: First number = 70, Second number = 81
A78: First number = 73, Second number = 84
I159: First number = 154, Second number = 165
L202: First number = 197, Second number = 208
K204: First number = 199, Second number = 210
E206: First number = 201, Second number = 212
F207: First number = 202, Second number = 213
A254: First number = 249, Second number = 260
R255: First number = 250, Second number = 261
L258: First number = 253, Second number = 264
Q260: First number = 255, Second number = 266
A261: First number = 256, Second number = 267


# Group ASA for each cutsite by state

In [47]:
meta_dtrajs_subsample

array([[5, 5, 5, ..., 5, 5, 5],
       [2, 2, 2, ..., 2, 2, 2],
       [2, 2, 2, ..., 2, 2, 2],
       ...,
       [5, 5, 5, ..., 5, 5, 5],
       [5, 5, 5, ..., 5, 5, 5],
       [5, 5, 5, ..., 5, 5, 5]])

In [49]:
# Flatten the per-trajectory label rows into a single 1-D integer vector
# (row-major, i.e. trajectory after trajectory).
meta_dtrajs_subsample_conc = meta_dtrajs_subsample.flatten().astype(int)

In [50]:
meta_dtrajs_subsample_conc

array([5, 5, 5, ..., 5, 5, 5])

In [52]:
# Split each cutsite's ASA samples by state label and save one 1-D array per
# (cutsite, state) pair under sim/ASA_cutsite_then_state/.
n_states = 6  # labels in meta_dtrajs_subsample_conc run from 0 to 5

# The masks depend only on the labels, not on the cutsite, so they are built
# once here rather than once per cutsite.
state_masks = [meta_dtrajs_subsample_conc == st for st in range(n_states)]

for key in cutsite_range:
    print(f"Working on cutsite: {key}")
    cutsite_asa = np.load(f'sim/ASA_cutsite/{key}_ASA.npy')
    # Flatten row-major so element i lines up with label i of the flattened
    # state vector.
    cutsite_asa_conc = np.array(cutsite_asa.flatten(), dtype=float)
    for st, mask in enumerate(state_masks):
        cutsite_state = cutsite_asa_conc[mask]
        print(f'sample size of state {st}: {len(cutsite_state)}')
        np.save(f'sim/ASA_cutsite_then_state/ASA_{key}_state_{st}.npy', cutsite_state)
    

Working on cutsite: R69
sample size of state 0: 308
sample size of state 1: 200
sample size of state 2: 1319
sample size of state 3: 1303
sample size of state 4: 68
sample size of state 5: 4306
Working on cutsite: M75
sample size of state 0: 308
sample size of state 1: 200
sample size of state 2: 1319
sample size of state 3: 1303
sample size of state 4: 68
sample size of state 5: 4306
Working on cutsite: A78
sample size of state 0: 308
sample size of state 1: 200
sample size of state 2: 1319
sample size of state 3: 1303
sample size of state 4: 68
sample size of state 5: 4306
Working on cutsite: I159
sample size of state 0: 308
sample size of state 1: 200
sample size of state 2: 1319
sample size of state 3: 1303
sample size of state 4: 68
sample size of state 5: 4306
Working on cutsite: L202
sample size of state 0: 308
sample size of state 1: 200
sample size of state 2: 1319
sample size of state 3: 1303
sample size of state 4: 68
sample size of state 5: 4306
Working on cutsite: K204
sam