In [None]:
import pandas as pd
import numpy as np
import pyemma as pe
import pickle
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Folder containing the two RMSD matrix CSV files. Raw strings are used so the
# Windows backslashes are taken literally; in a plain string '\O', '\M', '\D'
# and '\R' are invalid escape sequences and trigger a SyntaxWarning.
DATA_DIR = r'E:\OneDrive - University of Edinburgh\MeysBscProject\Datasets\Mpro'

# Load both matrices and use the 'sub_crystal_name' column as the row index.
# set_index returns a new frame, so re-running this cell always starts from the file.
RMSD_matrix = pd.read_csv(DATA_DIR + r'\RMSD_fmatrix.csv').set_index('sub_crystal_name')
RMSD_matrix_opt = pd.read_csv(DATA_DIR + r'\RMSD_fmatrix_opt.csv').set_index('sub_crystal_name')

In [None]:
# Gaussian kernel exp(-x**2 / (2*epsilon)) applied to every entry x of
# RMSD_matrix_opt, with epsilon fixed at 0.25. The expression is evaluated on the
# whole frame at once instead of element by element through the deprecated
# DataFrame.applymap; the result is still a DataFrame with the same labels.
Diffusion_matrix = np.exp(-RMSD_matrix_opt**2 / (2*0.25))

In [None]:
# Row-normalise the kernel matrix: each row is divided by its own total, so
# every row of Transition_matrix sums to 1.
row_totals = Diffusion_matrix.sum(axis='columns')
Transition_matrix = Diffusion_matrix.div(row_totals, axis='index')

In [None]:
# Scan the kernel width epsilon and, for each value, record the ten largest
# eigenvalues of the row-normalised kernel matrix.
eigenvalues_list = []  # one array of ten eigenvalues per scanned epsilon
epsilon_list = []      # scanned epsilons rounded to 2 decimals, aligned with eigenvalues_list

# NOTE(review): the loop rebinds the notebook-level Diffusion_matrix and
# Transition_matrix, so once it finishes they hold the matrices for the LAST
# epsilon scanned, not the epsilon = 0.25 ones built in the earlier cells.
# Confirm that the cells further down are meant to use that value.
for epsilon in np.arange(0.1, 0.5, 0.05):
    # Vectorised Gaussian kernel (replaces the deprecated DataFrame.applymap).
    Diffusion_matrix = np.exp(-RMSD_matrix_opt**2 / (2*epsilon))
    Transition_matrix = Diffusion_matrix.div(Diffusion_matrix.sum(axis=1), axis=0)
    epsilon_list.append(round(epsilon,2))

    # Only the eigenvalues are needed, so eigvals avoids computing eigenvectors.
    spectrum = np.linalg.eigvals(Transition_matrix.to_numpy())
    eigenvalues = np.sort(spectrum)[::-1]  # descending order

    # A general eigen-solver may return a complex dtype; take the real part
    # explicitly instead of relying on a complex -> float cast, which emits a
    # ComplexWarning while discarding the imaginary part.
    top_ten = np.real(eigenvalues[0:10]).astype(float)
    eigenvalues_list.append(top_ten)

    print('epsilon = %.2f' %epsilon)
    print('Top ten eigenvalues')
    print(top_ten)

In [None]:
# Flatten the per-epsilon results into two parallel lists for plotting:
# epsilons[k] is the epsilon that produced eigenvalues[k].
epsilons = []
eigenvalues = []

for scanned_epsilon, top_values in zip(epsilon_list, eigenvalues_list):
    # Repeat the epsilon once for every eigenvalue recorded for it.
    epsilons.extend([scanned_epsilon] * len(top_values))
    eigenvalues.extend(top_values)

Plot the top ten eigenvalues for each value of epsilon

In [None]:
# Scatter plot of the recorded eigenvalues (y) against the epsilon that
# produced them (x), drawn on a single 10x10 inch axes with fixed limits.
fig = plt.figure(num=1, figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)

ax.set(
    xlim=(0.05, 0.5),
    ylim=(0, 1.1),
    xlabel="Epsilon",
    ylabel="Top ten eigenvalues",
)
ax.scatter(epsilons, eigenvalues)

plt.show()

Clustering with PCCA

In [None]:
# Run PCCA on the current Transition_matrix, asking for 4 metastable sets.
all_clusters = pe.msm.PCCA(Transition_matrix.to_numpy(), 4)

# Report the size of each set and collect the matching column labels of
# RMSD_matrix_opt set by set, so that re_index lists the labels grouped by cluster.
re_index = []
for cluster_number, members in enumerate(all_clusters.metastable_sets, start=1):
    print('Size of cluster %i: %i'%(cluster_number, len(members)))
    re_index.extend(RMSD_matrix_opt.columns[position] for position in members)

Reorder the rows and columns of the RMSD matrix so that members of the same cluster are adjacent, then plot it as a heatmap.

In [None]:
# Pick rows and columns in the cluster-grouped order held in re_index, then
# transpose; this yields the same frame as selecting the columns, transposing,
# and selecting the columns again.
clustered_block = RMSD_matrix_opt.loc[re_index, re_index]
RMSD_matrix_re = clustered_block.T

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Draw the reordered RMSD matrix as a heatmap, save it to RMSDHeatMap.jpeg in
# the working directory, and display it.
fig, ax = plt.subplots()
im = ax.imshow(RMSD_matrix_re.values, cmap='Reds')

# Hide the ticks on both axes (explicit Axes calls rather than the pyplot
# "current axes" state machine).
ax.set_xticks([])
ax.set_yticks([])

cbar = fig.colorbar(im, ax=ax)
# The angstrom sign is written as a Unicode escape: the previous literal was a
# mis-decoded character sequence that rendered as garbage in the figure.
cbar.ax.set_ylabel('RMSD in [\u00c5]', rotation=-90, va="bottom")

ax.set_title("RMSD Matrix")
fig.tight_layout()
fig.savefig('RMSDHeatMap.jpeg')
plt.show()

In [None]:
# Build a table with one row per label in re_index (in that order), tag each
# row with a cluster id, and write it to cluster_table.csv.
cluster_table = pd.DataFrame({'sub_crystal_name': re_index})
cluster_table['cluster'] = ''

# Row ranges are label-based .loc slices on the default integer index, so both
# end points are inclusive: rows 0-10 -> '1', rows 11-42 -> '2', rows 43+ -> '3'.
# NOTE(review): these boundaries are hard-coded and define three clusters while
# the PCCA cell requests four sets — confirm they still match the clustering.
cluster_row_ranges = (
    ('1', slice(0, 10)),
    ('2', slice(11, 42)),
    ('3', slice(43, None)),
)
for cluster_id, row_range in cluster_row_ranges:
    cluster_table.loc[row_range, 'cluster'] = cluster_id

cluster_table.to_csv(r'cluster_table.csv')