In [None]:
from ebc.Clustering import EBC

import matplotlib.pyplot as plt
import numpy as np
import mdtraj as md
import seaborn as sns

from rdkit import Chem
from rdkit.Chem import AllChem

In [None]:
# Global matplotlib styling for every figure in this notebook.
# usetex=True renders all text through LaTeX, so a working LaTeX installation is required.
plt.rc('text', usetex=True)
# Serif font at a large size (figures are created at 10-12 inch width and 400 dpi below).
plt.rc('font', family='serif', size=48)

In [None]:
# Notebook-wide configuration.
N = 10_000            # NOTE(review): not referenced in the cells visible here - confirm it is still needed
SYSTEM_NAME = 'ALAD'  # prefix of every input file name
FOLDER = 'data/'      # input directory; concatenated directly, so the trailing slash is required

In [None]:
# --- MD data -----------------------------------------------------------------
# Load the trajectory together with its topology and align all frames on the first one.
trajectory = md.load(
    f'{FOLDER}{SYSTEM_NAME}_trajectory.dcd',
    top=f'{FOLDER}{SYSTEM_NAME}_init.pdb',
)
trajectory = trajectory.superpose(trajectory[0])

# compute_phi / compute_psi return a pair; element [1] holds the angle values.
phi = md.compute_phi(trajectory)[1]
psi = md.compute_psi(trajectory)[1]

# Stack (phi, psi) along the last axis and shift by +pi. The plotting cells subtract
# pi again for display, while the clustering cells use boxsize=2*pi on the shifted values.
torsions = np.concatenate((phi, psi), axis=-1) + np.pi

# Third column (index 2) of the tab-separated log that accompanies the trajectory.
energies = np.genfromtxt(
    f'{FOLDER}{SYSTEM_NAME}_trajectory.csv',
    delimiter='\t',
    usecols=[2],
)

# --- RDKit data --------------------------------------------------------------
# Precomputed torsions (shifted by +pi like the MD ones) and matching energies.
torsions_rdkit = np.load(f'{FOLDER}{SYSTEM_NAME}_torsions_rdkit.npy') + np.pi
energies_rdkit = np.load(f'{FOLDER}{SYSTEM_NAME}_energies_rdkit.npy')

In [None]:
# Sampling density of the MD trajectory in (phi, psi) space.
SKIP = 100  # plot only every SKIP-th frame
plt.figure(0, figsize=(10, 10), dpi=400)

# `torsions` is stored shifted by +pi; shift back to [-pi, pi] for display.
phi_md = torsions[::SKIP, 0] - np.pi
psi_md = torsions[::SKIP, 1] - np.pi

# Raw samples as small black dots. `vmax` was dropped: without `c` matplotlib
# ignores it and emits a warning.
plt.scatter(phi_md, psi_md, s=2, color='black', alpha=1, marker='.')
# Filled density estimate ...
sns.kdeplot(x=phi_md, y=psi_md,
            levels=100, gridsize=100, fill=True, cmap=plt.cm.plasma, alpha=0.95, linewidth=0,
            bw_adjust=0.5, thresh=1e-2)
# ... overlaid with a few white contour lines.
sns.kdeplot(x=phi_md, y=psi_md, levels=10, alpha=0.8, color='w')
# , cbar=False, cbar_kws={"ticks":[0.05, 0.1, 0.2, 0.3, 0.4], "label": "Sampling Density [\%]"}
#, cbar=True, cbar_kws={"ticks":[0.05, 0.1, 0.2, 0.3, 0.4]}
ax = plt.gca()
ax.set_ylim(-np.pi, np.pi)
ax.set_xlim(-np.pi, np.pi)
plt.xticks([-np.pi / 2, np.pi / 2], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
plt.yticks([-np.pi / 2, np.pi / 2], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
# Raw strings: '\p' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\phi$ [rad]', labelpad=10)
ax.set_ylabel(r'$\psi$ [rad]', labelpad=10)
# Hide the axes frame entirely.
for side in ('left', 'top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)

#plt.savefig('sampling_md.png', bbox_inches='tight')
#plt.savefig('sampling_md.pdf', bbox_inches='tight')

In [None]:
# Sampling density of the RDKit conformers in (phi, psi) space.
SKIP = 100  # plot only every SKIP-th sample
plt.figure(20, figsize=(10, 10), dpi=400)

# `torsions_rdkit` is stored shifted by +pi; shift back to [-pi, pi] for display.
phi_rdkit = torsions_rdkit[::SKIP, 0] - np.pi
psi_rdkit = torsions_rdkit[::SKIP, 1] - np.pi

# Raw samples as small black dots. `vmax` was dropped: without `c` matplotlib
# ignores it and emits a warning.
plt.scatter(phi_rdkit, psi_rdkit, s=2, color='black', alpha=1, marker='.')
# Filled density estimate ...
sns.kdeplot(x=phi_rdkit, y=psi_rdkit,
            levels=100, gridsize=100, fill=True, cmap=plt.cm.plasma, alpha=0.95, linewidth=0,
            bw_adjust=0.5, thresh=1e-2)
# ... overlaid with a few white contour lines.
sns.kdeplot(x=phi_rdkit, y=psi_rdkit, levels=10, alpha=0.8, color='w')
# , cbar=False, cbar_kws={"ticks":[0.05, 0.1, 0.2, 0.3, 0.4], "label": "Sampling Density [\%]"}
ax = plt.gca()
ax.set_ylim(-np.pi, np.pi)
ax.set_xlim(-np.pi, np.pi)
plt.xticks([-np.pi / 2, np.pi / 2], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
plt.yticks([-np.pi / 2, np.pi / 2], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
# Raw strings: '\p' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\phi$ [rad]', labelpad=10)
ax.set_ylabel(r'$\psi$ [rad]', labelpad=10)
# Hide the axes frame entirely.
for side in ('left', 'top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)

#plt.savefig('sampling_rdkit.png', bbox_inches='tight')
#plt.savefig('sampling_rdkit.pdf', bbox_inches='tight')

In [None]:
# Cluster the RDKit conformers. boxsize=2*pi matches the +pi-shifted torsions.
# Alternative settings noted by the author: proto_radius=6, n_clusters=9
ebc = EBC(
    temperature=100,
    n_clusters=5,
    proto_radius=0.1,
    knn=20,
    boxsize=2 * np.pi,
    use_sparse=True,
)
ebc.fit(torsions_rdkit, energies_rdkit)
# Populates the 2D coordinates (e.g. ebc._proto_2D) that the plotting cells below read.
ebc._prepare_coordinates_2D()
ebc.show()

In [None]:
# Prototype populations for the RDKit clustering, coloured by ebc.pi * 100.
# s / s_big / alpha_proto are only used by the disabled overlay below.
s=10
s_big=250
alpha_proto=1
fig = plt.figure(0, figsize=(12, 10), dpi=400, facecolor='white')
#plt.scatter(ebc._states_2D[:, 0], ebc._states_2D[:, 1], c=-ebc.energies, s=s, cmap=plt.cm.inferno, zorder=-1, alpha=1.0) 
#for idk, cluster_key in enumerate(ebc._cluster_ids):
#    coords = ebc._proto_2D[ebc.get_cluster_members(cluster_key)]
#    plt.scatter(coords[:, 0], coords[:, 1], color=ebc.cluster_colormap[idk], s=s_big, alpha=alpha_proto)
ax = plt.gca()
ax.grid(False)
plt.scatter(ebc._proto_2D[:, 0], ebc._proto_2D[:, 1], c=ebc.pi * 100, s=100, cmap=plt.cm.plasma, zorder=100, alpha=1.0)
cbar = plt.colorbar()
# Raw string: '\%' is an invalid escape sequence in a normal string literal
# (the backslash is needed because text is rendered through LaTeX).
cbar.set_label(r'Population [\%]', labelpad=30)
cbar.outline.set_visible(False)
cbar.set_ticks([0.0, 0.3, 0.6])
plt.box(False)
# Coordinates are shifted by +pi, so the visible window is [0, 2*pi] while the
# tick labels show the unshifted angles.
ax.set_ylim(-np.pi+np.pi, np.pi+np.pi)
ax.set_xlim(-np.pi+np.pi, np.pi+np.pi)
plt.xticks([-np.pi / 2 +np.pi, np.pi / 2 +np.pi], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
plt.yticks([])
ax.set_xlabel(r'$\phi$ [rad]', labelpad=10)
#ax.set_ylabel(r'$\psi$ [rad]', labelpad=10)
for side in ('left', 'top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)

#plt.savefig('populations_rdkit.pdf', bbox_inches='tight')

In [None]:
# Cluster assignment of the RDKit prototypes; one colour per cluster.
s=10  # only used by the disabled energy overlay below
s_big=75
alpha_proto=1
fig = plt.figure(0, figsize=(10, 10), dpi=400, facecolor='white')
#plt.scatter(ebc._states_2D[:, 0], ebc._states_2D[:, 1], c=-ebc.energies, s=s, cmap=plt.cm.inferno, zorder=-1, alpha=1.0) 
for idk, cluster_key in enumerate(ebc._cluster_ids):
    coords = ebc._proto_2D[ebc.get_cluster_members(cluster_key)]
    # NOTE(review): the colour is looked up by enumeration index, not by cluster_key - confirm intended.
    plt.scatter(coords[:, 0], coords[:, 1], color=ebc.cluster_colormap[idk], s=s_big, alpha=alpha_proto)
ax = plt.gca()
ax.grid(False)
# No colorbar in this figure. The former cbar.set_label / cbar.outline / cbar.set_ticks
# calls were removed: `cbar` is not created in this cell, so they either mutated the
# colorbar of the previous figure or raised NameError when this cell was run on its own.
plt.box(False)
# Coordinates are shifted by +pi, so the visible window is [0, 2*pi] while the
# tick labels show the unshifted angles.
ax.set_ylim(-np.pi+np.pi, np.pi+np.pi)
ax.set_xlim(-np.pi+np.pi, np.pi+np.pi)
plt.xticks([-np.pi / 2 +np.pi, np.pi / 2 +np.pi], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
plt.yticks([])
# Raw string: '\p' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\phi$ [rad]', labelpad=10)
#ax.set_ylabel(r'$\psi$ [rad]', labelpad=10)
for side in ('left', 'top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)

plt.savefig('clusters_rdkit.pdf', bbox_inches='tight')

In [None]:
# Re-fit the clustering on the MD data; this rebinds `ebc`, replacing the RDKit model.
# Alternative settings noted by the author: proto_radius=6, n_clusters=9
ebc = EBC(
    temperature=100,
    n_clusters=5,
    knn=20,
    proto_radius=0.1,
    boxsize=2 * np.pi,
    use_sparse=True,
)
ebc.fit(torsions, energies)

In [None]:
# Hand-picked RGB colour per cluster index for the MD clustering (the values look
# like matplotlib's tab10 palette). Key order is kept as originally written.
cluster_colormap = {
    0: (1.0, 0.4980392156862745, 0.054901960784313725),
    1: (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
    4: (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),
    3: (0.5803921568627451, 0.403921568627451, 0.7411764705882353),
    2: (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
}

# Prepare the 2D coordinates first, then override the colour map on the fitted model.
ebc._prepare_coordinates_2D()
ebc.cluster_colormap = cluster_colormap

In [None]:
# Prototype populations for the MD clustering, coloured by ebc.pi * 100.
# s / s_big / alpha_proto are only used by the disabled overlay below.
s=10
s_big=250
alpha_proto=1
fig = plt.figure(0, figsize=(12, 10), dpi=400, facecolor='white')
#plt.scatter(ebc._states_2D[:, 0], ebc._states_2D[:, 1], c=-ebc.energies, s=s, cmap=plt.cm.inferno, zorder=-1, alpha=1.0) 
#for idk, cluster_key in enumerate(ebc._cluster_ids):
#    coords = ebc._proto_2D[ebc.get_cluster_members(cluster_key)]
#    plt.scatter(coords[:, 0], coords[:, 1], color=ebc.cluster_colormap[idk], s=s_big, alpha=alpha_proto)
ax = plt.gca()
ax.grid(False)
plt.scatter(ebc._proto_2D[:, 0], ebc._proto_2D[:, 1], c=ebc.pi * 100, s=100, cmap=plt.cm.plasma, zorder=100, alpha=1.0)
cbar = plt.colorbar()
# Raw string: '\%' is an invalid escape sequence in a normal string literal
# (the backslash is needed because text is rendered through LaTeX).
cbar.set_label(r'Population [\%]', labelpad=30)
cbar.outline.set_visible(False)
cbar.set_ticks([0.0, 0.1, 0.2])
plt.box(False)
# Coordinates are shifted by +pi, so the visible window is [0, 2*pi] while the
# tick labels show the unshifted angles.
ax.set_ylim(-np.pi+np.pi, np.pi+np.pi)
ax.set_xlim(-np.pi+np.pi, np.pi+np.pi)
plt.xticks([-np.pi / 2 +np.pi, np.pi / 2 +np.pi], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
plt.yticks([],)
ax.set_xlabel(r'$\phi$ [rad]', labelpad=10)
#ax.set_ylabel(r'$\psi$ [rad]', labelpad=10)
for side in ('left', 'top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)
plt.savefig('populations_md.pdf', bbox_inches='tight')

In [None]:
# Cluster assignment of the MD prototypes; one colour per cluster.
s=10  # only used by the disabled energy overlay below
s_big=75
alpha_proto=1
fig = plt.figure(0, figsize=(10, 10), dpi=400, facecolor='white')
#plt.scatter(ebc._states_2D[:, 0], ebc._states_2D[:, 1], c=-ebc.energies, s=s, cmap=plt.cm.inferno, zorder=-1, alpha=1.0) 
for idk, cluster_key in enumerate(ebc._cluster_ids):
    coords = ebc._proto_2D[ebc.get_cluster_members(cluster_key)]
    # NOTE(review): the colour is looked up by enumeration index, not by cluster_key - confirm intended.
    plt.scatter(coords[:, 0], coords[:, 1], color=ebc.cluster_colormap[idk], s=s_big, alpha=alpha_proto)
ax = plt.gca()
ax.grid(False)
# No colorbar in this figure (the population colouring is shown in the previous plot).
plt.box(False)
# Coordinates are shifted by +pi, so the visible window is [0, 2*pi] while the
# tick labels show the unshifted angles.
ax.set_ylim(-np.pi+np.pi, np.pi+np.pi)
ax.set_xlim(-np.pi+np.pi, np.pi+np.pi)
plt.xticks([-np.pi / 2 +np.pi, np.pi / 2 +np.pi], [r"-$\frac{\pi}{2}$", r"$\frac{\pi}{2}$"],)
plt.yticks([],)
# Raw string: '\p' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\phi$ [rad]', labelpad=10)
#ax.set_ylabel(r'$\psi$ [rad]', labelpad=10)
for side in ('left', 'top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)
plt.savefig('clusters_md.pdf', bbox_inches='tight')