## Run TICA

In [None]:
import os
import pickle
import numpy as np
import mdtraj as md
from timewarp.utils.evaluation_utils import compute_internal_coordinates
from timewarp.datasets import RawMolDynDataset
from timewarp.dataloader import (
    DenseMolDynBatch,
    moldyn_dense_collate_fn,
)
from itertools import islice
import matplotlib.pyplot as plt
#from astropy.stats import circcorrcoef
import matplotlib as mpl
from matplotlib.colors import LogNorm
plt.rc('font', size=35) 


In [None]:
# Protein sets keyed by the figure number that ends up in the output file
# names ("4AA-...-{number}.svg").  The pairing of set and number follows the
# order of the assignments this table replaces (each `proteins = ...` was
# immediately followed by its `number = ...`).
PROTEIN_SETS = {
    3: ["SAEL", "CTSA", "LPEM"],
    4: ["AWKC", "LYVI", "CSFQ"],
    7: ["LYVI", "CTSA"],
    9: ["SAEL", "LPEM"],
}
# Earlier selections that never had a figure number assigned:
#   ["SAGA", "DPAS", "LAKS"], ["VPDT", "DSDE", "MTNS"], ["VPDT", "GRSG", "MTNS"]

# Pick the figure to produce; `proteins` is derived from it so the two can
# no longer get out of sync.
number = 9
proteins = list(PROTEIN_SETS[number])

In [None]:
# Same import and global font size as in the first (imports) cell, repeated here.
from matplotlib.colors import LogNorm
plt.rc('font', size=35) 

def plot_tic01_2(ax, tics, tics_lims, cmap='viridis'):
    """Draw a log-normalised 2D histogram of columns 0 and 1 of ``tics``.

    Args:
        ax: Matplotlib axes to draw on.
        tics: 2D array; column 0 is binned along x, column 1 along y.
        tics_lims: 2D array whose per-column minimum and maximum define the
            x limits (column 0) and y limits (column 1) of ``ax``.
        cmap: Colormap forwarded to ``ax.hist2d``.
    """
    tic0, tic1 = tics[:, 0], tics[:, 1]
    ax.hist2d(tic0, tic1, bins=100, norm=LogNorm(), cmap=cmap, rasterized=True)

    # The limits come from a separate array so several panels can share the
    # same view even when they show different data.
    x_ref, y_ref = tics_lims[:, 0], tics_lims[:, 1]
    ax.set_ylim(y_ref.min(), y_ref.max())
    ax.set_xlim(x_ref.min(), x_ref.max())

    
def plot_free_energy2(ax, torison, label=None, linestyle='-'):
    """Plot a 1D free-energy profile estimated from samples onto ``ax``.

    The samples are histogrammed into 100 density bins and the profile is
    ``-log(p / p_max)``, so the most populated bin sits at zero.  Bins that
    contain no samples evaluate to ``+inf``.

    Args:
        ax: Matplotlib axes to draw on.
        torison: 1D array of samples of the coordinate to project onto
            (parameter name kept as-is for keyword callers).
        label: Legend label for the curve.
        linestyle: Matplotlib line style for the curve.
    """
    hist, edges = np.histogram(torison, bins=100, density=True)
    # Empty bins have zero density; log(0) = -inf is the intended result
    # there, so only the divide-by-zero RuntimeWarning is silenced.
    with np.errstate(divide='ignore'):
        free_energy = -np.log(hist / hist.max())
    centers = 0.5 * (edges[1:] + edges[:-1])
    ax.plot(centers, free_energy, linewidth=4, label=label, linestyle=linestyle)
    

In [None]:
# When true, each plotting cell below writes its figure to disk via plt.savefig.
save=True

In [None]:
from matplotlib.ticker import FormatStrFormatter

# Grid of 2D TICA histograms: one row per protein, columns MD / Exploration / MCMC.
n_proteins = len(proteins)
# squeeze=False keeps `axes` two-dimensional even for a single protein, so the
# axes[row, col] indexing below also works when n_proteins == 1.
fig, axes = plt.subplots(
    n_proteins, 3, figsize=(18, 6 * n_proteins), sharey='row', squeeze=False
)
for ax, title in zip(axes[0], ("MD", "Exploration", "MCMC")):
    ax.set_title(title)

for i, protein in enumerate(proteins):
    # NOTE(review): `npz` is not assigned in this cell and is not selected by
    # `protein`, so every row reads the same arrays unless `npz` is reloaded
    # elsewhere -- confirm where the per-protein archive is meant to be loaded.
    tics = npz["tics"]
    tics_model_exp = npz["tics_model_exp"]
    tics_model = npz["tics_model"]
    # Not used by this figure; still read so the notebook globals stay as before.
    energies_model = npz["energies_model"]
    energies_model_exp = npz["energies_model_exp"]
    traj_energy = npz["traj_energy"]

    # All three panels share limits derived from the MD projection (scaled by
    # 1.2) so the columns are directly comparable.
    shared_lims = tics * 1.2
    panels = (tics, tics_model_exp, tics_model)
    for ax, panel_tics in zip(axes[i], panels):
        plot_tic01_2(ax, panel_tics, tics_lims=shared_lims)
        ax.set_xticks([])

    axes[i, 0].set_ylabel("TIC 1")
    axes[i, 0].set_yticks([])

# Only the bottom row carries x labels.
for ax in axes[-1]:
    ax.set_xlabel("TIC 0")

plt.subplots_adjust(wspace=0, hspace=.05)

# Write each protein name to the left of its row, next to the y-axis label.
pad = 5  # in points
for ax, row in zip(axes[:, 0], proteins):
    ax.annotate(row, xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords=ax.yaxis.label, textcoords='offset points',
                size='large', ha='right', va='center', rotation=90)
if save:
    plt.savefig(base_dir+f"outputs/figures/4AA-TICA-{number}.svg", bbox_inches = "tight")

In [None]:
# Free-energy projections along TIC 0 (left column) and TIC 1 (right column),
# one row per protein.  Set MCMC to False to leave out the MCMC curves.
MCMC = True
# Recomputed here so the cell does not depend on the TICA cell having run.
n_proteins = len(proteins)
# squeeze=False keeps `axes` two-dimensional even for a single protein.
fig, axes = plt.subplots(
    n_proteins, 2, figsize=(12, 6 * n_proteins), sharey='row', squeeze=False
)
for i, protein in enumerate(proteins):
    # NOTE(review): `npz` is not assigned in this cell and is not selected by
    # `protein`, so every row reads the same arrays unless `npz` is reloaded
    # elsewhere -- confirm where the per-protein archive is meant to be loaded.
    tics = npz["tics"]
    tics_model_exp = npz["tics_model_exp"]
    tics_model = npz["tics_model"]
    # Not used by this figure; still read so the notebook globals stay as before.
    energies_model = npz["energies_model"]
    energies_model_exp = npz["energies_model_exp"]
    traj_energy = npz["traj_energy"]

    # Column index doubles as the TIC index being projected onto.
    for tic_index in (0, 1):
        ax = axes[i, tic_index]
        plot_free_energy2(ax, tics[:, tic_index], "MD")
        plot_free_energy2(ax, tics_model_exp[:, tic_index], "Exploration", linestyle="-.")
        if MCMC:
            plot_free_energy2(ax, tics_model[:, tic_index], "MCMC", linestyle="--")
        ax.set_xticks([])

    axes[i, 0].set_ylabel("Free energy/$k_B T$")

# Only the bottom row carries x labels.
axes[-1, 0].set_xlabel("TIC 0")
axes[-1, 1].set_xlabel("TIC 1")

# x=1. anchors the title at the right edge of the left panel, i.e. on the
# boundary between the two columns (wspace is 0).
axes[0, 0].set_title("Free energy projections",  x=1.)
plt.subplots_adjust(wspace=0, hspace=.05)
axes[0, 1].legend(fontsize=25)
if save:
    plt.savefig(base_dir+f"outputs/figures/4AA-free-energy-{number}.svg", bbox_inches = "tight")

In [None]:
from matplotlib.ticker import FormatStrFormatter

# Energy histograms: one row per protein with MD, Exploration and MCMC overlaid.
# Recomputed here so the cell does not depend on an earlier plotting cell.
n_proteins = len(proteins)
# With the default squeeze, a single protein would yield a bare Axes object
# and `axes[i]` would fail; squeeze=False always returns a 2-D array, from
# which the single column is taken so rows can be indexed as axes[i].
fig, axes = plt.subplots(
    n_proteins, 1, figsize=(12, 6 * n_proteins), sharex=True, squeeze=False
)
axes = axes[:, 0]

# Keyword arguments common to all three histograms.
hist_style = dict(bins=100, density=True, histtype='step')

for i, protein in enumerate(proteins):
    # NOTE(review): `npz` is not assigned in this cell and is not selected by
    # `protein`, so every row reads the same arrays unless `npz` is reloaded
    # elsewhere -- confirm where the per-protein archive is meant to be loaded.
    # The TICA arrays are not used by this figure; still read so the notebook
    # globals stay as before.
    tics = npz["tics"]
    tics_model_exp = npz["tics_model_exp"]
    tics_model = npz["tics_model"]
    energies_model = npz["energies_model"].flatten()
    energies_model_exp = npz["energies_model_exp"]
    traj_energy = npz["traj_energy"]

    # Histogram range for the exploration energies: from the lowest MD energy
    # up to the largest exploration energy, capped at -100.
    # NOTE(review): the cap presumably cuts off a high-energy tail; the range
    # becomes invalid if the MD minimum is above -100 -- confirm.
    min_val = np.min(traj_energy)
    max_val = np.max(energies_model_exp)
    lims = (min_val, min(max_val, -100))

    # Only the exploration histogram is restricted to `lims`; MD and MCMC are
    # binned over their own full range.
    axes[i].hist(traj_energy, label="MD", linewidth=5, **hist_style)
    axes[i].hist(
        energies_model_exp,
        label="Exploration",
        range=lims,
        linestyle='-.',
        linewidth=4,
        **hist_style,
    )
    axes[i].hist(
        energies_model,
        label="MCMC",
        linestyle='--',
        linewidth=4,
        **hist_style,
    )

    axes[i].set_yticks([])

# Only the bottom row carries the x label (the x axis is shared).
axes[n_proteins-1].set_xlabel("Energy in kJ/mol")

axes[0].set_title("Energy distribution")
axes[0].legend()
plt.subplots_adjust(wspace=0, hspace=.05)
if save:
    plt.savefig(base_dir+f"outputs/figures/4AA-energy-{number}.svg", bbox_inches = "tight")