## Run TICA

In [None]:
import os
import pickle
import numpy as np
import mdtraj as md
from timewarp.utils.evaluation_utils import compute_internal_coordinates
from timewarp.datasets import RawMolDynDataset
from timewarp.dataloader import (
    DenseMolDynBatch,
    moldyn_dense_collate_fn,
)
from itertools import islice
import matplotlib.pyplot as plt
#from astropy.stats import circcorrcoef
import matplotlib as mpl
from matplotlib.colors import LogNorm
# Set the default Matplotlib font size to 35 for all figures created below.
plt.rc('font', size=35) 


In [None]:
# Peptide groups that are plotted together. The key is the value of ``number``
# that ends up in the output figure file names ("...-multi-{number}").
protein_sets = {
    0: ["QW", "HT"],
    1: ["QW", "HT", "GP"],
    3: ["YA", "KN", "KI"],
    4: ["DH", "GT", "TK", "CW"],
}
# Change this single value to switch to another group.
number = 0
proteins = protein_sets[number]
# When True, the free-energy figure also shows the exploration samples.
exploration = False

In [None]:
from matplotlib.colors import LogNorm
# Default font size for all figures (same setting as applied after the imports).
plt.rc('font', size=35) 

def plot_tic01_2(ax, tics, tics_lims, cmap='viridis'):
    """Draw a log-scaled 2D histogram of columns 0 and 1 of ``tics`` on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        tics: 2D array; column 0 is binned along x and column 1 along y.
        tics_lims: 2D array whose column-wise minima and maxima set the
            x limits (column 0) and y limits (column 1) of the axes.
        cmap: Colormap passed on to ``hist2d``.
    """
    x_values, y_values = tics[:, 0], tics[:, 1]
    ax.hist2d(
        x_values,
        y_values,
        bins=100,
        norm=LogNorm(),
        cmap=cmap,
        rasterized=True,
    )

    # The limits come from ``tics_lims`` rather than from the plotted data, so
    # several panels can be drawn over the same range.
    y_range = tics_lims[:, 1]
    ax.set_ylim(y_range.min(), y_range.max())
    x_range = tics_lims[:, 0]
    ax.set_xlim(x_range.min(), x_range.max())

    
def plot_free_energy2(ax, torison, label=None, linestyle='-'):
    """Plot a 1D free-energy profile estimated from a histogram of samples.

    The samples are binned into 100 bins and the curve ``-log(p / p_max)`` is
    drawn at the bin centers, so the most populated bin sits at zero.

    Args:
        ax: Matplotlib axes to draw on.
        torison: 1D array of samples of the coordinate to histogram.
        label: Legend label for the curve.
        linestyle: Matplotlib line style for the curve.
    """
    hist, edges = np.histogram(torison, bins=100, density=True)
    # Empty bins have zero density and therefore an infinite free energy. The
    # infinite values are kept on purpose (the result is unchanged); only the
    # divide-by-zero warning that np.log would emit for them is silenced.
    with np.errstate(divide='ignore'):
        free_energy = -np.log(hist / hist.max())
    centers = 0.5 * (edges[1:] + edges[:-1])
    ax.plot(centers, free_energy, linewidth=4, label=label, linestyle=linestyle)
    

In [None]:
# When True, the figure cells below write their figures to disk via plt.savefig.
save=True

In [None]:
from matplotlib.ticker import FormatStrFormatter
# Figure: 2D histograms of TIC 0 vs. TIC 1, one row per peptide, with the
# "tics" array in the left ("MD") column and "tics_model" in the right
# ("Timewarp") column.
n_proteins = len(proteins)
# squeeze=False keeps ``axes`` two-dimensional even for a single peptide, so
# the ``axes[row, col]`` indexing below always works.
fig, axes = plt.subplots(
    n_proteins, 2, figsize=(12, 6*n_proteins), sharey='row', squeeze=False
)
axes[0, 0].set_title("MD")
axes[0, 1].set_title("Timewarp")

for i, protein in enumerate(proteins):
    # The context manager closes the .npz archive once both arrays are read.
    with np.load(base_dir + f'outputs/new-training/samples/2AA-results-{protein}.npz') as npz:
        tics = npz["tics"]
        tics_model = npz["tics_model"]

    # Both panels take their axis limits from ``tics`` so they share one range.
    plot_tic01_2(axes[i, 0], tics, tics_lims=tics)
    plot_tic01_2(axes[i, 1], tics_model, tics_lims=tics)
    axes[i, 0].set_ylabel("TIC 1")
    axes[i, 0].set_xticks([])
    axes[i, 1].set_xticks([])
    axes[i, 0].set_yticks([])

# Only the bottom row carries x-axis labels.
for bottom_ax in axes[-1]:
    bottom_ax.set_xlabel("TIC 0")
plt.subplots_adjust(wspace=0, hspace=.05)

pad = 5 # in points
# Write the peptide name to the left of each row's y-axis label.
for ax, row in zip(axes[:, 0], proteins):
    ax.annotate(row, xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords=ax.yaxis.label, textcoords='offset points',
                size='large', ha='right', va='center')
if save:
    plt.savefig(base_dir+f"outputs/figures/2AA-TICA-multi-{number}.svg", bbox_inches = "tight")

In [None]:
# Figure: free-energy projections onto TIC 0 (left column) and TIC 1 (right
# column), one row per peptide.
# squeeze=False keeps ``axes`` two-dimensional even for a single peptide.
fig, axes = plt.subplots(
    n_proteins, 2, figsize=(12, 6*n_proteins), sharey='row', squeeze=False
)
for i, protein in enumerate(proteins):
    # The context managers close the .npz archives once the arrays are read.
    with np.load(base_dir + f'outputs/new-training/samples/2AA-results-{protein}.npz') as npz:
        tics = npz["tics"]
        tics_model = npz["tics_model"]
    # Read the exploration results only when they are plotted, so a missing
    # exploration file does not break the figure when ``exploration`` is False.
    if exploration:
        with np.load(base_dir + f'outputs/new-training/samples/2AA-results-exploration-{protein}.npz') as npz_exploration:
            tics_model_exploration = npz_exploration["tics_model"]

    # Column ``tic`` shows the projection onto TIC ``tic``. The curves are
    # drawn in the order MD, (Exploration), MCMC so the color cycle matches
    # in both columns.
    for tic in (0, 1):
        panel = axes[i, tic]
        plot_free_energy2(panel, tics[:, tic], "MD")
        if exploration:
            plot_free_energy2(panel, tics_model_exploration[:, tic], "Exploration", linestyle="--")
        plot_free_energy2(panel, tics_model[:, tic], "MCMC", linestyle="--")
        panel.set_xticks([])
    axes[i, 0].set_ylabel("Free energy/$k_B T$")

# Only the bottom row carries x-axis labels.
axes[-1, 0].set_xlabel("TIC 0")
axes[-1, 1].set_xlabel("TIC 1")

# x=1. places the title at the right edge of the left panel.
axes[0, 0].set_title("Free energy projections",  x=1.)
plt.subplots_adjust(wspace=0, hspace=0.05)

axes[0, 1].legend(fontsize=25)
if save:
    plt.savefig(base_dir+f"outputs/figures/2AA-free-energy-multi-{number}.svg", bbox_inches = "tight")

In [None]:
def ESS(autocorrelations, spacing=1, cut_off_at_zero=True):
    """Compute ``1 / (-1 + 2 * spacing * sum(|rho_k|))`` from an autocorrelation series.

    Args:
        autocorrelations: 1D sequence of autocorrelation values, starting at
            lag 0.
        spacing: Factor multiplying the summed autocorrelations.
        cut_off_at_zero: If True, only the values before the first entry that
            is <= 0 enter the sum. If the series never reaches <= 0, or if
            this flag is False, the whole series is summed.

    Returns:
        The value of the expression above as a NumPy float.
    """
    autocorrelations = np.asarray(autocorrelations)
    # Default: sum every term. (Slicing with -1 would drop the last one.)
    n_terms = len(autocorrelations)
    if cut_off_at_zero:
        non_positive = np.flatnonzero(autocorrelations <= 0)
        # Without a non-positive entry there is nothing to cut off.
        if non_positive.size:
            n_terms = non_positive[0]
    summed = np.abs(autocorrelations[:n_terms]).sum()
    return 1 / (-1 + 2 * spacing * summed)
import arviz as az

# Figure: autocorrelation of TIC 0 for "tics" (labelled MD) and "tics_model"
# (labelled Timewarp) against lag time, one row per peptide.
num_md_steps=10000
max_time= 0
# squeeze=False plus taking the single column gives a 1D array of axes for any
# number of peptides (a bare Axes would be returned for a single one).
fig, axes = plt.subplots(n_proteins, 1, figsize=(12, 6*n_proteins), sharex=True, squeeze=False)
axes = axes[:, 0]
for i, protein in enumerate(proteins):
    # The context manager closes the .npz archive once the arrays are read.
    with np.load(base_dir + f'outputs/new-training/samples/2AA-results-{protein}.npz') as npz:
        tics = npz["tics"]
        tics_model = npz["tics_model"]
        model_time_per_step = npz["model_time_per_step"]
        md_time_per_step = npz["md_time_per_step"]

    # Time assigned to one entry of each series; converts lags to seconds.
    # NOTE(review): the factor 10 is presumably the number of model steps per
    # stored sample - confirm against the sampling script.
    md_time_per_entry = md_time_per_step * num_md_steps
    model_time_per_entry = model_time_per_step * 10

    autocorrelation_model = az.autocorr(tics_model[:, 0])
    autocorrelation_openMM = az.autocorr(tics[:, 0])

    # Lag time of the last entry before each autocorrelation first reaches
    # <= 0; 1.5x the larger of the two sets the x-range this peptide needs.
    steps_until_zero_md = np.where(autocorrelation_openMM<=0)[0][0]
    steps_until_zero_model = np.where(autocorrelation_model<=0)[0][0]
    max_time_peptide = max(
        (steps_until_zero_md - 1) * md_time_per_entry,
        (steps_until_zero_model - 1) * model_time_per_entry,
    ) * 1.5
    # The shared x-axis must cover the largest range over all peptides.
    max_time = max(max_time, max_time_peptide)

    # ESS value divided by the time per entry, reported as "ESS/t" in the legend.
    ess_model_s = ESS(autocorrelation_model, spacing=1)/model_time_per_entry
    ess_md_s = ESS(autocorrelation_openMM, spacing=1)/md_time_per_entry

    axes[i].plot(np.arange(0, len(autocorrelation_openMM)) * md_time_per_entry, autocorrelation_openMM, label=f"MD | ESS/t = {ess_md_s:.3f}/s", linewidth=5)
    axes[i].plot(np.arange(0, len(autocorrelation_model)) * model_time_per_entry, autocorrelation_model, label=f"Timewarp | ESS/t = {ess_model_s:.3f}/s", linewidth=5)
    axes[i].set_ylabel("Autocorrelation")

    axes[i].legend(fontsize=25)
    axes[i].axhline(0, color='black', linestyle="--")

axes[-1].set_xlabel("Lag time in s")
axes[-1].set_xlim(0, max_time)

axes[0].set_title("TIC 0 autocorrelations")
plt.subplots_adjust(wspace=0, hspace=0.05)

if save:
    plt.savefig(base_dir+f"outputs/figures/2AA-autocorrelation-multi-{number}.svg", bbox_inches = "tight")


In [None]:
# Figure: TIC 0 against sampling time, two rows per peptide ("tics" labelled
# MD on top, "tics_model" labelled Timewarp below).
fig, axes = plt.subplots(n_proteins * 2, 1, figsize=(12, 6*n_proteins), sharex=True)
# Only every ``spacing``-th MD entry (and every ``spacing*10``-th model entry)
# is drawn.
spacing = 10

# Shared x-range: the shortest total sampling time over all series, so every
# panel has data across the whole axis.
max_time = 1e20
for i, protein in enumerate(proteins):
    # The context manager closes the .npz archive once the arrays are read.
    with np.load(base_dir + f'outputs/new-training/samples/2AA-results-{protein}.npz') as npz:
        tics = npz["tics"]
        tics_model = npz["tics_model"]
        model_time_per_step = npz["model_time_per_step"]
        md_time_per_step = npz["md_time_per_step"]

    # Time of each entry, divided by 3600 to get the hours shown on the x-axis.
    # ``num_md_steps`` is set in the autocorrelation cell.
    sampling_time_md = np.arange(len(tics)) * md_time_per_step * num_md_steps / 3600
    sampling_time_model = np.arange(len(tics_model)) * model_time_per_step * 10 /3600
    max_time = min(max_time, sampling_time_md[-1], sampling_time_model[-1])

    md_ax, model_ax = axes[2*i], axes[2*i+1]
    md_ax.scatter(sampling_time_md[::spacing], tics[::spacing, 0], s=.5, label="MD")
    model_ax.scatter(sampling_time_model[::spacing*10], tics_model[::spacing*10, 0], s=.5, c='C1', label="Timewarp")
    for panel in (md_ax, model_ax):
        panel.legend(fontsize=20,  markerscale=10, loc=1)
        panel.set_yticks([])
        panel.set_ylabel("TIC 0")

# Address the bottom panel directly instead of through the leaked loop index.
axes[-1].set_xlabel("Wall-clock sampling time in h")
axes[-1].set_xlim(0, max_time)

axes[0].set_title("TIC 0 time dependence")
plt.subplots_adjust(wspace=0, hspace=0.05)

if save:
    plt.savefig(base_dir+f"outputs/figures/2AA-tic0-dependence-multi-{number}.png", bbox_inches = "tight", dpi=300)

    plt.savefig(base_dir+f"outputs/figures/2AA-tic0-dependence-multi-{number}.svg", bbox_inches = "tight")



