## Run TICA

In [None]:
import os
import pickle
import numpy as np
import mdtraj as md
from timewarp.utils.evaluation_utils import compute_internal_coordinates
from timewarp.datasets import RawMolDynDataset
from timewarp.dataloader import (
    DenseMolDynBatch,
    moldyn_dense_collate_fn,
)
from itertools import islice
import matplotlib.pyplot as plt
#from astropy.stats import circcorrcoef
import matplotlib as mpl
from matplotlib.colors import LogNorm
import mdtraj as md
from timewarp.utils.tica_utils import tica_features, run_tica, plot_tic01, plot_free_energy

# Set the default matplotlib font size used by the figures in this notebook.
plt.rc('font', size=35) 


In [None]:
# When True, the figure cells below also write their plots to SVG files.
save = False

In [None]:
# Root prefix for all data and figure paths used below. It is joined to the
# relative paths by plain string concatenation, so a non-empty value must end
# with a path separator (e.g. "/path/to/repo/"). The empty string resolves
# everything relative to the current working directory.
base_dir = ""

# Dataset split to read trajectories from.
data_type = "test"



In [None]:
# Run configuration 1: `proteins[k]` is plotted with the starting frame
# `initial_idxs[k]` of its trajectory; `number` is the suffix of the saved
# figure file names.
proteins = ["TK", "TK", "AAEW", "AAEW", "EASS", "EASS"]
initial_idxs = [100, 16400, 100, 148745, 220, 16970]
number = 1

In [None]:
# Run configuration 7: `proteins[k]` is plotted with the starting frame
# `initial_idxs[k]` of its trajectory; `number` is the suffix of the saved
# figure file names.
proteins = ["LYVI", "LYVI", "CTSA", "CTSA"]
initial_idxs = [134053, 404213, 36245, 162715]
number = 7

In [None]:
# Run configuration 6: `proteins[k]` is plotted with the starting frame
# `initial_idxs[k]` of its trajectory; `number` is the suffix of the saved
# figure file names.
proteins = ["TK", "TK", "AAEW", "AAEW"]
initial_idxs = [100, 16400, 100, 148745]
number = 6

In [None]:
from matplotlib.colors import LogNorm
# Same font-size rc setting as in the imports cell, applied again here.
plt.rc('font', size=35) 

def plot_tic01_2(ax, tics, tics_lims, cmap='viridis'):
    """Draw a log-normalised 2D histogram of columns 0 and 1 of ``tics``.

    Args:
        ax: Matplotlib axes to draw on.
        tics: 2D array; column 0 goes on the x axis, column 1 on the y axis.
        tics_lims: 2D array whose per-column min/max define the axis limits,
            so that several panels can share the same view.
        cmap: Colormap passed to ``hist2d``.
    """
    x_values, y_values = tics[:, 0], tics[:, 1]
    ax.hist2d(
        x_values,
        y_values,
        bins=100,
        norm=LogNorm(),
        cmap=cmap,
        rasterized=True,
    )

    x_ref, y_ref = tics_lims[:, 0], tics_lims[:, 1]
    ax.set_ylim(y_ref.min(), y_ref.max())
    ax.set_xlim(x_ref.min(), x_ref.max())

    
def plot_free_energy2(ax, torison, label=None, linestyle='-'):
    """Plot the 1D profile ``-log(p / p_max)`` of the samples on ``ax``.

    The samples are binned into a 100-bin density histogram; the curve is
    shifted so that the most populated bin sits at zero.

    Args:
        ax: Axes-like object providing ``plot``.
        torison: 1D array of samples to histogram.
        label: Legend label forwarded to ``ax.plot``.
        linestyle: Line style forwarded to ``ax.plot``.
    """
    hist, edges = np.histogram(torison, bins=100, density=True)
    # Empty bins have zero density, so their value is +inf on purpose; only
    # the divide-by-zero warning that np.log would emit for them is silenced.
    with np.errstate(divide='ignore'):
        free_energy = -np.log(hist/hist.max())
    centers = 0.5*(edges[1:] + edges[:-1])
    ax.plot(centers, free_energy, linewidth=4, label=label, linestyle=linestyle)
    

In [None]:
from matplotlib.ticker import FormatStrFormatter

# TICA density figure: one row per (initial index, peptide) pair and three
# columns showing the reference trajectory, the conditional MD samples and the
# Timewarp samples, all projected with the TICA model fitted on the reference.
n_proteins = len(proteins)
# squeeze=False keeps `axes` two-dimensional even for a single row, so the
# axes[row, col] indexing below always works.
fig, axes = plt.subplots(
    n_proteins, 3, figsize=(18, 6*n_proteins), sharey='row', squeeze=False
)
axes[0, 0].set_title("Boltzmann")
axes[0, 1].set_title("MD")
axes[0, 2].set_title("Timewarp")
for i, (initial_idx, protein) in enumerate(zip(initial_idxs, proteins)):
    # Two-letter names are read from the 2AA dataset, all others from 4AA.
    dataset = "2AA-1-huge" if len(protein) == 2 else "4AA-huge"
    data_dir = base_dir + f".data/simulated-data/trajectory-data/{dataset}/{data_type}"
    npz_traj = np.load(data_dir+f'/{protein}-traj-arrays.npz')
    state0pdbpath = os.path.join(data_dir, f"{protein}-traj-state0.pdb")
    # Parse the PDB once and share the topology between all trajectories.
    topology = md.load_topology(state0pdbpath)

    # Fit TICA on every 5th frame of the reference trajectory.
    trajectory = md.Trajectory(xyz=npz_traj['positions'][::5], topology=topology)
    tica_model = run_tica(trajectory, lagtime=100)
    feats = tica_features(trajectory)
    tics = tica_model.transform(feats)

    # NOTE: `all_coords_md` (conditional MD samples) and `sampled_coords`
    # (model samples) for the current peptide must be loaded here; the load
    # calls are placeholders and this cell fails until they are filled in.
    # all_coords_md = np.load()
    # sampled_coords = np.load()
    traj_conditional = md.Trajectory(xyz=all_coords_md, topology=topology)
    # Truncate the model samples to the number of MD samples.
    traj_model = md.Trajectory(
        xyz=sampled_coords[:len(all_coords_md)], topology=topology
    )
    # Frame of the reference trajectory marked with a red cross below.
    traj_ini = md.Trajectory(
        xyz=npz_traj['positions'][initial_idx], topology=topology
    )

    feat_ini = tica_features(traj_ini)
    tics_ini = tica_model.transform(feat_ini)
    feat_conditional = tica_features(traj_conditional)
    tics_conditional = tica_model.transform(feat_conditional)
    feat_model = tica_features(traj_model)
    tics_model = tica_model.transform(feat_model)

    # All three panels use the axis limits of the reference projection.
    plot_tic01_2(axes[i, 0], tics, tics_lims=tics)
    plot_tic01_2(axes[i, 1], tics_conditional, tics_lims=tics)
    plot_tic01_2(axes[i, 2], tics_model, tics_lims=tics)

    axes[i, 0].set_ylabel("TIC 1")
    axes[i, 0].set_xticks([])
    axes[i, 1].set_xticks([])
    axes[i, 2].set_xticks([])
    axes[i, 0].set_yticks([])
    axes[i, 1].scatter(tics_ini[:, 0], tics_ini[:, 1], marker="x", color="red", s=200, linewidths=5)
    axes[i, 2].scatter(tics_ini[:, 0], tics_ini[:, 1], marker="x", color="red", s=200, linewidths=5)

# Figure-level settings that do not depend on the row: x labels on the bottom
# row only, and no horizontal gap between panels.
for col in range(3):
    axes[n_proteins-1, col].set_xlabel("TIC 0")
plt.subplots_adjust(wspace=0, hspace=0.05)

# Write the peptide name to the left of each row, next to the y-axis label.
pad = 5 # in points
for ax, row in zip(axes[:,0], proteins):
    ax.annotate(row, xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords=ax.yaxis.label, textcoords='offset points',
                size='large', ha='right', va='center', rotation=90)
if save:
    plt.savefig(base_dir+f"outputs/figures/TICA-conditional-{number}.svg", bbox_inches = "tight")

In [None]:
# Free-energy figure: one row per (initial index, peptide) pair; the left
# column shows the profile along TIC 0, the right column along TIC 1, each
# comparing conditional MD samples with Timewarp samples.
# squeeze=False keeps `axes` two-dimensional even for a single row.
fig, axes = plt.subplots(
    n_proteins, 2, figsize=(12, 6*n_proteins), sharey='row', squeeze=False
)
for i, (initial_idx, protein) in enumerate(zip(initial_idxs, proteins)):
    # Two-letter names are read from the 2AA dataset, all others from 4AA.
    dataset = "2AA-1-huge" if len(protein) == 2 else "4AA-huge"
    data_dir = base_dir + f".data/simulated-data/trajectory-data/{dataset}/{data_type}"
    npz_traj = np.load(data_dir+f'/{protein}-traj-arrays.npz')
    state0pdbpath = os.path.join(data_dir, f"{protein}-traj-state0.pdb")
    # Parse the PDB once and share the topology between all trajectories.
    topology = md.load_topology(state0pdbpath)

    # Fit TICA on every 5th frame of the reference trajectory.
    trajectory = md.Trajectory(xyz=npz_traj['positions'][::5], topology=topology)
    tica_model = run_tica(trajectory, lagtime=100)
    feats = tica_features(trajectory)
    tics = tica_model.transform(feats)

    # NOTE: `all_coords_md` (conditional MD samples) and `sampled_coords`
    # (model samples) for the current peptide must be loaded here; the load
    # calls are placeholders and this cell fails until they are filled in.
    # all_coords_md = np.load()
    # sampled_coords = np.load()
    traj_conditional = md.Trajectory(xyz=all_coords_md, topology=topology)
    # Truncate the model samples to the number of MD samples.
    traj_model = md.Trajectory(
        xyz=sampled_coords[:len(all_coords_md)], topology=topology
    )
    feat_conditional = tica_features(traj_conditional)
    tics_conditional = tica_model.transform(feat_conditional)
    feat_model = tica_features(traj_model)
    tics_model = tica_model.transform(feat_model)

    # Column `tic` of the figure shows the profile along TICA component `tic`.
    for tic in range(2):
        plot_free_energy2(axes[i, tic], tics_conditional[:, tic], "MD")
        plot_free_energy2(axes[i, tic], tics_model[:, tic], "Timewarp", linestyle="--")
        axes[i, tic].set_xticks([])

    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    axes[i, 0].set_ylabel(r"$-\log(p)$")

# Figure-level settings that do not depend on the row.
axes[n_proteins-1, 0].set_xlabel("TIC 0")
axes[n_proteins-1, 1].set_xlabel("TIC 1")
axes[0, 0].set_title("TICA projections",  x=1.)
plt.subplots_adjust(wspace=0, hspace=0.05)

axes[0, 1].legend(fontsize=25)
if save:
    plt.savefig(base_dir+f"outputs/figures/free-energy-conditional-{number}.svg", bbox_inches = "tight")

In [None]:
from simulation.md import (
    get_simulation_environment,
    compute_energy_and_forces,
    compute_energy_and_forces_decomposition,
    get_parameters_from_preset, 
    get_simulation_environment_integrator,
    get_simulation_environment_for_force
)
from timewarp.utils.openmm import OpenmmPotentialEnergyTorch
import torch
# Name of the simulation parameter preset passed to the helpers below
# (alternative preset name: "alanine-dipeptide").
parameters =  "T1B-peptides" # "alanine-dipeptide"
# NOTE: `state0pdbpath` is not set in this cell; it is whatever the last
# iteration of a previously executed plotting loop left behind.
simulation = get_simulation_environment(state0pdbpath, parameters)
integrator = get_simulation_environment_integrator(parameters)
system = simulation.system


# Callable used below to evaluate energies of coordinate tensors.
# NOTE(review): platform_name='CUDA' presumably requires a CUDA-capable
# OpenMM installation -- confirm before running on a CPU-only machine.
openmm_potential_energy_torch = OpenmmPotentialEnergyTorch(system, integrator, platform_name='CUDA')


In [None]:
# Energy figure: one row per (initial index, peptide) pair, comparing the
# potential-energy histogram of conditional MD samples with Timewarp samples.
# squeeze=False plus the column slice gives a 1D array of axes even when
# there is only a single row.
fig, axes = plt.subplots(
    n_proteins, 1, figsize=(12, 6*n_proteins), sharex=True, squeeze=False
)
axes = axes[:, 0]

# Upper bound applied to the histogram range, cutting off the high-energy tail.
energy_cap = -200

for i, (initial_idx, protein) in enumerate(zip(initial_idxs, proteins)):
    # Two-letter names are read from the 2AA dataset, all others from 4AA.
    dataset = "2AA-1-huge" if len(protein) == 2 else "4AA-huge"
    data_dir = base_dir + f".data/simulated-data/trajectory-data/{dataset}/{data_type}"
    state0pdbpath = os.path.join(data_dir, f"{protein}-traj-state0.pdb")
    # Build the energy function for the current peptide.
    simulation = get_simulation_environment(state0pdbpath, parameters)
    integrator = get_simulation_environment_integrator(parameters)
    system = simulation.system

    openmm_potential_energy_torch = OpenmmPotentialEnergyTorch(system, integrator, platform_name='CUDA')

    # NOTE(review): `sampled_coords` and `all_coords_md` are not reloaded in
    # this loop, so every row uses whatever arrays are currently in memory --
    # confirm that they belong to `protein`.
    energies_model = openmm_potential_energy_torch(torch.from_numpy(sampled_coords))
    energies_md_conditional = openmm_potential_energy_torch(torch.from_numpy(all_coords_md))

    # Convert once and reuse for both the range computation and the plots.
    energies_model_np = energies_model.cpu().numpy().flatten()
    energies_md_np = energies_md_conditional.cpu().numpy().flatten()

    min_val = min(energies_model_np.min(), energies_md_np.min())
    max_val = max(energies_model_np.max(), energies_md_np.max())
    # Only apply the cap when it lies at or above the smallest energy;
    # otherwise the range would be inverted and `hist` would raise.
    upper = min(max_val, energy_cap) if min_val <= energy_cap else max_val
    lims = (min_val, upper)

    axes[i].hist(
        energies_md_np,
        bins=100,
        density=True,
        label="MD",
        histtype='step',
        linewidth=5,
        range=lims,
    )
    axes[i].hist(
        energies_model_np,
        bins=100,
        density=True,
        label="Timewarp",
        histtype='step',
        linestyle='--',
        range=lims,
        linewidth=4,
    )
    axes[i].set_yticks([])

# Figure-level settings that do not depend on the row.
axes[n_proteins-1].set_xlabel("Energy in kJ/mol")
axes[0].set_title("Energy distribution")
axes[0].legend()
plt.subplots_adjust(wspace=0, hspace=0.05)

if save:
    plt.savefig(base_dir+f"outputs/figures/energy-conditional-{number}.svg", bbox_inches = "tight")

In [None]:
# Chemical element symbols recognised below; the position in this list is the
# integer code assigned to the element.
KNOWN_ELEMENTS = ["C", "H", "N", "O", "S"]
# Lookup table from element symbol to its integer code.
ELEMENT_VOCAB = dict(zip(KNOWN_ELEMENTS, range(len(KNOWN_ELEMENTS))))



In [None]:
from timewarp.utils.evaluation_utils import compute_internal_coordinates

# Bond-length figure: one row per (initial index, peptide) pair; for every
# pair of bonded element types, the bond-length histogram of conditional MD
# samples is overlaid with that of the Timewarp samples.
# squeeze=False plus the column slice gives a 1D array of axes even when
# there is only a single row.
fig, axes = plt.subplots(
    n_proteins, 1, figsize=(12, 6*n_proteins), sharex=True, squeeze=False
)
axes = axes[:, 0]

for i, (initial_idx, protein) in enumerate(zip(initial_idxs, proteins)):
    # Two-letter names are read from the 2AA dataset, all others from 4AA.
    dataset = "2AA-1-huge" if len(protein) == 2 else "4AA-huge"
    data_dir = base_dir + f".data/simulated-data/trajectory-data/{dataset}/{data_type}"
    state0pdbpath = os.path.join(data_dir, f"{protein}-traj-state0.pdb")
    topology = md.load_topology(state0pdbpath)
    atom_types = np.array([ELEMENT_VOCAB[a.element.symbol] for a in topology.atoms])
    adj_list = np.array([(b.atom1.index, b.atom2.index) for b in topology.bonds])
    # Element codes of both atoms of every bond, sorted within each pair so
    # that the order of the two atoms does not matter. Computed once and
    # reused for every bond type below.
    bond_elements = np.sort(atom_types[adj_list], axis=-1)
    bond_types = np.unique(bond_elements, axis=0)

    # NOTE(review): `sampled_coords` and `all_coords_md` are not reloaded in
    # this loop, so every row uses whatever arrays are currently in memory --
    # confirm that they belong to `protein`.
    bonds, torsions = compute_internal_coordinates(
        state0pdbpath, adj_list, sampled_coords[:len(all_coords_md)]
    )

    bonds_traj_conditional, torsions_traj_conditional = compute_internal_coordinates(
        state0pdbpath, adj_list, all_coords_md
    )
    for j, bond_type in enumerate(bond_types):
        # Columns of the bond arrays whose element pair matches `bond_type`.
        bond_idxs = np.where(np.all(bond_elements == bond_type, axis=1))[0]
        bond_md = bonds_traj_conditional[:, bond_idxs]
        bond_model = bonds[:, bond_idxs]
        axes[i].hist(bond_md.flatten(), bins=100, density=True, histtype='step',linewidth=4, label="MD", color="C0")
        # NOTE(review): only the model histogram is restricted to a fixed
        # range; the MD histogram spans its own data range -- confirm this
        # asymmetry is intended.
        axes[i].hist(bond_model.flatten(), bins=100, density=True, histtype='step',linewidth=3, label="Timewarp",  color="C1", linestyle="--", range=(0.08, 0.18))
        # Create the legend right after the first pair of histograms so that
        # it contains exactly one "MD" and one "Timewarp" entry.
        if i == 0 and j == 0:
            axes[0].legend()

    axes[i].set_yticks([])

# Figure-level settings that do not depend on the row.
axes[n_proteins-1].set_xlabel("Bondlength in nm")
axes[0].set_title("Bondlength distribution")
plt.subplots_adjust(wspace=0, hspace=0.05)

if save:
    plt.savefig(base_dir+f"outputs/figures/bonds-conditional-{number}.svg", bbox_inches = "tight")