# Torsion analysis


In [None]:
import os
import sys
import matplotlib.pyplot as plt
import matplotlib as mpl
# Use a large default font size for every figure drawn in this notebook.
plt.rc('font', size=30)

from tqdm.auto import tqdm
import numpy as np
import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Put the directory three levels above the working directory on sys.path
# (presumably the repository root -- confirm) before importing project modules.
module_path = os.path.abspath(os.path.join('../../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from timewarp.utils.torsion_utils import get_all_torsions, get_all_torsions_model, compute_torsions
from timewarp.utils.training_utils import load_model

from simulation.md import get_simulation_environment


### Load model

In [None]:
# Checkpoint file; it is read twice below: once by load_model for the model
# itself and once directly for its "training_config" entry.
savefile = '../../outputs/custom_attention_transformer_nvp_stepwidth_1000_2022-01-06_16-29-19/best_model.pt'
model = load_model(path=savefile).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU can still be opened on a CPU-only machine.
config = torch.load(savefile, map_location=device)["training_config"]
# Step width taken from the training configuration; it is passed to the
# torsion helpers and to the openMM simulation further down.
step_width = config.step_width

### Specify the location and name of the data

In [None]:
# Molecule and dataset identifiers used to build the file names below.
protein, dataset = 'ad2', 'AD-1'
# Parameter-set name handed to get_simulation_environment later on.
parameters = "alanine-dipeptide"

# Directory of the test split and the PDB file of the initial state in it.
data_dir = f'../../.data/simulated-data/trajectory-data/{dataset}/test'
state0pdbpath = os.path.join(data_dir, f"{protein}-traj-state0.pdb")


Get the torsion angles for the first 10000 target samples in the test set.

In [None]:
# Torsion angles of the target states (targets=True) for the first 10000
# initial samples of the test data.
dihedrals = get_all_torsions(
    protein,
    data_dir,
    step_width,
    n_initial_samples=10000,
    targets=True,
)

In [None]:
# Torsion-angle types; each name is also the attribute under which the
# torsion helpers store the angles of that type.
dihedral_names = ["phi", "psi", "chi1", "chi2", "chi3", "chi4", "omega"]
# Element sequence of the four atoms that define each backbone torsion,
# following the standard backbone definitions:
#   phi   = C(i-1) - N(i)  - CA(i)  - C(i)     -> C-N-C-C
#   psi   = N(i)   - CA(i) - C(i)   - N(i+1)   -> N-C-C-N
#   omega = CA(i)  - C(i)  - N(i+1) - CA(i+1)  -> C-C-N-C
# The labels are only used in plot titles; side-chain (chi) torsions carry
# no label. The previous labels were rotated by one position relative to
# these definitions.
dihedral_atoms = ["(C-N-C-C)", "(N-C-C-N)", "", "", "", "", "(C-C-N-C)"]

### Ramachandran-Plot for Alanine Dipeptide - MD samples

In [None]:
import matplotlib as mpl

# Ramachandran plot of the MD target samples: 2D histogram of phi against
# psi, with a logarithmic colour scale.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Alanine Dipeptide - MD')
phi_md = dihedrals.phi[:, 0, 0]
psi_md = dihedrals.psi[:, 0, 0]
ax.hist2d(phi_md, psi_md, bins=100, norm=mpl.colors.LogNorm())
ax.set_xlabel(dihedral_names[0])
ax.set_ylabel(dihedral_names[1]);

Get the torsion angles for the first 10000 conditioning samples in the test set.

The output has seven fields, one for each of the 7 possible torsion angle types (phi, psi, chi1, chi2, chi3, chi4, omega). Each field contains a NumPy array with shape [B, S, n_angles], where B is the number of initial samples (`n_initial_samples`), S is the number of samples generated from a single conditioning state (this is one for openMM trajectories), and n_angles is the number of torsion angles of that type in the molecule.

In the case of Alanine Dipeptide, there is only one phi and one psi angle.

In [None]:
# Same call as for the targets, but with targets=False to obtain the torsion
# angles of the conditioning states instead.
dihedrals_conditioning = get_all_torsions(
    protein,
    data_dir,
    step_width,
    n_initial_samples=10000,
    targets=False,
)

# Plot transitions of the phi angle

In [None]:
# Index into dihedral_names of the torsion type to plot (0 selects phi).
# The index drives the plotted data, the title and the axis labels, so
# changing it switches the whole figure to another torsion type.
angle_idx = 0
angle_name = dihedral_names[angle_idx]

# 2D histogram of the conditioning angle (x) against the angle of the
# corresponding MD target sample (y), on a logarithmic colour scale.
plt.figure(figsize=(10, 10))
plt.title(f'Alanine Dipeptide {angle_name} transitions - openMM')
plt.hist2d(
    getattr(dihedrals_conditioning, angle_name)[:, 0, 0],
    getattr(dihedrals, angle_name)[:, 0, 0],
    bins=100,
    norm=mpl.colors.LogNorm(),
)
plt.xlabel(f"Initial {angle_name} angle")
plt.ylabel(f"Sampled {angle_name} angle");

Now we want to get the torsion angles of model samples as well. As the model is currently unable to produce a Markov Chain on its own, we sample conditioned on test set initial samples. 

In [None]:
# Torsion angles of model samples: one model sample (n_samples_model=1) for
# each of the first 10000 initial samples of the test data.
dihedrals_model = get_all_torsions_model(
    protein,
    data_dir,
    step_width,
    model,
    n_initial_samples=10000,
    n_samples_model=1,
    device=device,
)

The corresponding Ramachandran and transition plots look quite good. However, remember that we conditioned on samples from the test set.

In [None]:
# Ramachandran plot of the model samples: 2D histogram of phi against psi,
# with a logarithmic colour scale.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Alanine Dipeptide - model')
phi_model = dihedrals_model.phi[:, 0, 0]
psi_model = dihedrals_model.psi[:, 0, 0]
ax.hist2d(phi_model, psi_model, bins=100, norm=mpl.colors.LogNorm())
ax.set_xlabel(dihedral_names[0])
ax.set_ylabel(dihedral_names[1]);

In [None]:
# Index into dihedral_names of the torsion type to plot (0 selects phi).
# The index drives the plotted data, the title and the axis labels, so
# changing it switches the whole figure to another torsion type.
angle_idx = 0
angle_name = dihedral_names[angle_idx]

# 2D histogram of the conditioning angle (x) against the angle of the
# corresponding model sample (y), on a logarithmic colour scale.
plt.figure(figsize=(10, 10))
plt.title(f'Alanine Dipeptide {angle_name} transitions - model')
plt.hist2d(
    getattr(dihedrals_conditioning, angle_name)[:, 0, 0],
    getattr(dihedrals_model, angle_name)[:, 0, 0],
    bins=100,
    norm=mpl.colors.LogNorm(),
)
plt.xlabel(f"Initial {angle_name} angle")
plt.ylabel(f"Sampled {angle_name} angle");

## Individual torsion distributions

We can also look at individual torsion angles. 

In [None]:
# There is only one torsion angle per angle type here, so the index along
# the last axis is fixed instead of being iterated over.
j = 0
for angle_name, atoms_label in zip(dihedral_names, dihedral_atoms):
    angle = getattr(dihedrals, angle_name)
    angle_model = getattr(dihedrals_model, angle_name)
    # Torsion types that do not occur in the molecule come back empty.
    if angle.size == 0:
        continue
    plt.figure(figsize=(16, 9))
    # Overlay the normalised histograms of the model and the MD samples.
    for values, legend_label in ((angle_model, "model"), (angle, "OpenMM")):
        plt.hist(values[:, 0, j], bins=100, label=legend_label, density=True, alpha=0.5)
    plt.legend()
    plt.xlabel("Angle in Rad")
    plt.title(f"{angle_name}-{j} {atoms_label}")

## Torsion distribution for a single conditioning state
This can be used to compare how well we match the conditional target distribution.
First, choose a conditioning state. In this case we use a conditioning state from the test data.

In [None]:
# Path of the trajectory archive (a single .npz file, despite the name).
traj_dir = os.path.join(data_dir, f"{protein}-traj-arrays.npz")
# np.load keeps the archive's file handle open until it is closed; the
# context manager releases it as soon as the two arrays have been read.
with np.load(traj_dir) as traj_npz:
    # First stored frame: positions and velocities of the conditioning state.
    conditioning_coords = traj_npz['positions'][0]
    conditioning_velocs = traj_npz['velocities'][0]


As the data sets do not contain multiple samples starting from the same conditioning state, we have to create them with openMM.

In [None]:
sim = get_simulation_environment(state0pdbpath, parameters)
n_samples = 100
positions = []
for _ in tqdm(range(n_samples)):
    # Reset the simulation to the same conditioning state before every run.
    sim.context.setPositions(conditioning_coords)
    sim.context.setVelocities(conditioning_velocs)
    # Advance by step_width integration steps and read the final positions.
    sim.step(step_width)
    state = sim.context.getState(getPositions=True)
    # Every entry is a one-element list holding the positions after this run.
    positions.append([state.getPositions(asNumpy=True)._value])
    

The shape of the positions has to be [B, S, V, 3], where B is the number of initial conditional samples, S is the number of samples per conditional sample, and V is the number of atoms in the molecule.

In [None]:
# Convert the nested list to an array and swap its first two axes.
# NOTE(review): running this cell twice swaps the axes back.
positions = np.array(positions).transpose(1, 0, 2, 3)


In [None]:
# Torsion angles of the openMM samples generated above; the PDB file of the
# initial state is passed along with the positions.
dihedrals_single_conditioning = compute_torsions(
    positions,
    state0pdbpath,
)

Now we produce samples from the same conditioning state with the model.

In [None]:
# Torsion angles of 1000 model samples (n_samples_model=1000) drawn for a
# single initial sample (n_initial_samples=1).
dihedrals_single_conditioning_model = get_all_torsions_model(
    protein,
    data_dir,
    step_width,
    model,
    n_initial_samples=1,
    n_samples_model=1000,
    device=device,
)


Ideally the torsion distribution of the model would be close to the distribution created with openMM

In [None]:
# Compare, per torsion type, the distribution of samples generated from one
# conditioning state by openMM and by the model.
# There is only one torsion angle per angle type here, so the index along
# the last axis is fixed instead of being iterated over.
j = 0
for angle_name, atoms_label in zip(dihedral_names, dihedral_atoms):
    angle_target = getattr(dihedrals, angle_name)
    angle_conditional = getattr(dihedrals_conditioning, angle_name)
    angle_model = getattr(dihedrals_single_conditioning_model, angle_name)
    angle_openmm = getattr(dihedrals_single_conditioning, angle_name)
    # Skip torsion types that do not occur in the molecule.
    if angle_target.size == 0:
        continue
    plt.figure(figsize=(16, 9))
    # Faint background: distribution over all target samples of the test data.
    plt.hist(angle_target[:, 0, j], bins=100, label="Ground truth", density=True, alpha=0.1)
    # Samples generated from the single conditioning state.
    plt.hist(angle_openmm[0, :, j], bins=30, label=f"OpenMM stepwidth {step_width}", density=True, alpha=0.5)
    plt.hist(angle_model[0, :, j], bins=100, label=f"Model stepwidth {step_width}", density=True, alpha=0.5)
    # Vertical markers (spanning 0-90% of the axis height) for the first
    # conditioning angle and the first target angle of the data set.
    plt.axvline(angle_conditional[0, 0, j], 0, 0.9, color="green", linewidth=5, label="conditioning")
    plt.axvline(angle_target[0, 0, j], 0, 0.9, color="black", linewidth=5, label="target")
    plt.legend()
    plt.xlabel("Angle in Rad")
    plt.title(f"{angle_name}-{j} {atoms_label}")