In [1]:
# Reload the autoreload extension and set mode 2, so that edits to imported modules
# are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Testing HEroBM model #

### 1. Download Testing Dataset ###

In [None]:
# Download the testing dataset into ../data/a2a.npz.
# wget treats every non-option argument as a URL, so the destination must be given with -O.
# NOTE(review): `url` appears to be a placeholder; replace it with the real dataset URL.
!wget -O ../data/a2a.npz url

### 2. Create the inference config file ###

Specify the following parameters:
- mapping: which is the CG mapping to use
- isatomistic: set to True if the input file is at atomistic resolution. In this case, the input will first be converted to CG, according to the specified mapping, then the model will be used to backmap the CG back again to atomistic resolution.
- input: input file to backmap. It can be a pdb file, a gro file, or any other format compatible with MDAnalysis.
- selection: optional selection of atoms/beads/residues/molecules to apply to the input as a pre-processing step
- model: could be either a deployed model or the configuration file used for training.
- output: Optionally, you can provide a folder where to save the backmapped result

In [None]:
# args_dict = {
#     "mapping": "martini3",
#     "isatomistic": True,
#     "input": "/storage_common/angiod/A2A/tpr/a2a.tpr",
#     "inputtraj": "/storage_common/angiod/A2A/trr/a2a.trr",
#     "trajslice": "900:901:1",
#     "selection": "protein",
#     "output": "backmapped/tutorial/A2A.martini",
#     "model": "config/tutorial.train.yaml",
# }

# args_dict.update({
#     "device": "cuda:3",
#     "batch_max_atoms": 10000,
#     "noinvariants": True,
# })

import os
from herobm.backmapping.hierarchical_backmapping import HierarchicalBackmapping

def run_backmapping(fr, pdb):
    """Backmap one structure with the NECA model and return the produced file lists.

    Args:
        fr: Folder containing the input structure, relative to
            '/storage_common/angiod/Vince/'.
        pdb: File name of the structure inside ``fr``.

    Returns:
        The tuple ``(backmapped_filenames, backmapped_minimised_filenames,
        true_filenames, cg_filenames)`` as returned by
        ``HierarchicalBackmapping.backmap``.
    """
    f = os.path.join(fr, pdb)
    # Alternative configurations (A2A protein and ZMA ligand), kept for reference:
    # args_dict = {
    #     "mapping": "martini3",
    #     "isatomistic": False,
    #     "input": f"/storage_common/angiod/{f}",
    #     # "inputtraj": "",
    #     # "trajslice": "0:10",
    #     "selection": "protein",
    #     "output": f"backmapped/Vince/A2A/{f}",
    #     "model": "/home/angiod@usi.ch/HEroBM/results/A2A/A2A.martini3/best_model.pth",
    # }
    # args_dict = {
    #     "mapping": "zma",
    #     "isatomistic": False,
    #     "input": f"/storage_common/angiod/Vince/{f}",
    #     # "inputtraj": "",
    #     # "trajslice": "0:10",
    #     "selection": "resname ZMA",
    #     "output": f"backmapped/Vince/ZMA/{f}",
    #     "model": "/home/angiod@usi.ch/HEroBM/results/zma/ZMA.martini3/best_model.pth",
    # }
    args_dict = {
        "mapping": "/home/angiod@usi.ch/HEroBM/mappings/neca",
        "isatomistic": False,
        "input": f"/storage_common/angiod/Vince/{f}",
        # "inputtraj": "",
        # "trajslice": "0:10",
        "selection": "resname NEC",
        "output": f"backmapped/Vince/NECA/{f}",
        "model": "/home/angiod@usi.ch/HEroBM/results/neca/NECA.martini3/best_model.pth",
    }

    # Runtime options, kept separate from the system-specific settings above.
    args_dict.update({
        "device": "cuda:0",
        "batch_max_atoms": 3000,
        "noinvariants": True,
    })

    backmapping = HierarchicalBackmapping(args_dict=args_dict)

    backmapped_filenames, backmapped_minimised_filenames, true_filenames, cg_filenames = backmapping.backmap(
        optimise_backbone=False,
        tolerance=500., # Value in Kj/(mol nm). Threshold to stop energy minimisation.
    )

    # Return the file lists: the analysis cells further down read them, and names
    # bound only inside this function would not be visible there.
    return backmapped_filenames, backmapped_minimised_filenames, true_filenames, cg_filenames

# exopicks
fr = "2022/5-PR1-CBA/Extracts/Set-B/exopicks/"
pdbs = ["EX-S7-R09-F00004.pdb", "EX-S7-R09-F02032.pdb", "EX-S7-R09-F05081.pdb", "EX-S7-R09-F05476.pdb",
        "EX-S7-R09-F07086.pdb", "EX-S7-R09-F20635.pdb", "EX-S7-R09-F21898.pdb", "EX-S7-R09-F28065.pdb"]

# necpicks
# fr = "2022/5-PR1-CBA/Extracts/Set-B/necpicks/"
# pdbs = ["EX-R002-F00287.pdb", "EX-R002-F00343.pdb", "EX-R002-F00423.pdb", "EX-R002-F01383.pdb", "EX-R002-F01569.pdb",
#         "EX-R002-F00288.pdb",  "EX-R002-F00416.pdb",  "EX-R002-F00425.pdb",  "EX-R002-F01387.pdb",  "EX-R002-F01578.pdb"]

# zmapicks
# fr = "2022/5-PR1-CBA/Extracts/Set-B/zmapicks/"
# pdbs = ["EX-R009-F00038.pdb", "EX-R009-F00040.pdb", "EX-R009-F00042.pdb", "EX-R009-F00759.pdb", "EX-R009-F00794.pdb"]

# Results of every run, keyed by pdb file name.
backmapping_results = {}
for pdb in pdbs:
    # Bind the outputs at notebook level; after the loop these names refer to the
    # last processed structure, which is what the analysis cells below load.
    (
        backmapped_filenames,
        backmapped_minimised_filenames,
        true_filenames,
        cg_filenames,
    ) = run_backmapping(fr, pdb)
    backmapping_results[pdb] = (
        backmapped_filenames,
        backmapped_minimised_filenames,
        true_filenames,
        cg_filenames,
    )

### 3. Run backmapping ###

Optionally, you can run the backbone optimisation, which adjusts BB atoms in case they end up in unphysical positions.
'tolerance' parameter is used as a threshold for energy minimisation.

Note: if you test the A2A model on other proteins, most probably the energy minimised version will have a lower RMSD with respect to the raw backmapped version. This is expected, as the model was trained on a single system and is not yet able to generalise extremely well, thus might create clashes in some cases. Running even mild energy minimisation fixes any clash and gives a sound structure to simulate.

After training completion, you will find the results in the './results/tutorial/A2A.martini' folder.

Model weights are saved as 'best_model.pth', while the 'config.yaml' file contains all the directives to load the model, to either perform inference or to continue training/fine-tune the model.

In [None]:
import MDAnalysis as mda
from MDAnalysis.analysis import rms
import nglview as nv

# Build MDAnalysis universes from the backmapped and reference file lists.
backmapped_u = mda.Universe(*backmapped_filenames)
ref_u = mda.Universe(*true_filenames)

# RMSD (no superposition) between backmapped and reference coordinates,
# reported separately for the backbone and side-chain selections.
for rmsd_label, atom_selection in (
    ("RMSD BB:", 'name N CA C'),
    ("RMSD SC:", 'protein and not name N CA C O'),
):
    backmapped_xyz = backmapped_u.select_atoms(atom_selection).positions
    ref_xyz = ref_u.select_atoms(atom_selection).positions
    print(rmsd_label, rms.rmsd(backmapped_xyz, ref_xyz, superposition=False))

# Selection string reused by the following cells when merging universes.
sel = 'protein'

# Show the backmapped structure with a licorice representation of the protein.
w = nv.show_mdanalysis(backmapped_u)
w.add_representation('licorice', selection=sel)
w

Check reconstructed and original together

In [None]:
# Put the `sel` atoms of the backmapped and reference universes into a single
# universe so both structures are displayed in the same viewer.
backmapped_atoms = backmapped_u.select_atoms(sel)
ref_atoms = ref_u.select_atoms(sel)
merged_u = mda.Merge(backmapped_atoms, ref_atoms)

w = nv.show_mdanalysis(merged_u)
w.add_representation(
    'licorice',
    selection='protein',
)
w

Finally, have a look at the minimised structure and compare it to the ground truth

In [None]:
# Load the energy-minimised backmapped structure and merge its `sel` atoms with
# those of the reference for a side-by-side view.
backmapped_minimised_u = mda.Universe(*backmapped_minimised_filenames)
minimised_atoms = backmapped_minimised_u.select_atoms(sel)
merged_minimised_u = mda.Merge(minimised_atoms, ref_u.select_atoms(sel))

# RMSD (no superposition) of the minimised structure against the reference,
# first for the backbone selection and then for the side-chain selection.
for rmsd_label, s in (
    ("RMSD BB:", 'name N CA C O and not resname ACE NME'),
    ("RMSD SC:", 'protein and not name N CA C O OXT and not type H and not resname ACE NME'),
):
    minimised_xyz = backmapped_minimised_u.select_atoms(s).positions
    ref_xyz = ref_u.select_atoms(s).positions
    print(rmsd_label, rms.rmsd(minimised_xyz, ref_xyz, superposition=False))

w = nv.show_mdanalysis(merged_minimised_u)
w.add_representation(
    'licorice',
    selection='protein',
)
w