# Hermes — Run a basic Hartree-Fock energy calculation

In this notebook, we'll run a Hermes calculation to get the total Hartree-Fock energy of a protein that we prepare with PDBFixer and PDB2PQR.

In [20]:
import json
import os
import sys
import tarfile

from pdbtools import *
import requests
from datetime import datetime
from pathlib import Path
import py3Dmol

import rush

## 0) Setup

In [21]:
# Human-readable experiment name; also used to name the local workspace directory.
EXPERIMENT = "rush-py hermes energy demo"
# PDB identifier of the structure fetched below.
SYSTEM = "1B39"
# Ligand identifier; in this notebook it is only used as a tag.
LIGAND = "ATP"
# Tags attached to every run submitted through the client.
TAGS = [
    "qdx",
    EXPERIMENT,
    SYSTEM,
    LIGAND,
]

In [22]:
# |hide
WORK_DIR = Path.home() / "qdx" / EXPERIMENT

if WORK_DIR.exists():
    client = rush.Provider(workspace=WORK_DIR)
    await client.nuke(remote=False)

os.makedirs(WORK_DIR, exist_ok=True)
os.makedirs(WORK_DIR / ".rush", exist_ok=True)
import sys

os.chdir(WORK_DIR)

Build our client

In [23]:
# Build the provider client; every run submitted through it is tagged with TAGS.
client = await rush.build_provider_with_functions(batch_tags=TAGS)

In [6]:
# |hide
# Hidden variant of the client build: pins the workspace to WORK_DIR and turns
# on restore_by_default (presumably reuses results of earlier runs -- confirm).
client = await rush.build_provider_with_functions(batch_tags=TAGS, workspace=WORK_DIR, restore_by_default=True)

## 0.1) Input selection

In [24]:
# Fetch the structure for SYSTEM, keep chain A without HETATM records, and
# save the result inside the client workspace.
PROTEIN_PDB_PATH = client.workspace / f"{SYSTEM}_P.pdb"

# NOTE: the name `complex` shadows Python's builtin `complex` from here on.
complex = list(pdb_fetch.fetch_structure(SYSTEM))
chain_a = pdb_selchain.select_chain(complex, "A")
protein = pdb_delhetatm.remove_hetatm(chain_a)

# Write each record to the protein PDB path defined above.
with open(PROTEIN_PDB_PATH, "w") as pdb_file:
    pdb_file.writelines(str(record) for record in protein)

## 1) Preparation
We want to convert our raw files into prepared QDXF files, with correct charges and missing residues filled in.

In [25]:
(prepared_protein_qdxf, prepared_protein_pdb) = await client.prepare_protein(
    PROTEIN_PDB_PATH
)

print(f"{datetime.now().time()} | Running protein preparation!")

06:28:08.628906 | Running protein preparation!


In [26]:
# Download the prepared PDB under a fixed filename so later cells can read it.
try:
    await prepared_protein_pdb.download(filename="01_prepared_protein.pdb")
except FileExistsError:
    # download() raises rather than overwrite an existing file. Ignoring the
    # error keeps the copy from a previous run; to force an overwrite, pass an
    # absolute filepath instead.
    pass

2024-02-13 06:28:47,222 - rush - INFO - Argument 64419a21-fdd1-4caf-99d7-0baa9c638a47 is now ModuleInstanceStatus.DISPATCHED
2024-02-13 06:28:53,829 - rush - INFO - Argument 64419a21-fdd1-4caf-99d7-0baa9c638a47 is now ModuleInstanceStatus.RUNNING
2024-02-13 06:29:14,568 - rush - INFO - Argument 64419a21-fdd1-4caf-99d7-0baa9c638a47 is now ModuleInstanceStatus.AWAITING_UPLOAD


You should visualize your prepared protein to spot-check for any incorrectly transformed residues.

In [27]:
# Render the prepared protein as a spectrum-coloured cartoon.
view = py3Dmol.view()
prepared_pdb_path = client.workspace / "objects" / "01_prepared_protein.pdb"
with open(prepared_pdb_path, "r") as pdb_file:
    pdb_text = pdb_file.read()

# The file only needs to stay open for the read; configure the viewer afterwards.
view.addModel(pdb_text, "pdb")
view.setStyle({"cartoon": {"color": "spectrum"}})
view.zoomTo()
view.show()

## 2) Fragmentation
We need to split our protein into smaller fragments so that we don't run out of GPU memory. This will fragment the protein into roughly 1 amino acid per fragment.

First, we turn our list of conformers into a single conformer.

In [28]:
# Show the signature and QDX type description of pick_conformer before using it.
help(client.pick_conformer)

Help on function pick_conformer in module rush.provider:

async pick_conformer(*args: *tuple[RushObject[list[Conformer]], int], target: Optional[Target] = None, resources: Optional[Resources] = {'storage': 10, 'storage_units': 'MB', 'gpus': 0}, tags: list[str] | None = None, restore: bool | None = None) -> tuple[RushObject[Conformer]]
    Selects a single conformer from a vec of conformers. NOTE: will be deprecated in future once mapping is supported
    
    Module version:  
    `github:talo/tengu-prelude/efc6d8b3a8cc342cd9866d037abb77dac40a4d56#pick_conformer`
    
    QDX Type Description:
    
        in: @[Conformer];
        in: u8
        ->
        out: @Conformer



In [29]:
# there may be multiple conformers, so select the first one
(first_conformer,) = await client.pick_conformer(
    prepared_protein_qdxf, 0
)

Now that we have a single conformer, we need to fragment it.

In [30]:
# Show the signature and parameter documentation of fragment_aa before using it.
help(client.fragment_aa)

Help on function fragment_aa in module rush.provider:

async fragment_aa(*args: *tuple[RushObject[Conformer], int, EnumValue], target: Optional[Target] = None, resources: Optional[Resources] = {'storage': 10, 'storage_units': 'MB', 'gpus': 0}, tags: list[str] | None = None, restore: bool | None = None) -> tuple[RushObject[Conformer]]
    Fragments and infers formal charges for a protein QDX file by cutting the backbone at the alpha-beta carbon bonds every n steps, with more accurate amino-acid based bond perception
    
    Module version:  
    `github:talo/tengu-prelude/efc6d8b3a8cc342cd9866d037abb77dac40a4d56#fragment_aa`
    
    QDX Type Description:
    
        input: @Conformer;
        steps: u8;
        strictness: None | Heavy | All
        ->
        output: @Conformer
    
    :param input: QDX Conformer file
    :param steps: number of steps to skip between each fragment
    :param strictness: strictness of atom checking - recommended to use All
    :return output: QDX co

In [31]:
(fragmented_protein,) = await client.fragment_aa(
    first_conformer, 1, "All"
)

## 3) Quantum energy calculation
Finally, we submit our fragmented protein for quantum energy calculation, with custom configuration.

In [32]:
# Show the signature and input schema of hermes_energy before configuring the run.
help(client.hermes_energy)

Help on function hermes_energy in module rush.provider:

async hermes_energy(*args: *tuple[RushObject[Conformer], Record, Optional[Record], Optional[Record]], target: Optional[Target] = None, resources: Optional[Resources] = {'storage': 1034, 'storage_units': 'MB', 'gpus': 4}, tags: list[str] | None = None, restore: bool | None = None) -> tuple[RushObject[Record]]
    Runs a HERMES energy calculation given a topology, and optionally model and keyword configurations.
    Will use the default model and keywords if none are provided
    
    Module version:  
    `github:talo/tengu-prelude/e94a09ac64990a91cf1fcd33858d565a3d33ba09#hermes_energy`
    
    QDX Type Description:
    
        input: @Conformer;
        system: {
            gpus_per_team: u32?,
            max_gpu_memory_mb: u32?,
            oversubscribe_gpus: bool?,
            teams_per_node: u32?
        };
        model: {
            aux_basis: string?,
            basis: string,
            force_cartesian_basis_sets: 

In [33]:
# Resource request for the Hermes run.
# "walltime" is presumably in minutes (60 * 4 = four hours) -- TODO confirm.
HERMES_RESOURCES = dict(
    gpus=4,
    storage=100,
    storage_units="MB",
    walltime=60 * 4,
)

In [None]:
fragmented_protein_out = await fragmented_protein.get()
(hermes_energy,) = await client.hermes_energy(
    fragmented_protein,
    {},
    {
        "basis": "STO-3G",
        "aux_basis": "6-31G",
        "method": "RestrictedRIMP2",
    },  # configuration for a fast converging, low accuracy run
    {
        "guess": {},
        "scf": {
            "max_iter": 50,
            "max_diis_history_length": 12,
            "convergence_metric": "DIIS",
            "dynamic_screening_threshold_exp": 10,
            "ndiis": 8,
            "niter": 40,
            "scf_conv": 0.000001,
        },
          "frag": {
        "cutoffs": {"dimer": 22},
        "cutoff_type": "Centroid",
        "level": "Dimer",
        "reference_fragment": len(fragmented_protein_out["topology"]["fragments"])
        - 1,
        },
        "resources": HERMES_RESOURCES,
    },
)

2024-02-13 06:30:16,699 - rush - INFO - Argument d23b290d-6d24-4cbb-bcaa-8ba06da0eb91 is now ModuleInstanceStatus.RESOLVING


In [18]:
# Wait for the Hermes run and fetch its result record.
energy = await hermes_energy.get()

In [1]:
# We can optionally print the logs here
PRINT_LOGS = False

if PRINT_LOGS:
    async for log_page in client.logs(
        hermes_energy.source, kind="stdout", print_logs=False
    ):
        for log in log_page:
            print(log)