# Batch Run Example
This notebook shows how to load a large amount of data and execute runs on it in parallel.

In [None]:
import os
import asyncio
from glob import glob

from datetime import datetime
from pathlib import Path

import tengu

### 0) Setup

In [None]:
# Credentials come from the environment: export TENGU_TOKEN (and TENGU_URL) in your shell
# before starting Jupyter, or assign your token here directly (do not commit it).
# Each value is None when the corresponding variable is not set.
TOKEN = os.environ.get("TENGU_TOKEN")
URL = os.environ.get("TENGU_URL")

In [None]:
# Project information: description, tags and the local working directory
DESCRIPTION = "tengu-py batch notebook"
TAGS = [
    "qdx",
    "tengu-py-v2",
    "demo",
    "batch",
    "nogpu",
]
WORK_DIR = Path.home().joinpath("qdx", "tengu-py-batch-demo")

# Input locations, both resolved relative to WORK_DIR
PROTEIN_PDB_PATH = WORK_DIR.joinpath("protein.pdb")
LIGAND_PDB_FOLDER_PATH = WORK_DIR.joinpath("ligands")  # should contain ligands named [name].pdb

# Values passed as `target` and GPU count to the module calls later in the notebook
TARGET = "GADI"
NUM_GPUS = 0

## Initialize our tengu client and fetch available module paths

In [None]:
# Get our client, for calling modules and using the tengu API
client = await tengu.build_provider_with_functions(
    access_token=TOKEN, url=URL, workspace=WORK_DIR, batch_tags=TAGS
)

building for  github:talo/tengu-prelude/083e958d64e9e716754f76560bd3e60557e6a691#hermes_binding
building for  github:talo/gmx_tengu_support/68cfc9dc1533bd54dfa21b346642410ff0fce570#gmx_frame_select_pdb
building for  github:talo/gmx_tengu_support/a15ebf3e86b779203b463193c0ecc025c9f916df#gmx_frame_select
building for  github:talo/gmx_tengu_support/d4ea797dcdedf9b91de1b76a32f8a95f0cbf21df#gmx_mmpbsa_tengu
building for  github:talo/tengu-module-example/b334a851530033b79762c3341bf584f8939feee1#tengu_echo
building for  github:talo/tengu-module-example/b334a851530033b79762c3341bf584f8939feee1#spam
building for  github:talo/tengu-module-example/b334a851530033b79762c3341bf584f8939feee1#delay
building for  github:talo/tengu-module-flake-parts/b6afd5abb624fed203990045927d381cefc6ef04#dummy
building for  github:talo/gmx_tengu_support/a473bc4a302eebebcb5f54a899192be75c0daa91#gmx_tengu_pdb
building for  github:talo/gmx_tengu_support/a473bc4a302eebebcb5f54a899192be75c0daa91#gmx_tengu
building for  gi

## For each ligand, start a gmx + gmx_mmpbsa run

In [None]:
# Collect every ligand PDB file as a sorted list of Paths.
# A list (instead of a one-shot `map` iterator) can be inspected and iterated more than once,
# and sorting makes the submission order reproducible: glob() returns files in arbitrary order.
ligands = sorted(Path(ligand_file) for ligand_file in glob(str(LIGAND_PDB_FOLDER_PATH / "*.pdb")))

# Configuration passed to client.gmx_pdb for each ligand
gmx_config = {
    # per-stage parameter overrides; each entry is a list of (parameter, value) pairs
    "param_overrides": {
        "md": [("nsteps", "5000")],
        "em": [("nsteps", "1000")],
        "nvt": [("nsteps", "1000")],
        "npt": [("nsteps", "1000")],
        "ions": [],
    },
    "num_gpus": NUM_GPUS,
    "num_replicas": 1,
    "ligand_charge": None,
    "frame_sel": {"begin_time": 1, "delta_time": 1, "end_time": 2},
}
# Resources requested for each gmx_pdb run (GPU count kept in sync with gmx_config["num_gpus"])
gmx_resources = tengu.Resources(gpus=NUM_GPUS, cpus=48, storage=2, storage_units="GB", walltime=60)

# Configuration passed to client.gmx_mmpbsa for each ligand
mmpbsa_config = {
    "start_frame": 1,
    "end_frame": 10,
    "num_cpus": 8,  # cannot be greater than number of frames
}

# Resources requested for each gmx_mmpbsa run
mmpbsa_resources = tengu.Resources(storage=2, storage_units="GB", walltime=600)

gmx_outputs = []
mmpbsa_outputs = []
for ligand_path in ligands:
    name = ligand_path.stem
    (gmx_output_tar, wet, dry, gmx_ligand_gro_tar, extra) = await client.gmx_pdb(
        PROTEIN_PDB_PATH,
        ligand_path,
        gmx_config,
        target=TARGET,
        resources=gmx_resources,
        tags=[name],
        restore=True,
    )
    gmx_outputs.append((name, gmx_output_tar, wet, dry, gmx_ligand_gro_tar))

    (mmpbsa_output_tar,) = await client.gmx_mmpbsa(
        gmx_output_tar, mmpbsa_config, target=TARGET, resources=mmpbsa_resources, tags=[name], restore=True
    )
    mmpbsa_outputs.append((name, mmpbsa_output_tar))
    print(f"{datetime.now().time()} | Running GROMACS MM-PBSA calculation!")

10:44:52.263820 | Running GROMACS MM-PBSA calculation!


## Report progress
This will show the status of all of your runs, grouped by module path.

In [None]:
status = await client.status(group_by="path")
print(f"{'Module':<20} | {'Status':<20} | Count")
print("-" * 50)
for module, (status, path, count) in status.items():
    print(f"{path:<20} | {status:<20} | {count}")

Module               | Status               | Count
--------------------------------------------------
gmx_mmpbsa           | COMPLETED            | 3
gmx_pdb              | COMPLETED            | 5


## Download Results
This will retrieve results for your completed module_instances

In [None]:
await asyncio.gather(
    *[output[1].download(filename=f"mmpbsa_{output[0]}.tar.gz") for output in mmpbsa_outputs]
)

[None]

## Check failures
This will print the stderr logs of any failed runs in your workspace history

In [None]:
for instance_id, (status, name, count) in (await client.status()).items():
    if status.value == "FAILED":
        async for log_page in client.logs(instance_id, "stderr"):
            for log in log_page:
                print(log)