# prepare_protein — Run a batch protein preparation in parallel

See [the tutorial](/Tutorials/batch_run-protein-prep.ipynb).

In [None]:
# |hide
!mkdir -p ~/qdx/qs_batch_run_protein_prep/
!cd ~/qdx/qs_batch_run_protein_prep/

In [None]:
# Get PDBs to work with - we use the pdb-tools cli here but you can download directly from rcsb.org
!pdb_fetch '1b39' | pdb_delhetatm > '1B39_nohet.pdb'
!pdb_fetch '4qxi' | pdb_delhetatm > '4QXI_nohet.pdb'
!pdb_fetch '8fsu' | pdb_delhetatm > '8FSU_nohet.pdb'

In [None]:
# |hide
import os
import rush
from pathlib import Path

WORK_DIR = Path.home() / "qdx" / "qs_batch_run_protein_prep"

if WORK_DIR.exists():
    client = rush.Provider(workspace=WORK_DIR)
    await client.nuke(remote=False)

os.makedirs(WORK_DIR, exist_ok=True)
os.chdir(WORK_DIR)

In [None]:
import rush

client = await rush.build_provider_with_functions(
    batch_tags=["batch_run_protein_prep"]
)

In [None]:
import asyncio
from pathlib import Path
from glob import glob

protein_outputs = []
for protein_path in glob(str(Path.cwd() / "*.pdb")):
    protein_path = Path(protein_path)
    name = protein_path.stem

    prepped_protein_qdxf, prepped_protein_pdb = await client.prepare_protein(
        protein_path,
        tags=["batch_run_protein_prep"],
        resources={"gpus": 1},
    )
    protein_outputs.append((name, prepped_protein_qdxf, prepped_protein_pdb))

await asyncio.gather(
    [
        output[1].download(filename=f"protein_{output[0]}_prepared.qdxf.json")
        for output in protein_outputs
    ]
    + [
        output[2].download(filename=f"protein_{output[0]}_prepared.pdb")
        for output in protein_outputs
    ],
)