# prepare_protein — Run a batch protein preparation in parallel

See [the tutorial](/Tutorials/batch_run-protein-prep.ipynb).

This quickstart uses the async functionality to download files in parallel. If running outside of a Jupyter notebook, you will need to wrap the code in an async main function like this:
``` python
import asyncio
async def main():
    #your code here
asyncio.run(main())
```


In [None]:
# |hide
# Scratch directory for this quickstart; the lines below delete and recreate it
# so every run starts from an empty folder.
WORK_DIR = '~/qdx/qs_batch_run_protein_prep'
# Move to the home directory first — presumably so the directory being removed
# is not the current working directory.
%cd -q ~
# `|| true` ignores the non-zero exit status `rm` returns when WORK_DIR does not
# exist yet (e.g. on the very first run).
!rm -r $WORK_DIR || true
!mkdir -p $WORK_DIR
# Everything that follows runs relative to the fresh working directory.
%cd -q $WORK_DIR

In [None]:
# Get PDBs to work with - we use the pdb-tools cli here but you can download directly from rcsb.org
!pdb_fetch '1b39' | pdb_delhetatm > '1B39_nohet.pdb'
!pdb_fetch '4qxi' | pdb_delhetatm > '4QXI_nohet.pdb'
!pdb_fetch '8fsu' | pdb_delhetatm > '8FSU_nohet.pdb'

In [None]:
import asyncio
from pathlib import Path
from glob import glob

import rush

client = await rush.build_provider_with_functions(
    batch_tags=["batch_run_protein_prep"]
)

protein_outputs = []
for protein_path in glob(str(Path.cwd() / "*.pdb")):
    print("preparing", protein_path)
    protein_path = Path(protein_path)
    name = protein_path.stem

    prepped_protein_qdxf, prepped_protein_pdb = await client.prepare_protein(
        protein_path,
        tags=["batch_run_protein_prep"],
        resources={"gpus": 1, "storage": "10", "storage_units": "MB" },
    )
    protein_outputs.append((name, prepped_protein_qdxf, prepped_protein_pdb))

await asyncio.gather(
    *([
        output[1].download(filename=f"protein_{output[0]}_prepared.qdxf.json")
        for output in protein_outputs
    ]
    + [
        output[2].download(filename=f"protein_{output[0]}_prepared.pdb")
        for output in protein_outputs
    ])
)

preparing /home/machineer/qdx/qs_batch_run_protein_prep/8FSU_nohet.pdb
preparing /home/machineer/qdx/qs_batch_run_protein_prep/1B39_nohet.pdb
preparing /home/machineer/qdx/qs_batch_run_protein_prep/4QXI_nohet.pdb
2024-03-16 10:14:28,870 - rush - INFO - Argument 49db9ee4-b836-42c0-a726-f262028f1495 is now ModuleInstanceStatus.RESOLVING
2024-03-16 10:14:28,963 - rush - INFO - Argument 6f70de2a-a6fb-4486-a358-b059d7ac769f is now ModuleInstanceStatus.RESOLVING
2024-03-16 10:14:28,999 - rush - INFO - Argument 0d414ada-d303-406b-822c-aa54fe2e7919 is now ModuleInstanceStatus.RESOLVING
2024-03-16 10:14:29,015 - rush - INFO - Argument 870c76e8-fbfa-4e6f-8e82-2835cf58380d is now ModuleInstanceStatus.ADMITTED
2024-03-16 10:14:29,018 - rush - INFO - Argument 24ce1191-80fd-4478-8bf9-349427f89d56 is now ModuleInstanceStatus.RESOLVING
2024-03-16 10:14:29,941 - rush - INFO - Argument 49db9ee4-b836-42c0-a726-f262028f1495 is now ModuleInstanceStatus.ADMITTED
2024-03-16 10:14:30,035 - rush - INFO - Argum

[PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_8FSU_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_1B39_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_4QXI_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_8FSU_nohet_prepared.pdb'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_1B39_nohet_prepared.pdb'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_4QXI_nohet_prepared.pdb')]

In [None]:
OUTPUT_DIR = client.workspace / "objects"
!ls $OUTPUT_DIR

protein_1B39_nohet_prepared.pdb        protein_4QXI_nohet_prepared.qdxf.json
protein_1B39_nohet_prepared.qdxf.json  protein_8FSU_nohet_prepared.pdb
protein_4QXI_nohet_prepared.pdb        protein_8FSU_nohet_prepared.qdxf.json
