# prepare_protein — Run a batch protein preparation in parallel

See [the tutorial](/Tutorials/batch_run-protein-prep.ipynb).

This quickstart uses async functionality to download files in parallel. If you are running it outside of a Jupyter notebook, you will need to wrap the code in an async main function like this:
``` python
import asyncio

async def main():
    #your code here
    
asyncio.run(main())
```

In [None]:
# |hide
import os
import pathlib
import shutil

# Scratch directory for this quickstart; it is wiped on every run so that
# re-executing the notebook starts from an empty folder.
WORK_DIR = pathlib.Path("~/qdx/qs_batch_run_protein_prep").expanduser()
if WORK_DIR.exists():
    # Pure-Python removal instead of `!rm -r $WORK_DIR`: no shell is involved,
    # so it does not need an `rm` binary and is unaffected by spaces in the
    # expanded home path.
    shutil.rmtree(WORK_DIR)
WORK_DIR.mkdir(parents=True, exist_ok=True)
# The following cells write and glob files relative to the current directory.
os.chdir(WORK_DIR)

In [None]:
# Get PDBs to work with
# We use the pdb-tools cli here but you can download directly from rcsb.org
!pdb_fetch '1b39' | pdb_delhetatm > '1B39_nohet.pdb'
!pdb_fetch '4qxi' | pdb_delhetatm > '4QXI_nohet.pdb'
!pdb_fetch '8fsu' | pdb_delhetatm > '8FSU_nohet.pdb'

In [None]:
import asyncio
from glob import glob
from pathlib import Path

import rush

client = await rush.build_provider_with_functions(
    batch_tags=["batch_run_protein_prep"]
)

protein_outputs = []
for protein_path in glob(str(Path.cwd() / "*.pdb")):
    print("preparing", protein_path)
    protein_path = Path(protein_path)
    name = protein_path.stem

    prepped_protein_qdxf, prepped_protein_pdb = await client.prepare_protein(
        protein_path,
        tags=["batch_run_protein_prep"],
        resources={"gpus": 1, "storage": "10", "storage_units": "MB"},
    )
    protein_outputs.append((name, prepped_protein_qdxf, prepped_protein_pdb))

await asyncio.gather(
    *(
        [
            output[1].download(filename=f"protein_{output[0]}_prepared.qdxf.json")
            for output in protein_outputs
        ]
        + [
            output[2].download(filename=f"protein_{output[0]}_prepared.pdb")
            for output in protein_outputs
        ]
    )
)

preparing /home/machineer/qdx/qs_batch_run_protein_prep/8FSU_nohet.pdb
preparing /home/machineer/qdx/qs_batch_run_protein_prep/1B39_nohet.pdb
preparing /home/machineer/qdx/qs_batch_run_protein_prep/4QXI_nohet.pdb
2024-03-18 13:22:53,196 - rush - INFO - Argument 95db10da-651c-4af2-a151-d58263fe0052 is now ModuleInstanceStatus.RESOLVING
2024-03-18 13:22:53,221 - rush - INFO - Argument 01960dc1-1437-4a72-90db-01180c10788c is now ModuleInstanceStatus.RESOLVING
2024-03-18 13:22:53,224 - rush - INFO - Argument 4e6500ab-6a1d-476b-842d-9539f0f3a013 is now ModuleInstanceStatus.RESOLVING
2024-03-18 13:22:53,252 - rush - INFO - Argument 155d40cd-a14c-44a5-83f8-8b00bbd9e14f is now ModuleInstanceStatus.RESOLVING
2024-03-18 13:22:53,282 - rush - INFO - Argument 1de12415-b48c-45b7-9981-92ef9685daed is now ModuleInstanceStatus.RESOLVING
2024-03-18 13:22:53,286 - rush - INFO - Argument adf7e0c4-b2a3-4dba-8fcd-d65113a716bf is now ModuleInstanceStatus.RESOLVING
2024-03-18 13:23:07,384 - rush - INFO - Arg

[PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_8FSU_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_1B39_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_4QXI_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_8FSU_nohet_prepared.pdb'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_1B39_nohet_prepared.pdb'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_4QXI_nohet_prepared.pdb')]

In [None]:
OUTPUT_DIR = client.workspace / "objects"
!ls $OUTPUT_DIR

protein_1B39_nohet_prepared.pdb        protein_4QXI_nohet_prepared.qdxf.json
protein_1B39_nohet_prepared.qdxf.json  protein_8FSU_nohet_prepared.pdb
protein_4QXI_nohet_prepared.pdb        protein_8FSU_nohet_prepared.qdxf.json
