# prepare_protein — Run a batch protein preparation in parallel

See [the tutorial](/Tutorials/batch_run-protein-prep.ipynb).

This quickstart uses rush's async functionality to download files in parallel. If you are running it outside of a Jupyter notebook, you will need to wrap the code in an async main function like this:
``` python
import asyncio

async def main():
    #your code here
    
asyncio.run(main())
```

In [None]:
# |hide
import os
import pathlib

WORK_DIR = pathlib.Path("~/qdx/qs_batch_run_protein_prep").expanduser()
if WORK_DIR.exists():
    !rm -r $WORK_DIR
os.makedirs(WORK_DIR, exist_ok=True)
os.chdir(WORK_DIR)

In [None]:
# Get PDBs to work with
# We use the pdb-tools cli here but you can download directly from rcsb.org
!pdb_fetch '1b39' | pdb_delhetatm > '1B39_nohet.pdb'
!pdb_fetch '4qxi' | pdb_delhetatm > '4QXI_nohet.pdb'
!pdb_fetch '8fsu' | pdb_delhetatm > '8FSU_nohet.pdb'

In [None]:
import asyncio
from glob import glob
from pathlib import Path

import rush

client = await rush.build_provider_with_functions(
    batch_tags=["batch_run_protein_prep"]
)

protein_outputs = []
for protein_path in glob(str(Path.cwd() / "*.pdb")):
    print("preparing", protein_path)
    protein_path = Path(protein_path)
    name = protein_path.stem

    prepped_protein_qdxf, prepped_protein_pdb = await client.prepare_protein(
        protein_path,
        None,
        None,
        tags=["batch_run_protein_prep"],
        resources={"gpus": 1, "storage": "10", "storage_units": "MB"},
    )
    protein_outputs.append((name, prepped_protein_qdxf, prepped_protein_pdb))

await asyncio.gather(
    *(
        [
            output[1].download(filename=f"protein_{output[0]}_prepared.qdxf.json")
            for output in protein_outputs
        ]
        + [
            output[2].download(filename=f"protein_{output[0]}_prepared.pdb")
            for output in protein_outputs
        ]
    )
)

2024-04-10 15:46:57,187 - rush - INFO - Not restoring by default via default
preparing /home/machineer/qdx/qs_batch_run_protein_prep/8FSU_nohet.pdb
preparing /home/machineer/qdx/qs_batch_run_protein_prep/1B39_nohet.pdb
preparing /home/machineer/qdx/qs_batch_run_protein_prep/4QXI_nohet.pdb
2024-04-10 15:47:03,897 - rush - INFO - Argument feae45e5-6d88-465f-bc71-722848c6d06e is now ModuleInstanceStatus.RESOLVING
2024-04-10 15:47:03,961 - rush - INFO - Argument 75bb4f40-0ce6-4093-8add-475d1f38e226 is now ModuleInstanceStatus.RESOLVING
2024-04-10 15:47:03,992 - rush - INFO - Argument 71189844-b1a3-40c3-8e2e-a2e4d2eace28 is now ModuleInstanceStatus.RESOLVING
2024-04-10 15:47:04,004 - rush - INFO - Argument 770a9c09-030a-4d12-8e19-252787039875 is now ModuleInstanceStatus.RESOLVING
2024-04-10 15:47:04,035 - rush - INFO - Argument d280af91-5816-4f32-8120-e6e2ede8b4b1 is now ModuleInstanceStatus.RESOLVING
2024-04-10 15:47:04,045 - rush - INFO - Argument ea6e2e0f-aa8f-4d20-9192-c2f099859878 is n

[PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_8FSU_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_1B39_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_4QXI_nohet_prepared.qdxf.json'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_8FSU_nohet_prepared.pdb'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_1B39_nohet_prepared.pdb'),
 PosixPath('/home/machineer/qdx/qs_batch_run_protein_prep/objects/protein_4QXI_nohet_prepared.pdb')]

In [None]:
OUTPUT_DIR = client.workspace / "objects"
!ls $OUTPUT_DIR

protein_1B39_nohet_prepared.pdb        protein_4QXI_nohet_prepared.qdxf.json
protein_1B39_nohet_prepared.qdxf.json  protein_8FSU_nohet_prepared.pdb
protein_4QXI_nohet_prepared.pdb        protein_8FSU_nohet_prepared.qdxf.json
