# Antibody-Antigen Contact Detection with PandaProt
## Parallelized

In [None]:
import subprocess
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import sys

In [2]:
# Directory prefix used later to build each "<pdb_id>.pdb.gz" input path.
base_path_to_pdbs = "./pdbs"

# One row per structure to process; later cells read the columns pdb_id,
# h_chain_id, l_chain_id, antigen_ids and antigen_seqs from this table.
sequences_df = pd.read_csv("sabdab_sequences.csv")

In [31]:
def run_job(pdb_file, h_chain_id, l_chain_id, antigen_ids, antigen_seqs, output_file):
    """Run ``get_contacts.py`` for one structure in a child Python process.

    Every argument is forwarded unchanged as the value of the command-line
    flag that carries the same name (``--pdb_file``, ``--h_chain_id``,
    ``--l_chain_id``, ``--antigen_ids``, ``--antigen_seqs``,
    ``--output_file``).

    Returns:
        A dict with the child's captured ``"stdout"`` and ``"stderr"`` text
        and its ``"returncode"``. A non-zero exit status is reported through
        the dict rather than raised.
    """
    flag_values = (
        ("--pdb_file", pdb_file),
        ("--h_chain_id", h_chain_id),
        ("--l_chain_id", l_chain_id),
        ("--antigen_ids", antigen_ids),
        ("--antigen_seqs", antigen_seqs),
        ("--output_file", output_file),
    )

    # Launch the script with the same interpreter that is running this code.
    argv = [sys.executable, "get_contacts.py"]
    for flag, value in flag_values:
        argv.extend((flag, value))

    completed = subprocess.run(argv, capture_output=True, text=True)
    return dict(
        stdout=completed.stdout,
        stderr=completed.stderr,
        returncode=completed.returncode,
    )

In [None]:
## Test 1
# Smoke test: run a single job on the 1a2y test structure and display the
# returned stdout/stderr/returncode dict as the cell output.
run_job(**{
    "pdb_file": "./pdbs_test/1a2y.pdb.gz",
    "h_chain_id": "B",
    "l_chain_id": "A",
    "antigen_ids": "C",
    "antigen_seqs": "KVFGRCELAAAMKRHGLANYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL",
    "output_file": "./pdbs_test/1a2y_contacts.csv",
})

{'stdout': '',
 'stderr': '2025-07-21 16:58:10,788 - INFO - Processing ./pdbs_test/1a2y.pdb.gz for antibody chains B, A with antigen chain(s) C\n2025-07-21 16:58:10,791 - INFO - Unzipped input PDB to temporary file C:\\Users\\Colby\\AppData\\Local\\Temp\\tmp597vi1lf.pdb\n2025-07-21 16:58:10,874 - INFO - 1. Running PandaProt analysis...\n2025-07-21 16:58:24,130 - INFO - 2. Running BioPandas renumbering...\n2025-07-21 16:58:24,134 - INFO - 3. Highlighting epitope residues in sequence...\n2025-07-21 16:58:24,146 - INFO - DONE: 1a2y.pdb processed. Results saved to ./pdbs_test/1a2y_contacts_sub.csv\n',
 'returncode': 0}

In [None]:
# Submit one run_job call per row of sequences_df to a pool of worker threads.
# Leaving the `with` block waits for every submitted job to finish, so the
# futures collected here are all done by the time the next cell runs.
with ThreadPoolExecutor(max_workers=24) as executor:
    futures = [
        executor.submit(
            run_job,
            f"{base_path_to_pdbs}/{row['pdb_id']}.pdb.gz",  # input structure
            row['h_chain_id'],
            row['l_chain_id'],
            row['antigen_ids'],
            row['antigen_seqs'],
            f"./contacts/{row['pdb_id']}_contacts.csv",  # per-structure output
        )
        for _, row in sequences_df.iterrows()
    ]

In [33]:
# Report each job's result dict in submission order. `.result()` re-raises any
# exception raised inside the worker, which stops the loop at that job.
for job in futures:
    outcome = job.result()
    print(outcome)

TypeError: run_job() missing 1 required positional argument: 'output_file'