# Run HADDOCK3 on Boltz-2 Candidate Structures

In [1]:
import pandas as pd
import os, re, subprocess

In [2]:
## Load the antibody candidate table from the project workbook
candidates_df = pd.read_excel(
    '../candidates.xlsx',
    sheet_name='Antibody Candidates',
)

In [3]:
## HADDOCK3 scoring function
def _parse_haddock3_output(stdout: str) -> dict:
    """Pull the HADDOCK score and the individual energy terms out of CLI output text."""
    metrics = {}

    ## Extract HADDOCK score
    match = re.search(r"HADDOCK-score \(emscoring\) = ([\-\d\.]+)", stdout)
    if match:
        metrics["score"] = float(match.group(1))

    ## Extract individual energy terms (every `name=value` pair found in the text)
    for key, value in re.findall(r"(\w+)=([\-\d\.]+)", stdout):
        metrics[key] = float(value)

    ## Calculate total score; skipped when either term is missing so that
    ## unparsable output does not raise KeyError and abort the caller's loop
    if "vdw" in metrics and "elec" in metrics:
        metrics["total"] = metrics["vdw"] + metrics["elec"]

    ## Remove air (tolerate output that never reported it)
    metrics.pop("air", None)

    return metrics


def haddock3_score(
    pdb_path: str,
    repo_dir: str = '/Users/colby/Nipah_gpG_Fv_Generation',
    image: str = 'cford38/haddock:3-2024.10.0b6',
) -> dict:
    """Score a PDB file with the `haddock3-score --full` CLI run inside Docker.

    Args:
        pdb_path: Path to the PDB file as seen from inside the container.
            `repo_dir` is mounted at `/inputs`, so paths normally start with
            `/inputs/`.
        repo_dir: Host directory to mount at `/inputs`. Defaults to the
            location that was previously hard-coded in the function body.
        image: Docker image that provides the `haddock3-score` executable.

    Returns:
        A dict of floats parsed from the CLI output: `score` (when the
        "HADDOCK-score (emscoring)" line is found), one entry per `name=value`
        energy term except `air`, and `total` (`vdw + elec`) when both terms
        are present. An empty dict is returned when the command exits with a
        non-zero status or when nothing could be parsed.
    """
    ## Build the argument list directly instead of splitting a formatted string,
    ## so that paths containing whitespace stay a single argument
    command = [
        "docker", "run",
        "-v", f"{repo_dir}:/inputs",
        "--rm", image,
        "haddock3-score", "--full", pdb_path,
    ]

    try:
        sp_result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("HADDOCK3 Error occurred:", e.stderr)
        return {}

    metrics = _parse_haddock3_output(sp_result.stdout)
    if not metrics:
        print("HADDOCK3 produced no parsable output for:", pdb_path)
    return metrics

In [None]:
# docker run -v ./:/inputs -it --rm cford38/haddock:3-2024.10.0b6 haddock3-score --full /inputs/data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-148/predictions/sbio-nipahgpg-148/sbio-nipahgpg-148_model_0.pdb

In [8]:
## Loop through each candidate and run HADDOCK3 scoring
## Collect one record per candidate and build the DataFrame once at the end,
## rather than concatenating onto a growing frame on every iteration
haddock_records = []

for antibody_id in candidates_df['antibody_id']:
    print(f"Running HADDOCK for Candidate: {antibody_id}")

    ## Path as seen inside the Docker container (haddock3_score mounts the
    ## repository at /inputs), so it cannot be checked with os.path.exists here
    pdb_path = f'/inputs/data/candidates/structures_boltz2_frankenchain_comp/boltz_results_{antibody_id}/predictions/{antibody_id}/{antibody_id}_model_0.pdb'

    print(f"Scoring PDB file at: {pdb_path}")

    haddock_dict = haddock3_score(pdb_path = pdb_path)

    ## A failed run returns {}, which leaves every metric as NaN for that candidate
    haddock_records.append({'antibody_id': antibody_id, **haddock_dict})

## Keep the 'antibody_id' column even when there were no candidates at all
if haddock_records:
    haddock_df = pd.DataFrame(haddock_records)
else:
    haddock_df = pd.DataFrame(columns=['antibody_id'])


Running HADDOCK for Candidate: sbio-nipahgpg-001
Scoring PDB file at: /inputs/data/candidates/structures_boltz2_frankenchain_comp/boltz_results_sbio-nipahgpg-001/predictions/sbio-nipahgpg-001/sbio-nipahgpg-001_model_0.pdb
Running HADDOCK for Candidate: sbio-nipahgpg-002
Scoring PDB file at: /inputs/data/candidates/structures_boltz2_frankenchain_comp/boltz_results_sbio-nipahgpg-002/predictions/sbio-nipahgpg-002/sbio-nipahgpg-002_model_0.pdb
Running HADDOCK for Candidate: sbio-nipahgpg-003
Scoring PDB file at: /inputs/data/candidates/structures_boltz2_frankenchain_comp/boltz_results_sbio-nipahgpg-003/predictions/sbio-nipahgpg-003/sbio-nipahgpg-003_model_0.pdb
Running HADDOCK for Candidate: sbio-nipahgpg-004
Scoring PDB file at: /inputs/data/candidates/structures_boltz2_frankenchain_comp/boltz_results_sbio-nipahgpg-004/predictions/sbio-nipahgpg-004/sbio-nipahgpg-004_model_0.pdb
Running HADDOCK for Candidate: sbio-nipahgpg-005
Scoring PDB file at: /inputs/data/candidates/structures_boltz2_

In [9]:
## Display the collected HADDOCK3 scores (one row per candidate)
haddock_df

Unnamed: 0,antibody_id,score,vdw,elec,desolv,bsa,total
0,sbio-nipahgpg-001,-83.9570,-59.289000,-18.77880,-20.912200,1970.72,-78.067800
1,sbio-nipahgpg-002,-57.4166,-38.811300,-77.82330,-3.040640,1541.35,-116.634600
2,sbio-nipahgpg-003,13.2364,2.957890,-24.83250,15.245000,1903.60,-21.874610
3,sbio-nipahgpg-004,-32.3134,0.874296,-258.98100,18.608500,2535.76,-258.106704
4,sbio-nipahgpg-005,-69.4488,-54.589600,-72.21090,-0.417022,1706.01,-126.800500
...,...,...,...,...,...,...,...
153,sbio-nipahgpg-156,-70.9666,-10.044300,-185.20600,-23.881100,2418.43,-195.250300
154,sbio-nipahgpg-157,-98.2460,-41.757900,-23.23020,-51.842100,2633.19,-64.988100
155,sbio-nipahgpg-158,-142.9182,-52.643000,-307.98600,-28.678000,3162.04,-360.629000
156,sbio-nipahgpg-159,-134.0713,-59.569300,-66.74620,-61.152800,3342.05,-126.315500


In [10]:
## Write the score table to CSV without the row index
# haddock_df.to_csv('../data/candidates/structures_boltz2_frankenchain/haddock3_scores.csv', index=False)
haddock_df.to_csv(
    '../data/candidates/structures_boltz2_frankenchain_comp/haddock3_scores.csv',
    index=False,
)