# Fold Candidate Sequences with Boltz-2

In [13]:
import pandas as pd
import os

In [14]:
## Load both worksheets (antibody candidates and antigens) from the shared workbook
candidates_df = pd.read_excel('../candidates.xlsx', sheet_name='Antibody Candidates')
antigens_df = pd.read_excel('../candidates.xlsx', sheet_name='Antigens')

## Keep only the candidates whose `test_chotia_pass` flag equals True
chotia_pass_mask = candidates_df['test_chotia_pass'].eq(True)
candidates_df = candidates_df.loc[chotia_pass_mask]

## Pull the sequence of the first antigen row whose id is 'nipah_gpG' (Nipah Glycoprotein G)
nipah_gpg_mask = antigens_df['antigen_id'].eq('nipah_gpG')
nipah_gpg_sequences = antigens_df.loc[nipah_gpg_mask, 'antigen_sequence']
antigen_seq = nipah_gpg_sequences.values[0]
print(f"Found {len(candidates_df)} Candidate Antibodies.\nUsing Antigen Sequence: {antigen_seq}")

Found 29 Candidate Antibodies.
Using Antigen Sequence: ICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCTH


In [15]:
import yaml

## Define function to create Boltz-2 YAML configuration for multimer folding

def create_boltz_yaml(h_chain_seq: str, l_chain_seq: str, antigen_seq: str, output_path: str):
    """Write a Boltz-2 input YAML describing a three-chain protein complex.

    The file contains ``version: 1`` and a ``sequences`` list with one
    ``protein`` entry per chain, using the ids ``H``, ``L`` and ``A``.

    Parameters
    ----------
    h_chain_seq : str
        Sequence stored under chain id ``"H"``.
    l_chain_seq : str
        Sequence stored under chain id ``"L"``.
    antigen_seq : str
        Sequence stored under chain id ``"A"``.
    output_path : str
        Destination of the YAML file. Missing parent directories are
        created. A bare filename (no directory part) is written to the
        current working directory.

    Returns
    -------
    None
        The function is called for its side effect of writing ``output_path``.
    """
    ## Chain ids paired with their sequences, in the order they are written
    chain_sequences = (
        ("H", h_chain_seq),
        ("L", l_chain_seq),
        ("A", antigen_seq),
    )
    boltz_config = {
        "version": 1,
        "sequences": [
            {"protein": {"id": chain_id, "sequence": sequence}}
            for chain_id, sequence in chain_sequences
        ],
    }

    ## Make directories if they don't exist.
    ## os.path.dirname() returns '' for a bare filename and os.makedirs('')
    ## raises FileNotFoundError, so only create a parent when there is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    ## Write YAML file
    with open(output_path, 'w') as f:
        yaml.dump(boltz_config, f)

In [16]:
## Define a function for running Boltz-2 folding from the command line
def run_boltz_folding(yaml_config_path: str, output_dir: str, accelerator: str = 'cpu'):
    """Run ``boltz predict`` on one YAML configuration.

    Parameters
    ----------
    yaml_config_path : str
        Path of the Boltz input YAML to fold.
    output_dir : str
        Directory passed to ``--out_dir``. It is forwarded unchanged, so the
        ``boltz_results_<input name>`` folder Boltz creates ends up inside
        ``output_dir`` itself rather than in its parent directory.
    accelerator : str, default 'cpu'
        Value passed to ``--accelerator``.

    Returns
    -------
    int
        Exit status of the ``boltz`` process, or 127 when the executable
        cannot be found. A non-zero status is reported with a printed message
        instead of an exception so a batch loop can continue.
    """
    import subprocess  # local import keeps this cell runnable on its own

    ## An argument list (no shell) keeps paths with spaces or shell
    ## metacharacters intact.
    boltz_command = [
        "boltz", "predict", str(yaml_config_path),
        "--out_dir", str(output_dir),
        "--use_msa_server",
        "--accelerator", str(accelerator),
    ]

    try:
        completed = subprocess.run(boltz_command, check=False)
    except FileNotFoundError:
        ## Same status a shell reports for "command not found"
        print("\tCould not find the `boltz` executable on PATH.")
        return 127

    if completed.returncode != 0:
        print(f"\tboltz exited with status {completed.returncode} for {yaml_config_path}")
    return completed.returncode

In [20]:
## Loop to create YAML files for Boltz-2 folding

for idx, row in candidates_df.iterrows():
    antibody_id = row['antibody_id']
    print(f"Generating YAML file for Candidate: {antibody_id}")

    ## All candidate configs are written side by side in the same folder
    output_dir = '../data/candidates/structures_boltz2'
    yaml_config_path = f"{output_dir}/{antibody_id}.yaml"

    if not os.path.exists(yaml_config_path):
        ## Nothing on disk yet: build the config from this row's `h_chain` /
        ## `l_chain` columns together with the shared antigen sequence
        create_boltz_yaml(row['h_chain'], row['l_chain'], antigen_seq, yaml_config_path)
    else:
        ## An existing config is left untouched
        print(f"\tYAML file for {antibody_id} already exists, skipping YAML creation.")

Generating YAML file for Candidate: sbio-nipahgpg-001
	YAML file for sbio-nipahgpg-001 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-002
	YAML file for sbio-nipahgpg-002 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-003
	YAML file for sbio-nipahgpg-003 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-004
	YAML file for sbio-nipahgpg-004 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-005
	YAML file for sbio-nipahgpg-005 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-008
	YAML file for sbio-nipahgpg-008 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-009
	YAML file for sbio-nipahgpg-009 already exists, skipping YAML creation.
Generating YAML file for Candidate: sbio-nipahgpg-010
	YAML file for sbio-nipahgpg-010 already exists, skipping

In [None]:
## Loop to run Boltz-2 folding

for idx, row in candidates_df.iterrows():
    antibody_id = row['antibody_id']
    print(f"Folding Candidate: {antibody_id}")

    ## Input config and the per-candidate results folder share one parent directory
    output_dir = '../data/candidates/structures_boltz2'
    yaml_config_path = f"{output_dir}/{antibody_id}.yaml"
    results_dir = f"{output_dir}/boltz_results_{antibody_id}"

    if not os.path.exists(results_dir):
        ## No results folder found for this candidate: fold it on the GPU
        run_boltz_folding(yaml_config_path, output_dir, accelerator='gpu')
    else:
        ## A results folder is already present, so the candidate is not refolded
        print(f"\tOutput directory for {antibody_id} already exists, skipping folding.")

Folding Candidate: sbio-nipahgpg-001
Folding Candidate: sbio-nipahgpg-002
Folding Candidate: sbio-nipahgpg-003
Folding Candidate: sbio-nipahgpg-004
Folding Candidate: sbio-nipahgpg-005
Folding Candidate: sbio-nipahgpg-006
	Skipping Candidate sbio-nipahgpg-006 due to Chotia test failure.
Folding Candidate: sbio-nipahgpg-007
	Skipping Candidate sbio-nipahgpg-007 due to Chotia test failure.
Folding Candidate: sbio-nipahgpg-008
Folding Candidate: sbio-nipahgpg-009
Folding Candidate: sbio-nipahgpg-010
Folding Candidate: sbio-nipahgpg-011
Folding Candidate: sbio-nipahgpg-012
Folding Candidate: sbio-nipahgpg-013
Folding Candidate: sbio-nipahgpg-014
Folding Candidate: sbio-nipahgpg-015
Folding Candidate: sbio-nipahgpg-016
Folding Candidate: sbio-nipahgpg-017
Folding Candidate: sbio-nipahgpg-018
Folding Candidate: sbio-nipahgpg-019
Folding Candidate: sbio-nipahgpg-020
Folding Candidate: sbio-nipahgpg-021
Folding Candidate: sbio-nipahgpg-022
Folding Candidate: sbio-nipahgpg-023
