# Fold Candidate Sequences with Boltz-2

In [4]:
import os
import subprocess

import pandas as pd

In [7]:
## Read in candidates data
## Both sheets live in the same workbook, so parse the file once; passing a
## list of sheet names makes read_excel return a dict keyed by sheet name.
CANDIDATES_XLSX = '../candidates.xlsx'
ANTIGEN_ID = 'nipah_gpG'

workbook_sheets = pd.read_excel(CANDIDATES_XLSX, sheet_name=['Antibody Candidates', 'Antigens'])
candidates_df = workbook_sheets['Antibody Candidates']
antigens_df = workbook_sheets['Antigens']

## Get the antigen sequence for Nipah Glycoprotein G
antigen_matches = antigens_df.loc[antigens_df['antigen_id'] == ANTIGEN_ID, 'antigen_sequence']
if antigen_matches.empty:
    ## Fail with an actionable message instead of an opaque IndexError from indexing an empty result
    raise ValueError(f"Antigen '{ANTIGEN_ID}' not found in the 'Antigens' sheet of {CANDIDATES_XLSX}")

## If several rows share the ID, the first one is used
antigen_seq = antigen_matches.iloc[0]
print(f"Using Antigen Sequence: {antigen_seq}")

Using Antigen Sequence: ICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCTH


## ...with Boltz-2

In [8]:
import yaml

## Define function to create Boltz-2 YAML configuration for multimer folding

def create_boltz_yaml(h_chain_seq: str, l_chain_seq: str, antigen_seq: str, output_path: str) -> None:
    """Write a Boltz-2 YAML config describing a three-chain protein complex.

    The config has ``version: 1`` and three ``protein`` entries with the fixed
    chain IDs "H", "L" and "A".

    Args:
        h_chain_seq: Sequence written under chain ID "H".
        l_chain_seq: Sequence written under chain ID "L".
        antigen_seq: Sequence written under chain ID "A".
        output_path: Destination of the YAML file. Missing parent directories
            are created; a bare filename is written to the current working
            directory. An existing file is overwritten.
    """
    chain_table = (("H", h_chain_seq), ("L", l_chain_seq), ("A", antigen_seq))
    config_json = {
        "version": 1,
        "sequences": [
            {"protein": {"id": chain_id, "sequence": chain_seq}}
            for chain_id, chain_seq in chain_table
        ],
    }

    ## Make directories if they don't exist. A bare filename has an empty
    ## dirname and os.makedirs('') raises FileNotFoundError, so skip it then.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    ## Write YAML file
    with open(output_path, 'w') as f:
        yaml.dump(config_json, f)

In [9]:
## Define function for running Boltz-2 folding using the cford38 Docker image

# def run_boltz_folding(yaml_config_path: str, output_pdb_path: str):
#     docker_command = f"docker run --gpus all -v ./:/mnt/ --name boltz --rm -it cford38/boltz:2.1.1_withweights boltz predict {yaml_config_path} --out_dir /mnt/{os.path.dirname(output_pdb_path)} --use_msa_server"

#     os.system(docker_command)


## Define a function for running Boltz-2 folding from the command line
def run_boltz_folding(yaml_config_path: str, output_dir: str, accelerator: str = 'cpu') -> int:
    """Run ``boltz predict`` on one YAML config and return its exit code.

    Args:
        yaml_config_path: Path to the Boltz YAML config to fold.
        output_dir: Directory passed to ``--out_dir`` exactly as given.
        accelerator: Value passed to ``--accelerator`` (defaults to 'cpu').

    Returns:
        The exit code of the ``boltz`` process; a non-zero value is returned
        rather than raised, so a caller looping over many inputs can decide
        whether to continue.

    Raises:
        FileNotFoundError: If the ``boltz`` executable is not on PATH.
    """
    ## Build an argument list instead of a shell string so paths containing
    ## spaces or shell metacharacters are passed through verbatim.
    boltz_command = [
        "boltz", "predict", str(yaml_config_path),
        ## output_dir is already a directory; taking its dirname would send
        ## the results to the parent of the requested directory.
        "--out_dir", str(output_dir),
        ## NOTE(review): per the Boltz docs this generates MSAs through a remote
        ## server, so network access is presumably required — confirm.
        "--use_msa_server",
        "--accelerator", str(accelerator),
    ]

    completed = subprocess.run(boltz_command, check=False)
    return completed.returncode

In [None]:
## Directory that receives each candidate's YAML config; it is also the
## directory argument handed to run_boltz_folding.
output_dir = '../data/candidates/structures_boltz2'

for idx, row in candidates_df.iterrows():
    antibody_id = row['antibody_id']
    print(f"Folding Candidate: {antibody_id}")

    if row['test_chotia_pass']:
        ## Write the H/L/antigen config for this candidate, then fold it
        yaml_config_path = f"{output_dir}/{antibody_id}.yaml"
        create_boltz_yaml(row['h_chain'], row['l_chain'], antigen_seq, yaml_config_path)
        run_boltz_folding(yaml_config_path, output_dir)
    else:
        ## Candidates whose 'test_chotia_pass' flag is falsy are not folded
        print(f"\tSkipping Candidate {antibody_id} due to Chotia test failure.")