# Fold Candidate Sequences with Boltz-2 (Fv Frankenchains)

In [None]:
import os
import subprocess

import pandas as pd
# import boltz ## Just to check

In [4]:
## Load the antibody candidates and the antigen table from the shared workbook
candidates_df = pd.read_excel('../candidates.xlsx', sheet_name='Antibody Candidates')
antigens_df = pd.read_excel('../candidates.xlsx', sheet_name='Antigens')

## Keep only candidates flagged as passing the Chothia test
## (explicit comparison with True: blank/NaN cells compare False and are dropped)
candidates_df = candidates_df.loc[candidates_df['test_chotia_pass'].eq(True)]

## Look up the Nipah Glycoprotein G sequence; [0] takes the first matching row
antigen_seq = (
    antigens_df
    .loc[antigens_df['antigen_id'].eq('nipah_gpG_compout'), 'antigen_sequence']
    .to_numpy()[0]
)
print(f"Found {len(candidates_df)} Candidate Antibodies.\nUsing Antigen Sequence: {antigen_seq}")

Found 157 Candidate Antibodies.
Using Antigen Sequence: MPAENKKVRFENTTSDKGKIPSKVIKSYYGTMDIKKINEGLLDSKILSAFNTVIALLGSIVIIVMNIMIIQNYTRSTDNQAVIKDALQGIQQQIKGLADKIGTEIGPKVSLIDTSSTITIPANIGLLGSKISQSTASINENVNEKCKFTLPPLKIHECNISCPNPLPFREYRPQTEGVSNLVGLPNNICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCT


In [12]:
import yaml

# msa_path = os.path.abspath('../data/fastas/nipah.a3m')
## MSA file for the antigen chain; create_boltz_yaml reads this module-level
## variable and writes it verbatim into the "msa" field of chain "A".
## NOTE(review): this is a relative path -- confirm it resolves from wherever
## `boltz predict` is launched (the absolute-path variant above was disabled).
msa_path = '../data/fastas/nipah.a3m'

## Define function to create Boltz-2 YAML configuration for multimer folding
def create_boltz_yaml(franken_chain: str, antigen_seq: str, output_path: str):
    """Write a two-chain Boltz-2 input YAML for one antibody/antigen pair.

    Chain "B" carries ``franken_chain`` with ``msa: empty``; chain "A" carries
    ``antigen_seq`` and points at the module-level ``msa_path``.

    Args:
        franken_chain: Sequence written as protein chain "B".
        antigen_seq: Sequence written as protein chain "A".
        output_path: Destination of the YAML file. Missing parent directories
            are created; a bare filename (no directory part) is written to the
            current working directory.
    """
    config = {
        "version": 1,
        "sequences": [
            {"protein": {"id": "B", "sequence": franken_chain, "msa": "empty"}},
            {"protein": {"id": "A", "sequence": antigen_seq, "msa": msa_path}},
        ],
    }

    ## os.makedirs('') raises FileNotFoundError, so only create a parent
    ## directory when output_path actually contains one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    ## Write YAML file
    with open(output_path, 'w') as f:
        yaml.dump(config, f)

In [None]:
## Define a function for running Boltz-2 folding from the command line
def run_boltz_folding(yaml_config_path: str, output_dir: str, accelerator: str = 'cpu'):
    """Run ``boltz predict`` on one YAML config and return its exit status.

    Args:
        yaml_config_path: Path to the Boltz input YAML.
        output_dir: Directory passed to ``--out_dir`` as-is. Callers in this
            notebook look for ``<output_dir>/boltz_results_<name>`` to decide
            whether a candidate was already folded, so the directory must not
            be shortened to its parent here.
        accelerator: Value forwarded to ``--accelerator`` (default ``'cpu'``).

    Returns:
        The process exit status (0 on success), or 127 if the ``boltz``
        executable could not be found.
    """
    ## Argument list instead of a shell string: paths containing spaces or
    ## shell metacharacters are passed through untouched.
    boltz_command = [
        "boltz", "predict", str(yaml_config_path),
        "--out_dir", str(output_dir),
        "--write_full_pae",
        "--accelerator", str(accelerator),
    ]
    try:
        completed = subprocess.run(boltz_command, check=False)
    except FileNotFoundError:
        ## Keep the best-effort behaviour of the old os.system call: report and move on.
        print("\t'boltz' executable not found on PATH; is Boltz installed in this environment?")
        return 127

    if completed.returncode != 0:
        print(f"\tboltz predict exited with status {completed.returncode} for {yaml_config_path}")
    return completed.returncode

In [13]:
## Emit a Boltz-2 YAML config for every candidate that does not have one yet

output_dir = '../data/candidates/structures_boltz2_frankenchain'

for row_label, candidate in candidates_df.iterrows():
    antibody_id = candidate['antibody_id']
    print(f"Generating YAML file for Candidate: {antibody_id}")

    yaml_config_path = f"{output_dir}/{antibody_id}.yaml"

    if os.path.exists(yaml_config_path):
        ## An existing config is left untouched
        print(f"\tYAML file for {antibody_id} already exists, skipping YAML creation.")
    else:
        ## No config on disk yet: build it from this candidate's franken chain
        create_boltz_yaml(candidate['franken_chain'], antigen_seq, yaml_config_path)

Generating YAML file for Candidate: sbio-nipahgpg-001
Generating YAML file for Candidate: sbio-nipahgpg-002
Generating YAML file for Candidate: sbio-nipahgpg-003
Generating YAML file for Candidate: sbio-nipahgpg-004
Generating YAML file for Candidate: sbio-nipahgpg-005
Generating YAML file for Candidate: sbio-nipahgpg-008
Generating YAML file for Candidate: sbio-nipahgpg-009
Generating YAML file for Candidate: sbio-nipahgpg-010
Generating YAML file for Candidate: sbio-nipahgpg-011
Generating YAML file for Candidate: sbio-nipahgpg-012
Generating YAML file for Candidate: sbio-nipahgpg-013
Generating YAML file for Candidate: sbio-nipahgpg-014
Generating YAML file for Candidate: sbio-nipahgpg-015
Generating YAML file for Candidate: sbio-nipahgpg-016
Generating YAML file for Candidate: sbio-nipahgpg-017
Generating YAML file for Candidate: sbio-nipahgpg-018
Generating YAML file for Candidate: sbio-nipahgpg-019
Generating YAML file for Candidate: sbio-nipahgpg-020
Generating YAML file for Can

In [None]:
# docker run --gpus all -v .\\:/inputs -it --rm cford38/boltz:2.1.1_withweights bash

In [None]:
## Fold every candidate that has no Boltz-2 results directory yet

output_dir = '../data/candidates/structures_boltz2_frankenchain'

for row_label, candidate in candidates_df.iterrows():
    antibody_id = candidate['antibody_id']
    print(f"Folding Candidate: {antibody_id}")

    ## Per-candidate input config and expected results directory
    yaml_config_path = f"{output_dir}/{antibody_id}.yaml"
    results_dir = f"{output_dir}/boltz_results_{antibody_id}"

    if os.path.exists(results_dir):
        ## Results directory already present: do not fold again
        print(f"\tOutput directory for {antibody_id} already exists, skipping folding.")
    else:
        ## Run Boltz folding
        run_boltz_folding(yaml_config_path, output_dir, accelerator='gpu')