# Fold Candidate Sequences with ESM3

In [2]:
import getpass
import os

import pandas as pd
from esm.sdk import client
from esm.sdk.api import ESMProtein, GenerationConfig

In [3]:
## Read in candidates data
## Both sheets come from a single read_excel call so the workbook is only parsed once.
workbook_path = '../candidates.xlsx'
sheets = pd.read_excel(workbook_path, sheet_name=['Antibody Candidates', 'Antigens'])
candidates_df = sheets['Antibody Candidates']
antigens_df = sheets['Antigens']

## Get the antigen sequence for Nipah Glycoprotein G
antigen_id = 'nipah_gpG'
antigen_matches = antigens_df.loc[antigens_df['antigen_id'] == antigen_id, 'antigen_sequence']
## Fail with a descriptive message instead of an opaque IndexError when the row is missing
if antigen_matches.empty:
    raise ValueError(f"Antigen '{antigen_id}' not found in the 'Antigens' sheet of {workbook_path}")
antigen_seq = antigen_matches.iloc[0]
print(f"Using Antigen Sequence: {antigen_seq}")

Using Antigen Sequence: ICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCTH


In [5]:
def fold_sequence(
    sequence: str,
    model_name: str,
    token: str,
    soc: bool,
    num_steps: int = 10,
    temperature: float = 0.1,
    url: str = "https://forge.evolutionaryscale.ai",
) -> str:
    """Fold a sequence with a remote ESM model and return the structure as a PDB string.

    Args:
        sequence: Amino-acid sequence to fold; spaces and newlines are stripped first.
        model_name: Name of the model passed to the ESM client.
        token: API token passed to the ESM client.
        soc: Forwarded as ``potential_sequence_of_concern`` on the ``ESMProtein`` input.
        num_steps: Number of generation steps for the structure track.
        temperature: Sampling temperature for the structure track.
        url: Base URL of the service hosting the model.

    Returns:
        The PDB text of the predicted complex, or an empty string if folding failed
        (the failure is printed rather than raised).
    """
    ## Load the ESM model
    model = client(model=model_name, url=url, token=token)
    ## Prepare the sequence
    sequence = sequence.replace(" ", "").replace("\n", "")
    ## Generate the structure
    try:
        # Named protein_input so the builtin input() is not shadowed
        protein_input = ESMProtein(sequence=sequence, potential_sequence_of_concern=soc)
        config = GenerationConfig(track="structure", num_steps=num_steps, temperature=temperature)
        generation = model.generate(protein_input, config)
        # NOTE(review): the SDK appears to return an error object instead of raising when
        # generation fails; report that object directly rather than tripping over a
        # missing to_protein_complex attribute below.
        if not isinstance(generation, ESMProtein):
            print(f"Error folding sequence {sequence}: {generation}")
            return ""
        return generation.to_protein_complex().to_pdb_string()
    except Exception as e:
        # Best-effort by design: callers receive "" and can decide how to proceed
        print(f"Error folding sequence {sequence}: {e}")
        return ""

In [9]:
## Set ESM API Token in environment variable
## SECURITY: never store a literal token in the notebook. The token that was previously
## hardcoded in this cell must be treated as leaked and revoked/rotated with the provider.
## Preferred: export ESM_API_TOKEN before launching Jupyter. Otherwise prompt for it here;
## getpass does not echo the value, so it never lands in the saved cell output.
if not os.environ.get("ESM_API_TOKEN"):
    os.environ["ESM_API_TOKEN"] = getpass.getpass("ESM API token: ")

In [12]:
## For each candidate, fold the antibody-antigen complex
token = os.getenv("ESM_API_TOKEN")
if not token:
    raise RuntimeError("ESM_API_TOKEN is not set; set it before running this cell.")

## The client and generation settings are the same for every candidate, so build them once
model = client(model="esm3-medium-multimer-2024-09", url="https://forge.evolutionaryscale.ai", token=token)
config = GenerationConfig(track="structure", num_steps=10, temperature=0.1)

## Make sure the output directory exists before the first write
output_dir = '../data/candidates'
os.makedirs(output_dir, exist_ok=True)

for _, row in candidates_df.iterrows():
    antibody_id = row['antibody_id']
    print(f"Folding Candidate: {antibody_id}")

    h_chain_seq = row['h_chain']
    l_chain_seq = row['l_chain']

    ## Heavy chain, light chain and antigen are joined with '|'
    ## (presumably the chain separator expected by the multimer model - confirm against the ESM docs)
    sequence = f'{h_chain_seq}|{l_chain_seq}|{antigen_seq}'

    ## Named protein_input so the builtin input() is not shadowed in the kernel namespace
    protein_input = ESMProtein(sequence=sequence, potential_sequence_of_concern=True)
    generation = model.generate(protein_input, config)

    ## NOTE(review): the SDK appears to return an error object instead of raising when
    ## generation fails; report it and move on rather than crashing on a missing attribute.
    if not isinstance(generation, ESMProtein):
        print(f"Error folding candidate {antibody_id}: {generation}")
        continue

    protein_complex = generation.to_protein_complex()

    with open(f'{output_dir}/{antibody_id}_complex.pdb', 'w') as f:
        f.write(protein_complex.to_pdb_string())

Folding Candidate: sbio-nipahgpg-001




Folding Candidate: sbio-nipahgpg-002
Folding Candidate: sbio-nipahgpg-003
Folding Candidate: sbio-nipahgpg-004
Folding Candidate: sbio-nipahgpg-005
