# Run ipSAE on Boltz-2 Candidate Structures

In [1]:
import pandas as pd
import os

In [2]:
## Read in candidates data
candidates_df = pd.read_excel('../candidates.xlsx', sheet_name='Antibody Candidates')
antigens_df = pd.read_excel('../candidates.xlsx', sheet_name='Antigens')

## Get the antigen sequence for Nipah Glycoprotein G
antigen_seq = antigens_df.loc[antigens_df['antigen_id'] == 'nipah_gpG', 'antigen_sequence'].values[0]
print(f"Using Antigen Sequence: {antigen_seq}")

Using Antigen Sequence: ICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCTH


In [None]:
def calculate_ipsae(
    pae_file_path,
    structure_file_path,
    pae_cutoff=15.0,
    dist_cutoff=15.0,
):
    """
    Calculate ipSAE and related scores for protein-protein interactions.
    SOURCE: https://github.com/adaptyvbio/nipah_ipsae_pipeline/blob/main/Boltz-IPSAE.ipynb

    Parameters:
    -----------
    pae_file_path : str
        Path to the PAE file (JSON for AF2/AF3, NPZ for Boltz1)
    structure_file_path : str
        Path to the structure file (PDB for AF2, mmCIF for AF3/Boltz1)
    pae_cutoff : float
        Cutoff value for PAE in score calculations
    dist_cutoff : float
        Cutoff value for distance in score calculations

    Returns:
    --------
    dict
        Dictionary containing all calculated scores
    """

    os.system(f"python helper_scripts/ipsae.py {pae_file_path} {structure_file_path} {pae_cutoff} {dist_cutoff}")

    print(f"Reading results from {structure_file_path.replace('.cif',  f'_{int(pae_cutoff)}_{int(dist_cutoff)}.txt')}")

    df = pd.read_csv(structure_file_path.replace('.cif', f'_{int(pae_cutoff)}_{int(dist_cutoff)}.txt'))
    results = {}


    for i, row in df[df.Type=="max"].iterrows():
        chainpair = f"{row['Chn1']}-{row['Chn2']}"

        results[chainpair] = {
            "max": {
                **{col: row[col] for col in df.columns[5:-1]}
            }
        }
        mask = (df['Chn1'] == row['Chn1']) & (df['Chn2'] == row['Chn2']) & (df['Type'] != "max")
        min_vals = df[mask][df.columns[5:-1]].min()
        results[chainpair]["min"] = min_vals.to_dict()

    return results
   

In [None]:
## Loop through each candidate and run the ipsae script
scores_df = pd.DataFrame(
    columns=[
        'antibody_id',
        'min_ipsae',
        'max_ipsae'
        ]
    )

for idx, row in candidates_df.iterrows():
    antibody_id = row['antibody_id']
    print(f"Scoring Boltz-2 outputs for Candidate: {antibody_id}")
    prediction_dir = f'../data/candidates/structures_boltz2_frankenchain/boltz_results_{antibody_id}/predictions/{antibody_id}/'
    stem_name = f'{antibody_id}_model_0'

    pae_file_path = os.path.join(prediction_dir, f'pae_{stem_name}.npz')
    structure_file_path = os.path.join(prediction_dir, f'{stem_name}.cif')

    ## Check if prediction directory exists
    if not os.path.exists(prediction_dir):
        print(f"Prediction directory not found for {antibody_id}, skipping...")
        continue

    try:
        score_results = calculate_ipsae(pae_file_path, structure_file_path, pae_cutoff=15.0, dist_cutoff=15.0)

        ipsae_dict = {
            'antibody_id': antibody_id,
            'min_ipsae': score_results['A-B']['min']['ipSAE'],
            'max_ipsae': score_results['A-B']['max']['ipSAE']
        }

        ipsae_df = pd.DataFrame([ipsae_dict])

        scores_df = pd.concat([scores_df, ipsae_df], ignore_index=True)
    except Exception as e:
        print(f"Error processing {antibody_id}: {e}")
        continue




Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-001
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-001/predictions/sbio-nipahgpg-001/sbio-nipahgpg-001_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-002


  scores_df = pd.concat([scores_df, ipsae_df], ignore_index=True)


Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-002/predictions/sbio-nipahgpg-002/sbio-nipahgpg-002_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-003
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-003/predictions/sbio-nipahgpg-003/sbio-nipahgpg-003_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-004
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-004/predictions/sbio-nipahgpg-004/sbio-nipahgpg-004_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-005
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-005/predictions/sbio-nipahgpg-005/sbio-nipahgpg-005_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-006
Prediction directory not found for sbio-nipahgpg-006, skipping...
Scoring Boltz-2 outputs for Ca

Traceback (most recent call last):
  File "/Users/colbyford/Documents/GitHub/Nipah_gpG_Fv_Generation/scripts/helper_scripts/ipsae.py", line 446, in <module>
    plddt =    plddt_boltz1[np.ix_(token_array.astype(bool))]
               ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: index 663 is out of bounds for axis 0 with size 663


Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-014/predictions/sbio-nipahgpg-014/sbio-nipahgpg-014_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-015
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-015/predictions/sbio-nipahgpg-015/sbio-nipahgpg-015_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-016
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-016/predictions/sbio-nipahgpg-016/sbio-nipahgpg-016_model_0_15_15.txt
Error processing sbio-nipahgpg-016: No columns to parse from file
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-017


Traceback (most recent call last):
  File "/Users/colbyford/Documents/GitHub/Nipah_gpG_Fv_Generation/scripts/helper_scripts/ipsae.py", line 446, in <module>
    plddt =    plddt_boltz1[np.ix_(token_array.astype(bool))]
               ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: index 663 is out of bounds for axis 0 with size 663


Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-017/predictions/sbio-nipahgpg-017/sbio-nipahgpg-017_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-018
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-018/predictions/sbio-nipahgpg-018/sbio-nipahgpg-018_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-019
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-019/predictions/sbio-nipahgpg-019/sbio-nipahgpg-019_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-020
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-020/predictions/sbio-nipahgpg-020/sbio-nipahgpg-020_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-021
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahg

Traceback (most recent call last):
  File "/Users/colbyford/Documents/GitHub/Nipah_gpG_Fv_Generation/scripts/helper_scripts/ipsae.py", line 446, in <module>
    plddt =    plddt_boltz1[np.ix_(token_array.astype(bool))]
               ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: index 666 is out of bounds for axis 0 with size 666


Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-025/predictions/sbio-nipahgpg-025/sbio-nipahgpg-025_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-026
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-026/predictions/sbio-nipahgpg-026/sbio-nipahgpg-026_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-027
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-027/predictions/sbio-nipahgpg-027/sbio-nipahgpg-027_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-028
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahgpg-028/predictions/sbio-nipahgpg-028/sbio-nipahgpg-028_model_0_15_15.txt
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-029
Reading results from ../data/candidates/structures_boltz2_frankenchain/boltz_results_sbio-nipahg

In [16]:
scores_df

Unnamed: 0,antibody_id,min_ipsae,max_ipsae
0,sbio-nipahgpg-001,0.258431,0.335033
1,sbio-nipahgpg-002,0.0,0.005494
2,sbio-nipahgpg-003,0.0,0.0
3,sbio-nipahgpg-004,0.005239,0.080114
4,sbio-nipahgpg-005,0.0,0.005349
5,sbio-nipahgpg-008,0.0,0.01302
6,sbio-nipahgpg-009,0.005282,0.086639
7,sbio-nipahgpg-010,0.0,0.0
8,sbio-nipahgpg-011,0.116301,0.184544
9,sbio-nipahgpg-012,0.0,0.004806
