# Run Evaluation Tests on Candidates

In [1]:
import os
import subprocess
import sys

import pandas as pd

In [2]:
## Read in candidates data
CANDIDATES_XLSX = '../candidates.xlsx'
CANDIDATES_SHEET = 'Antibody Candidates'
ANTIGENS_SHEET = 'Antigens'
TARGET_ANTIGEN_ID = 'nipah_gpG'  # Nipah Glycoprotein G

## Passing a list of sheet names makes read_excel return a dict of DataFrames
## keyed by sheet name, so the workbook is only opened once.
workbook_sheets = pd.read_excel(CANDIDATES_XLSX, sheet_name=[CANDIDATES_SHEET, ANTIGENS_SHEET])
candidates_df = workbook_sheets[CANDIDATES_SHEET]
antigens_df = workbook_sheets[ANTIGENS_SHEET]

## Get the antigen sequence for Nipah Glycoprotein G
antigen_matches = antigens_df.loc[antigens_df['antigen_id'] == TARGET_ANTIGEN_ID, 'antigen_sequence']
if antigen_matches.empty:
    ## Fail with a readable message instead of an IndexError from .values[0]
    raise ValueError(
        f"No row with antigen_id == '{TARGET_ANTIGEN_ID}' in sheet '{ANTIGENS_SHEET}' of {CANDIDATES_XLSX}"
    )
## If several rows share the id, the first one is used (same as before)
antigen_seq = antigen_matches.values[0]
print(f"Using Antigen Sequence: {antigen_seq}")

Using Antigen Sequence: ICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCTH


## iPSAE

In [None]:
## iPSAE command-line usage notes (reference only -- every line in this cell is a comment)

## Boltz-1 outputs:
##   python ipsae.py <path_to_boltz1_pae_npz_file> <path_to_boltz1_cif_file> <pae_cutoff> <dist_cutoff>
##   python ipsae.py pae_AURKA_TPX2_model_0.npz AURKA_TPX2_model_0.cif 10 10

## AF3 outputs:
##   python ipsae.py <path_to_af3_json_file> <path_to_af3_cif_file> <pae_cutoff> <dist_cutoff>
##   python ipsae.py fold_aurka_tpx2_full_data_0.json fold_aurka_tpx2_model_0.cif 10 10

## AF3 examples for the candidates in this project:
##   python ipsae.py fold_sbio_nipahgpg_001_full_data_3.json fold_sbio_nipahgpg_001_model_3.cif 10 10
##   python ipsae.py ../../data/candidates/structures_af3/fold_pdb_2vsm/fold_pdb_2vsm_full_data_0.json ../../data/candidates/structures_af3/fold_pdb_2vsm/fold_pdb_2vsm_model_0.cif 10 10

In [None]:
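# ## DISABLED (kept for reference): loop through each candidate and run the ipsae script directly.
# ## The active cell that follows calls helper_scripts/boltz_ipsae_score.py instead.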
# for idx, row in candidates_df.iterrows():
#     antibody_id = row['antibody_id']
#     print(f"Running iPSAE on Candidate: {antibody_id}")
#     prediction_dir = f'../data/candidates/boltz_results_{antibody_id}/predictions/{antibody_id}'

#     npz_file = os.path.join(prediction_dir, f'pae_{antibody_id}_model_0.npz')
#     cif_file = os.path.join(prediction_dir, f'{antibody_id}_model_0.cif')

#     ## If the files do not exist, skip
#     if not os.path.exists(npz_file) or not os.path.exists(cif_file):
#         print(f"Files not found for {antibody_id}, skipping...")
#         continue

#     command = f'python helper_scripts/ipsae.py {npz_file} {cif_file} 10 10'
#     os.system(command)


In [3]:
## Loop through each candidate and score its Boltz-2 prediction with the
## boltz_ipsae_score.py helper. Candidates without a prediction directory are
## skipped; a failing helper run is reported but does not stop the loop.
SCORE_SCRIPT = 'helper_scripts/boltz_ipsae_score.py'

for antibody_id in candidates_df['antibody_id']:
    print(f"Scoring Boltz-2 outputs for Candidate: {antibody_id}")
    prediction_dir = f'../data/candidates/boltz_results_{antibody_id}/predictions/{antibody_id}/'
    stem_name = f'{antibody_id}_model_0'

    ## Check if prediction directory exists
    if not os.path.isdir(prediction_dir):
        print(f"Prediction directory not found for {antibody_id}, skipping...")
        continue

    ## Pass an argument list (no shell) so ids/paths containing spaces or shell
    ## metacharacters are handed to the script verbatim, and use the kernel's
    ## own interpreter rather than whichever `python` happens to be on PATH.
    completed = subprocess.run(
        [sys.executable, SCORE_SCRIPT, prediction_dir, stem_name],
        capture_output=True,
        text=True,
    )

    ## Echo the helper's output into the cell, right after this candidate's header line
    print(completed.stdout, end='')
    if completed.stderr:
        print(completed.stderr, end='', file=sys.stderr)

    ## os.system silently discarded the exit status; surface failures instead
    if completed.returncode != 0:
        print(f"Scoring failed for {antibody_id} (exit code {completed.returncode})")

Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-001
{'average_pae': 12.403783, 'average_pde': 4.492502, 'average_plddt': 0.9237351, 'ipsae_score': 0.49246241186062495, 'pae_score': 0.5865405718485515, 'pde_score': 0.8502499262491862, 'plddt_score': 0.009237350821495056}
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-002
{'average_pae': 13.11187, 'average_pde': 5.195256, 'average_plddt': 0.9222311, 'ipsae_score': 0.4759892000754674, 'pae_score': 0.5629376729329427, 'pde_score': 0.8268247922261556, 'plddt_score': 0.009222310781478883}
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-003
{'average_pae': 5.6820803, 'average_pde': 1.4713136, 'average_plddt': 0.90449667, 'ipsae_score': 0.6122392838001252, 'pae_score': 0.8105973243713379, 'pde_score': 0.9509562134742737, 'plddt_score': 0.00904496669769287}
Scoring Boltz-2 outputs for Candidate: sbio-nipahgpg-004
{'average_pae': 6.7126575, 'average_pde': 1.9524444, 'average_plddt': 0.9116491, 'ipsae_score': 0.5937084036270778, '