# Submit

Collect the FEGrow MCS results and the default FEGrow run, then build a single submission: use the MCS pose wherever one exists and fill the remaining gaps with the pose from the default run.

In [1]:
from rdkit import Chem
import pathlib
import polaris as po

# Load the competition by its identifier; this object is used below to get the
# train/test split and to submit the final predictions.
competition = po.load_competition("asap-discovery/antiviral-ligand-poses-2025")

In [2]:
# Only `test` is used below: each entry provides the "CXSMILES" and
# "Protein Label" fields read when the predictions are assembled.
train, test = competition.get_train_test_split()

In [7]:
# load our best results
# Every entry under the outputs folder whose name contains "ligand" is expected
# to hold its selected pose in "best_pose.sdf".
mcs_mers_folder = pathlib.Path("full_run-MERS/outputs")
mcs_mers_ligands = []
# Iterate in sorted order: the order glob yields entries in depends on the
# filesystem, and that order decides which pose is kept when two ligands end up
# with the same canonical SMILES in the lookup built from this list.
for lig_dir in sorted(mcs_mers_folder.glob("*")):
    if "ligand" not in lig_dir.name:
        continue
    best_pose = Chem.MolFromMolFile(lig_dir.joinpath("best_pose.sdf"))
    # poses that RDKit could not parse come back as None and are left out
    if best_pose is not None:
        mcs_mers_ligands.append(best_pose)
            

In [8]:
# Load the MCS poses for SARS from a single multi-record SDF.
# SDMolSupplier yields None for a record it cannot parse; drop those so that
# only real molecules reach the SMILES lookup (the MERS loader already skips
# None poses in the same way).
supplier = Chem.SDMolSupplier("sars_run_mcs/best_sars.sdf")
mcs_sars_ligands = [mol for mol in supplier if mol is not None]

In [9]:
# load the mixture of mols
# SDMolSupplier yields None for a record it cannot parse; drop those so that
# only real molecules reach the SMILES lookup (the MERS loader already skips
# None poses in the same way).
supplier = Chem.SDMolSupplier("default_fegrow/submitted_default.sdf")
default_submission_ligands = [mol for mol in supplier if mol is not None]

In [10]:
# check how many ligands we have
# order: (MCS MERS poses, MCS SARS poses, default-run poses)
len(mcs_mers_ligands), len(mcs_sars_ligands), len(default_submission_ligands)

(62, 83, 195)

In [12]:
# make quick lookups for the ligands
# Each pose is keyed by its canonical SMILES (written out from the molecule and
# then re-canonicalised). If two poses share a key, the one that comes later in
# the list replaces the earlier one.
sars_by_smiles = {
    Chem.CanonSmiles(Chem.MolToSmiles(pose)): pose for pose in mcs_sars_ligands
}
mers_by_smiles = {
    Chem.CanonSmiles(Chem.MolToSmiles(pose)): pose for pose in mcs_mers_ligands
}

In [13]:
# Number of unique canonical SMILES per target: (SARS, MERS).
# lost one mers molecule? NOTE(review): the MERS lookup has one entry fewer
# than the MERS ligand list, presumably because two poses share a canonical
# SMILES and the later one replaced the earlier -- confirm.
len(sars_by_smiles), len(mers_by_smiles)

(83, 61)

In [14]:
# make a lookup for the default
# Keyed by canonical SMILES only; when several default poses share a SMILES the
# last one in the list is the one that is kept.
default_mol_by_smiles = {
    Chem.CanonSmiles(Chem.MolToSmiles(pose)): pose
    for pose in default_submission_ligands
}

In [17]:
# duplicate ligands?
# Number of unique canonical SMILES in the default run. A value below the
# number of default poses loaded means some SMILES occur more than once
# (presumably the same ligand posed against both targets -- confirm).
len(default_mol_by_smiles)

161

In [34]:
# build our final predicted list
# For every test entry pick the MCS pose generated for its target protein and
# fall back to the default run when no MCS pose exists.
import pandas as pd

prediction_data = []  # one provenance record per test entry
best_predictions = []  # one pose per test entry, in test order
for index, test_entry in enumerate(test):
    smiles = Chem.CanonSmiles(test_entry["CXSMILES"])
    label = test_entry["Protein Label"]
    row_data = {"SMILES": smiles, "Protein Label": label}

    if label == "MERS-CoV Mpro":
        # try and grab from mcs mers list
        best_mol = mers_by_smiles.get(smiles)
        if best_mol is not None:
            print("Using mcs mers for ", smiles)
    else:
        # any other label is treated as SARS
        best_mol = sars_by_smiles.get(smiles)
        if best_mol is not None:
            print("Using mcs sars for ", smiles)

    if best_mol is not None:
        row_data["source"] = "mcs"
    else:
        # get the mol from the default list
        row_data["source"] = "default"
        print("Using default for ", smiles)

        # The SMILES-keyed default lookup cannot tell the targets apart: a
        # ligand with more than one default pose keeps only the last one, so a
        # plain lookup may return the pose built for the other protein.
        # Prefer the default pose sitting at the same position as this test
        # entry when its SMILES agrees.
        # NOTE(review): this assumes the default SDF was written in test
        # order -- confirm. If the order differs the SMILES comparison fails
        # and the SMILES-keyed lookup is used, exactly as before.
        positional = None
        if index < len(default_submission_ligands):
            positional = default_submission_ligands[index]
        if (
            positional is not None
            and Chem.CanonSmiles(Chem.MolToSmiles(positional)) == smiles
        ):
            best_mol = positional
        elif smiles in default_mol_by_smiles:
            best_mol = default_mol_by_smiles[smiles]
        else:
            # same exception type as the bare dict lookup, but with context
            raise KeyError(
                f"No MCS or default pose for test entry {index} "
                f"({label}): {smiles}"
            )

    best_predictions.append(best_mol)
    prediction_data.append(row_data)

Using default for  O=C1c2ccc(Cl)cc2[C@@]2(CN1Cc1nnco1)C(=O)N(c1cncc3ccccc13)C[C@@H]2CNc1ncncn1
Using mcs mers for  COc1ccc(N(Cc2cccc(Cl)c2)C(=O)Cc2cncc3ccccc23)cc1
Using mcs sars for  CNC(=O)CN1C[C@@]2(C(=O)N(c3cncc4ccccc34)C[C@@H]2CNc2ccncn2)c2cc(Cl)ccc2C1=O
Using default for  CNC(=O)CN1C[C@@]2(C(=O)N(c3cncc4ccccc34)C[C@@H]2[NH3+])c2cc(Cl)ccc2C1=O
Using default for  O=C1c2ccc(Cl)cc2[C@@]2(CN1Cc1ccnnc1)C(=O)N(c1cncc3ccccc13)C[C@@H]2CNc1ccncn1
Using mcs sars for  CNC(=O)CN1C[C@@]2(C(=O)N(c3cncc4ccccc34)C[C@@H]2CNc2cnn(C)c2)c2cc(Cl)ccc2C1=O
Using mcs sars for  Cc1cnc(CN2C[C@@]3(C(=O)N(c4cncc5ccccc45)C[C@@H]3C)c3cc(F)ccc3C2=O)cn1
Using mcs sars for  CNC(=O)CN1C[C@@]2(C(=O)N(c3cncc4ccccc34)C[C@@H]2COC(C)C)c2cc(Cl)ccc2C1=O
Using mcs sars for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(Cc1nccn1C)C(=O)c1ccc(F)cc12
Using mcs mers for  C[C@H]1c2ccsc2CCN1C(=O)Cc1cncc2ccccc12
Using default for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(CC#N)C(=O)c1ccc(F)cc12
Using default for  CCC(=O)[N@H+]1CCCc2c(N(C

Using default for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(Cc1ccncn1)C(=O)c1ccc(F)cc12
Using mcs sars for  O=C(Cc1cncc2ccccc12)N1CCC2(CCC2)CC1
Using mcs sars for  O=C(Cc1cncc2ccccc12)N1CCC(C2CC2)CC1
Using mcs sars for  CNC(=O)CN1C[C@@]2(C(=O)N(c3cncc4ccccc34)C[C@@H]2CO)c2cc(Cl)ccc2C1=O
Using mcs mers for  COC[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(Cc1ncc[nH]1)C(=O)c1ccc(Cl)cc12
Using mcs mers for  O=C(Cc1cncc2ccccc12)N1CCc2sccc2C1
Using mcs sars for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(Cc1nc[nH]n1)C(=O)c1ccc(F)cc12
Using mcs mers for  O=C1c2ccc(Cl)cc2[C@@]2(CN1Cc1ccnnc1)C(=O)N(c1cncc3ccccc13)C[C@@H]2CNc1nncs1
Using mcs sars for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(Cc1cocn1)C(=O)c1ccc(F)cc12
Using mcs mers for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[C@@]12CN(CC[N@@H+]1CCC3(COC3)C1)C(=O)c1ccc(Cl)cc12
Using mcs sars for  CC[C@H]1c2ccsc2CCN1C(=O)Cc1cncc2ccccc12
Using default for  Cn1ccc(CN2C[C@]3(CCN(c4cncc5ccccc45)C3=O)c3cc(Cl)ccc3C2=O)n1
Using mcs mers for  C[C@H]1CN(c2cncc3ccccc23)C(=O)[

In [35]:
# Tabulate where each prediction came from and count how many entries had to
# fall back to the default run.
df = pd.DataFrame(prediction_data)
len(df[df["source"] == "default"])

52

In [36]:
# one pose is appended per test entry, so this is the number of entries processed
len(best_predictions)

195

In [37]:
import base64
import datamol as dm

def serialize_rdkit_mol(mol: Chem.Mol):
    """Encode an RDKit molecule as a base64 ASCII string.

    The molecule is converted to RDKit's binary form with every property kind
    included (``PropertyPickleOptions.AllProps``); the resulting bytes are
    base64-encoded and returned as text.
    """
    pickled = mol.ToBinary(Chem.PropertyPickleOptions.AllProps)
    encoded = base64.b64encode(pickled)
    return str(encoded, "ascii")

# base64 text form of every pose, in the same order as best_predictions;
# this list is what gets passed to submit_predictions below
best_predictions_serialized = [serialize_rdkit_mol(mol) for mol in best_predictions]

In [38]:
# check it works
# Decode the first serialized pose back into an RDKit Mol and compare it with
# the original molecule using datamol; only the first entry is checked.
deserialized = Chem.Mol(base64.b64decode(best_predictions_serialized[0].encode("ascii")))
dm.same_mol(best_predictions[0], deserialized)

True

In [39]:
# write out the single sdf file
# Close the writer explicitly, even if a write fails, so that everything that
# was written is flushed to disk; a writer that is left open can leave the
# file incomplete for as long as the object stays alive.
writer = Chem.SDWriter("mcs_submitted.sdf")
try:
    for mol in best_predictions:
        writer.write(mol)
finally:
    writer.close()

In [40]:
# final check of the number of poses before submitting
len(best_predictions)

195

In [42]:
# Submit the base64-serialized poses (one per test entry, in test order)
# together with the metadata describing this entry.
competition.submit_predictions(
    predictions=best_predictions_serialized,
    prediction_name="openfe-newcastle-edinburgh-mcs-fegrow",
    prediction_owner="jthorton",
    report_url="https://github.com/jthorton/polaris_fegrow_mcs",
    github_url="https://github.com/jthorton/polaris_fegrow_mcs",
    description="FEGrow with mcs core structure finder.",
    tags=["FEGrow", "ANI", "OpenMM", "RDKit", "Sage", "OpenFF"],
    user_attributes={"Framework": "FEGrow-MCS", "Method": "Constrained geometry optimisation with ML/MM"}
)