In [2]:
#Load dependencies
from rdkit import Chem
from rdkit.Chem import DataStructs
from rdkit.Chem import Draw

In [3]:
# Reference structure: alpha-ketoglutarate in its doubly deprotonated (2-)
# form, parsed from SMILES.
ref = Chem.MolFromSmiles('O=C([O-])CCC(=O)C(=O)[O-]')

# RDKit fingerprint of the reference; this is the query every candidate
# molecule is compared against.
fp1 = Chem.RDKFingerprint(ref)

# Render the reference structure and display it.
ref_image = Draw.MolToImage(ref)
ref_image.show()

In [5]:
# Load the candidate molecules from the SDF file
suppl = Chem.SDMolSupplier('set_1.sdf')

# Collect (molecule, similarity) pairs whose Tanimoto similarity to the
# reference fingerprint `fp1` is at least 0.4.
similar_molecules = []

for mol in suppl:
    if mol is None:  # record could not be loaded; skip it
        continue
    fp2 = Chem.RDKFingerprint(mol)
    similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
    if similarity >= 0.4:
        similar_molecules.append((mol, similarity))

# Sort by similarity, most similar first
similar_molecules.sort(key=lambda x: x[1], reverse=True)

# Write the sorted molecules to a new .sdf file and plot each one.
w = Chem.SDWriter('sorted_molecules.sdf')
try:
    for mol, similarity in similar_molecules:
        # Write the molecule to the new .sdf file
        w.write(mol)

        # Plot the molecule
        Draw.MolToImage(mol).show()
finally:
    # Always close the writer, even if writing or plotting fails part-way,
    # so the records written so far are flushed and the file handle is
    # released.
    w.close()