In [None]:
# Query molecules as SMILES; charges adjusted to match RDKit Standardized Database.
# Each entry pairs a human-readable molecule name with its SMILES string; only
# the SMILES strings are kept in the resulting list, in the order written here.
rdkit_atom_0_query_adjusted_smiles = [
    query_smiles
    for _molecule_name, query_smiles in (
        ("penicillin", "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)[O-]"),
        ("nirmatrelvir", "CC(C)(C)C(NC(=O)C(F)(F)F)C(=O)N1CC2C(C1C(=O)NC(C#N)CC1CCNC1=O)C2(C)C"),
        ("azt", "Cc1cn(C2CC(N=[N+]=[N-])C(CO)O2)c(=O)[nH]c1=O"),
        ("lsd", "CCN(CC)C(=O)C1C=C2c3cccc4[nH]cc(c34)CC2[NH+](C)C1"),
        ("fentanyl", "CCC(=O)N(c1ccccc1)C1CC[NH+](CCc2ccccc2)CC1"),
        ("acid blue 25 free acid", "Nc1c(S(=O)(=O)[O-])cc(Nc2ccccc2)c2c1C(=O)c1ccccc1C2=O"),
        ("avobenzone", "COc1ccc(C(=O)CC(=O)c2ccc(C(C)(C)C)cc2)cc1"),
        ("2-diphenylaminocarbazole", "c1ccc(N(c2ccccc2)c2ccc3c(c2)[nH]c2ccccc23)cc1"),
    )
]

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.figure import figaspect
from difflib import SequenceMatcher
from transformers import AutoTokenizer
# Global matplotlib text styling: serif body font, with math text rendered
# using the DejaVu Serif font set.
plt.rcParams.update(
    {
        "font.family": "serif",
        "mathtext.fontset": "dejavuserif",
    }
)


In [None]:
from rdkit import Chem, DataStructs
def tanimoto(smi1, smi2):
    """
    Return the Tanimoto similarity of two molecules given as SMILES strings.

    Each SMILES is parsed with RDKit, converted to an RDKit fingerprint
    (``Chem.RDKFingerprint``), and the two fingerprints are compared with
    ``DataStructs.TanimotoSimilarity``.

    Adapted from https://medium.com/data-professor/how-to-calculate-molecular-similarity-25d543ea7f40

    Parameters
    ----------
    smi1, smi2 : str
        SMILES strings of the two molecules to compare.

    Returns
    -------
    float
        The similarity, rounded to two decimal places.

    Raises
    ------
    ValueError
        If either SMILES string cannot be parsed by RDKit.
    """
    mols = []
    for smi in (smi1, smi2):
        mol = Chem.MolFromSmiles(smi)
        # MolFromSmiles reports a parse failure by returning None rather than
        # raising; fail here with the offending input instead of letting the
        # fingerprint call die on a None argument.
        if mol is None:
            raise ValueError(f"Could not parse SMILES: {smi!r}")
        mols.append(mol)

    fp1, fp2 = (Chem.RDKFingerprint(mol) for mol in mols)
    return round(DataStructs.TanimotoSimilarity(fp1, fp2), 2)

In [None]:

# Axis labels for the heatmap, listed in the same order as the entries of
# rdkit_atom_0_query_adjusted_smiles.
names_list = [
    "Penicillin G",
    "Nirmatrelvir",
    "Zidovudine",
    "LSD",
    "Fentanyl",
    "Acid Blue 25 FA",
    "Avobenzone",
    "2-dPAC",
]

# Each query molecule needs exactly one label; fail early rather than drawing
# a heatmap whose tick labels do not line up with its rows and columns.
n_queries = len(rdkit_atom_0_query_adjusted_smiles)
if len(names_list) != n_queries:
    raise ValueError(
        f"names_list has {len(names_list)} labels but there are "
        f"{n_queries} query SMILES"
    )

# Pairwise fingerprint similarity between all query molecules. The matrix is
# sized from the query list so adding or removing a query needs no other edit.
arr = np.zeros((n_queries, n_queries))
for count_x, smi_x in enumerate(rdkit_atom_0_query_adjusted_smiles):
    for count_y, smi_y in enumerate(rdkit_atom_0_query_adjusted_smiles):
        arr[count_x, count_y] = tanimoto(smi_x, smi_y)

fig, ax = plt.subplots()
cmap = sns.color_palette("viridis", as_cmap=True)
sns.heatmap(
    arr,
    vmin=0,
    vmax=1,
    cmap=cmap,
    xticklabels=names_list,
    yticklabels=names_list,
    cbar_kws={"label": "Fingerprint Tanimoto Similarity"},
    annot=True,
    square=True,
    linewidths=0.5,
    ax=ax,  # draw on the axes created above instead of the implicit current axes
)
ax.xaxis.tick_top()  # x axis on top
ax.tick_params(length=0)
plt.setp(ax.get_yticklabels(), rotation=0, ha="right",
         rotation_mode="anchor")
plt.setp(ax.get_xticklabels(), rotation=45, ha="left",
         rotation_mode="anchor")
fig.savefig("query_fingerprint_matrix.png", dpi=300, bbox_inches="tight")