# Analysis of recommendations

See the `Demo` notebook for how the targets are generated.

In [66]:
from rdkit import Chem
from rdkit.Chem import Descriptors
import pandas as pd

In [4]:
# Load the target SMILES strings, one per line, stripping surrounding
# whitespace (including the trailing newline) from each.
with open("targets.smi", "r") as read_file:
    smiles = [line.strip() for line in read_file]

In [96]:
# Parse every SMILES string into an RDKit molecule with sanitization enabled.
# NOTE(review): RDKit returns None for strings it cannot parse -- confirm the
# input file contains only valid SMILES, since later cells call methods on
# every entry.
molecules = [
    Chem.MolFromSmiles(smiles_string, sanitize=True)
    for smiles_string in smiles
]

In [97]:
# Refresh each molecule's property cache with strict checking disabled.
# The call is made only for its effect on the molecule, so a plain loop is
# used rather than building a throwaway list of return values.
for mol in molecules:
    mol.UpdatePropertyCache(strict=False)

In [106]:
# Substructure queries used as boolean descriptors, compiled from SMARTS.
cyanide, alkene, acetylenic, aromatic_c, sp2_nitrogen = (
    Chem.MolFromSmarts(pattern)
    for pattern in (
        "C#N",    # aliphatic carbon triple-bonded to nitrogen
        "C=C",    # aliphatic carbon-carbon double bond
        "C#C",    # aliphatic carbon-carbon triple bond
        "c",      # any aromatic carbon atom
        "[N^2]",  # aliphatic nitrogen, sp2 hybridised (RDKit `^` extension)
    )
)

In [99]:
def describe(mol, *smarts):
    """Build one descriptor row for a molecule.

    Parameters
    ----------
    mol
        Molecule passed to the RDKit descriptor functions and queried with
        ``HasSubstructMatch``.
    *smarts
        Substructure query objects, tested against ``mol`` in the order given.

    Returns
    -------
    list
        ``[MolWt, NumHeteroatoms, match_1, match_2, ...]`` where each
        ``match_i`` is the result of ``mol.HasSubstructMatch`` for the i-th
        query.
    """
    properties = [Descriptors.MolWt(mol), Descriptors.NumHeteroatoms(mol)]
    matches = [mol.HasSubstructMatch(query) for query in smarts]
    return properties + matches

In [107]:
# One descriptor row per molecule; the query order here must match the column
# order used when the rows are turned into a DataFrame.
data = [
    describe(mol, cyanide, alkene, acetylenic, aromatic_c, sp2_nitrogen)
    for mol in molecules
]

In [108]:
# Tabulate the descriptor rows: two numeric descriptors followed by one
# boolean column per substructure query.
df = pd.DataFrame(
    data,
    columns=[
        "MW",
        "Heteroatoms",
        "Cyanide",
        "Alkene",
        "Acetylenic",
        "Aromatic Carbon",
        "sp2 Nitrogen",
    ],
)

In [109]:
# Display the descriptor table.
df

Unnamed: 0,MW,Heteroatoms,Cyanide,Alkene,Acetylenic,Aromatic Carbon,sp2 Nitrogen
0,102.136,0,False,False,True,False,False
1,80.086,1,False,False,True,False,False
2,103.124,1,True,False,True,False,False
3,27.026,1,True,False,False,False,False
4,118.139,2,True,False,False,True,False
...,...,...,...,...,...,...,...
795,93.085,2,True,False,True,False,False
796,94.113,1,False,False,True,False,False
797,41.053,1,True,False,False,False,False
798,39.057,0,False,False,True,False,False


In [113]:
# Keep only rows with no C=C, C#C or aromatic-carbon match. The "Cyanide" and
# "sp2 Nitrogen" columns are not part of this filter.
saturated = df.loc[~df[["Alkene", "Acetylenic", "Aromatic Carbon"]].any(axis=1)]

In [115]:
# Number of molecules excluded from `saturated`, i.e. the rows of `df` that
# are not in `saturated`.
df.shape[0] - saturated.shape[0]

619

In [111]:
# Rows whose molecule matches the sp2 nitrogen query; the boolean column is
# used directly as the row mask.
df.loc[df["sp2 Nitrogen"]]

Unnamed: 0,MW,Heteroatoms,Cyanide,Alkene,Acetylenic,Aromatic Carbon,sp2 Nitrogen
5,54.072,1,False,True,False,False,True
12,64.067,1,False,True,True,False,True
30,57.052,2,False,False,False,False,True
35,63.081,3,False,False,False,False,True
38,55.080,1,False,False,True,False,True
...,...,...,...,...,...,...,...
784,45.041,2,False,False,False,False,True
786,185.182,3,False,False,True,False,True
790,97.142,2,False,True,False,False,True
792,134.162,3,True,False,False,True,True


In [126]:
# Number of molecules without any aromatic carbon match.
(~df["Aromatic Carbon"]).sum()

694

In [128]:
def non_tmc1_element(smi: str, *elements) -> bool:
    """Return True if the SMILES string contains an atom of any given element.

    The SMILES is split into atom tokens before comparing, so a requested
    symbol only ever matches a whole atom: ``"P"`` matches phosphorus in
    ``CP`` or ``[PH2]`` but not the letter ``P`` inside ``[Pt]`` or ``[Pb]``.
    Aromatic (lower-case) atoms such as ``p`` count as their element, and the
    requested symbols are compared case-insensitively.

    Parameters
    ----------
    smi : str
        SMILES string to inspect.
    *elements : str
        Element symbols to look for, e.g. ``"Cl", "P", "Si"``.

    Returns
    -------
    bool
        True if at least one requested element occurs in ``smi``; False
        otherwise, including when no elements are given.
    """
    import re  # local import so this cell does not depend on another cell

    # First alternative: a bracket atom -- optional isotope digits, then the
    # element symbol; the remainder of the bracket (chirality, H count,
    # charge, atom class) is consumed so its letters are never read as atoms.
    # Second alternative: an organic-subset atom outside brackets, with the
    # two-letter symbols listed first so "Cl" is not read as "C".
    atom_pattern = r"\[\d*([A-Za-z][a-z]?)[^\]]*\]|(Cl|Br|[BCNOPSFIbcnops])"
    present = {
        (bracket or bare).capitalize()
        for bracket, bare in re.findall(atom_pattern, smi)
    }
    wanted = {element.capitalize() for element in elements}
    return not present.isdisjoint(wanted)

In [132]:
# Flag each target whose SMILES is reported by `non_tmc1_element` to contain
# Cl, P or Si. Relies on `smiles` being in the same order as the rows of `df`.
df["New element"] = [
    non_tmc1_element(smiles_string, "Cl", "P", "Si")
    for smiles_string in smiles
]

In [134]:
# Number of targets flagged in the "New element" column (True counts as 1).
df["New element"].sum()

82