In [1]:
import datetime
from random import randrange
from pathlib import Path
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import exmol
from exmol.plot_utils  import _mol_images
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors

# Create a folder named after today's date (ISO format) for the generated images
dt_now = datetime.date.today().isoformat()

path = Path.cwd() / "assets" / dt_now

# exist_ok=False makes mkdir raise when the folder is already present,
# which is how the two cases are told apart.
try:
    path.mkdir(parents=True, exist_ok=False)
    folder_status = "Folder was created"
except FileExistsError:
    folder_status = "Folder is already there"
print(folder_status)

Folder is already there


In [3]:
#Toy model
def my_model(smiles):
    """
    Toy stand-in for a real predictive model.

    The `smiles` argument is currently ignored and a random integer
    from 0 to 9 is returned. Replace the body with your own pipeline:
    1) SMILES sanitization
    2) Data featurization
    3) Prediction of the model from the features of 2)
        Exmol is compatible with both regression and classification.
    """
    prediction = randrange(10)  # Should be the value of 3)
    return prediction

# Molecule of interest, given as a SMILES string
base = 'O=C(C)Oc1ccccc1C(=O)O'


def _predict(smi, sel):
    """Score one molecule with my_model using only the first argument.

    The second argument is ignored (presumably the SELFIES form of the
    same molecule -- confirm against the exmol version in use).
    """
    return my_model(smi)


# batched=False: the model is called with one molecule at a time
samples = exmol.sample_space(base, _predict, batched=False)

🤘Done🤘: 100% 1919.0/1919 [00:20<00:00, 92.35it/s]                   


In [4]:
# Draw every sample and save it as <index>.svg inside the dated assets folder
for idx, sample in enumerate(samples):
    fig, ax = plt.subplots(figsize=(5, 5))
    # The first sample is passed along as the leading entry (presumably the
    # base molecule -- confirm); element [1] is the image of the current sample.
    mol_img = _mol_images([samples[0], sample], (400, 400), 5)[1]
    ax.imshow(np.asarray(mol_img))
    ax.axis('off')
    # Let the axes fill the whole figure
    ax.set_position([0, 0, 1, 1])
    fig.savefig("{}/{}.svg".format(path, idx))
    # Clear and close the figure so open figures do not pile up across iterations
    fig.clf()
    plt.close(fig)

In [8]:
# Build one row of metadata per sampled molecule and export it as data/data.csv.
# Make sure the images referenced by IMG_URL are stored in the assets folder.
# Edit the columns based on your needs.
n_samples = len(samples)

# Parse every SMILES once and reuse the molecule for all RDKit descriptors
mols = [Chem.MolFromSmiles(s.smiles) for s in samples]

df = pd.DataFrame({
    "IMG_URL": ["assets/{}/{}.svg".format(dt_now, i) for i in range(n_samples)],
    "NAME": ["Chemical Space {}".format(i) for i in range(n_samples)],
    # PAGE is intentionally left empty for every row
    "PAGE": [""] * n_samples,
    # First and last component of each sample's position
    "PCA1": [s.position[0] for s in samples],
    "PCA2": [s.position[-1] for s in samples],
    "TPSA": [round(rdMolDescriptors.CalcTPSA(m), 2) for m in mols],
    "MW": [round(rdMolDescriptors._CalcMolWt(m), 2) for m in mols],
    "Predictions": [s.yhat for s in samples],
    "DESC": ["Insert cool descriptions here if you want!"] * n_samples,
    "FORM": [rdMolDescriptors.CalcMolFormula(m) for m in mols],
})

# to_csv does not create missing directories, so make sure "data/" exists first
csv_path = Path("data") / "data.csv"
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)