In [1]:
import pandas as pd
import numpy as np
import os
from rdkit import Chem
from rdkit.Chem import PandasTools, rdDepictor, AllChem
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG
from rdkit.Chem.PandasTools import ChangeMoleculeRendering

#Bokeh library for plotting
import json
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.transform import factor_cmap
from bokeh.plotting import figure, output_file, save

from sklearn.manifold import TSNE

In [2]:
import os
os.getcwd()

'C:\\Users\\sunoj'

In [3]:
os.chdir('C://Users/sunoj/Downloads/')

In [4]:
df=pd.read_csv('gargee_meta_yield406.csv')

In [5]:
df.head()

Unnamed: 0,smiles,Yield
0,CC1(C)[N+](c2c(C(C)C)cccc2C(C)C)=C([Ru]3(Cl)(C...,67
1,CC1(C)[N+](c2c(C(C)C)cccc2C(C)C)=C([Ru]3(Cl)(C...,71
2,CC1(C)[N+](c2c(C(C)C)cccc2C(C)C)=C([Ru]3(Cl)(C...,50
3,CC1(C)[N+](c2c(C(C)C)cccc2C(C)C)=C([Ru]3(Cl)(C...,71
4,CC1(C)[N+](c2c(C(C)C)cccc2C(C)C)=C([Ru]3(Cl)(C...,50


In [6]:
mols=[Chem.MolFromSmiles(i) for i in df['smiles']]
fps = [AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=1024) for x in mols if x!=None]
X=np.array(fps)
print(X.shape)

(406, 1024)


In [7]:
tsne = TSNE(random_state=0).fit_transform(X) #doing TSNE

In [8]:
def _prepareMol(mol,kekulize):
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except:
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        rdDepictor.Compute2DCoords(mc)
    return mc

def moltosvg(mol,molSize=(450,200),kekulize=True,drawer=None,**kwargs):
    mc = _prepareMol(mol,kekulize)
    if drawer is None:
        drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
    drawer.DrawMolecule(mc,**kwargs)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    return SVG(svg.replace('svg:',''))

In [10]:
from tqdm import tqdm
  
for i in tqdm (range (100), desc="Loading..."):
    mols=[Chem.MolFromSmiles(i) for i in df['smiles']]

Loading...: 100%|████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.66it/s]


In [11]:
for i in tqdm (range (100), desc="Loading..."):
    svgs = [moltosvg(m).data for m in mols]

Loading...: 100%|████████████████████████████████████████████████████████████████████| 100/100 [06:21<00:00,  3.81s/it]


In [15]:
ChangeMoleculeRendering(renderer='PNG')


source = ColumnDataSource(data=dict(x=tsne[:,0], y=tsne[:,1], desc= df.index, 
                                    svgs=svgs))

hover = HoverTool(tooltips="""
    <div>
        <div>@svgs{safe}
        </div>
        <div>
            <span style="font-size: 17px; font-weight: bold;">@desc</span>
        </div>
    </div>
    """
)
interactive_map = figure(plot_width=1000, plot_height=1000, tools=['reset,box_zoom,wheel_zoom,zoom_in,zoom_out,pan',hover],
           title="Metathesis Reaction Map")



interactive_map.circle('x', 'y', size=5, source=source, fill_alpha=0.2);


In [16]:
output_file("Gargee_interactive_map.html")
save(interactive_map)

'C:\\Users\\sunoj\\Downloads\\Gargee_interactive_map.html'