In [2]:
import os

import altair as alt
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw


In [3]:
# Read the sample data set into a DataFrame: 50 drug-like ligands from the
# RCSB Ligand Expo ( http://ligand-expo.rcsb.org/ ), stored in ./pdbexpo.csv
# (path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer='./pdbexpo.csv')

In [4]:
# Preview the first five rows (five is also head()'s default).
df.head(n=5)

Unnamed: 0,smiles,name,molwt,clogp,hba,hbd,hb,tpsa,n_rot,phosphate
0,Cc1ccccc1.Cl[Ru]Cl.C2N3CN4CN2CP(C3)C4,RAX,420.981539,3.53162,3,0,3,23.31,0,False
1,C[CH](O)c1nccn1Cc2cc(on2)c3ccc(cc3)C#CCCCO,H4O,351.158292,2.7637,6,2,8,84.31,6,False
2,Clc1ccccc1NC2=NC(Nc3ccccc23)=C4C=CC=CC4=O,O74,347.08254,4.5309,4,2,6,53.49,1,False
3,Fc1ccccc1[S](=O)(=O)NNC(=O)c2ccccc2,NB7,294.047441,1.449,3,2,5,83.65,4,False
4,COc1ccc(CN2CCc3n(CCO)nc(C(=O)NCc4ccccc4)c3C2)c...,6RB,470.231791,3.5255,6,2,8,79.62,8,False


In [5]:
# Parse every SMILES string into an RDKit molecule object.
df['mols'] = [Chem.MolFromSmiles(smi) for smi in df['smiles']]

# MolFromSmiles returns None (instead of raising) when a SMILES string cannot
# be parsed. Stop here with the offending ligand names rather than letting a
# None reach the drawing call below.
unparsed = [name for mol, name in zip(df['mols'], df['name']) if mol is None]
if unparsed:
    raise ValueError(f'RDKit could not parse the SMILES for: {unparsed}')

#generate some fake potency values, uniform on [4, 9).
# A dedicated, seeded generator keeps the values identical across
# Restart & Run All without touching numpy's global random state.
potency_rng = np.random.default_rng(0)
df['potency'] = potency_rng.random(size=len(df))*5 + 4

#save all as img files, one PNG per ligand, named (and captioned) by ligand name.
# The output directory is created first so a fresh checkout does not fail on
# the first write.
img_dir = './imgs'
os.makedirs(img_dir, exist_ok=True)
for mol, name in zip(df['mols'], df['name']):
    Draw.MolToImageFile(mol, f'{img_dir}/{name}.png', legend=name)

# Encode `(x,y)` scatterplot tooltips with image URL:

In [6]:
# Build one image URL per ligand from its name.
# Note: per the original author's note, the column has to be named 'image',
# otherwise altair treats the tooltip value as plain text.
col = 'image'
img_url_root = 'https://raw.githubusercontent.com/ljmartin/altair_mols/master/imgs/'
df[col] = img_url_root + df['name'] + '.png'


In [7]:
# Scatterplot of clogp (y) against molwt (x), coloured by potency.
# The tooltip is bound to the image-URL column, so hovering over a point
# refers to that ligand's image URL.
chart = (
    alt.Chart(df[['molwt', 'clogp', col, 'potency']])
    .mark_point()
    .encode(
        x='molwt:Q',
        y=alt.Y('clogp:Q'),
        color='potency',
        tooltip=[col],
    )
    .properties(width=500, height=400)
)

# To view inside jupyter, evaluate `chart.interactive()` as the last
# expression of a cell.

# Write the interactive (pan/zoom) version as a standalone HTML page ...
chart.interactive().save('scatterplot.html')
# ... and the plain chart specification as JSON.
chart.save('scatterplot.json')

# Now encode the y axis with images as well:

Since the images are always visible along the y axis, they take up more room. We will just use 10 molecules to make it legible. Note that, in this case, the y axis has one set of molecules, and the tooltips show another set of molecules (this might be used, for example, in comparing scaffold hops or nearest neighbours).

In [8]:
# Number of ligands shown in the image-labelled chart. Kept small because
# every y-axis label is a rendered image.
n_shown = 10

#take n_shown ligands at random:
small_df = df.sample(n_shown).copy()
#set the yAxis url path: each ligand's own image becomes its y-axis label.
small_df = small_df.rename({'image':'yAxis'}, axis=1)

#now set the tooltips url path: a second, independently drawn set of images.
# np.random.permutation returns a shuffled COPY. The previous
# np.random.shuffle(df['image'].values) worked in place on the array taken
# from df, which can reorder df's own 'image' column (breaking the pairing
# between names and images in df) or fail if that array is read-only.
names = np.random.permutation(df['image'].values)
small_df['image'] = names[:n_shown]

In [10]:
h = 800  # shared height, so the image column and the plot rows line up

# Left panel: one image per ligand, placed on an ordinal 'name' axis.
# The axis itself is hidden (no domain line, ticks, title or labels), so the
# images act as the y-axis labels of the plot next to it.
yAxis = (
    alt.Chart(small_df[['name', 'image', 'yAxis', 'potency']])
    .mark_image()
    .encode(
        url="yAxis",
        y=alt.Y(
            'name:O',
            axis=alt.Axis(
                title=None,
                labels=False,
                ticks=False,
                domainOpacity=0,
            ),
        ),
    )
    .properties(width=150, height=h)
)

# Right panel: potency per ligand on the same ordinal 'name' axis, coloured
# by tpsa. The tooltip uses the 'image' column, i.e. the second set of images.
chart = (
    alt.Chart(small_df[['name', 'image', 'yAxis', 'potency', 'tpsa']])
    .mark_point(filled=True, size=150)
    .encode(
        x=alt.X('potency', scale=alt.Scale(zero=False)),
        y=alt.Y(
            'name:O',
            axis=alt.Axis(
                labels=True,
                grid=True,
                ticks=True,
                domain=False,
            ),
        ),
        color='tpsa',
        tooltip=['image'],
    )
    .properties(width=500, height=h)
)

# Put the two panels side by side with no gap and no view border.
ch = (
    alt.concat(yAxis, chart)
    .configure_concat(spacing=0)
    .configure_view(strokeOpacity=0)
)

# Export as a standalone HTML page and as a JSON chart specification.
ch.save('ylabels.html')
ch.save('ylabels.json')