In [1]:
import pandas as pd
import tmap as tm
from mhfp.encoder import MHFPEncoder
from faerun import Faerun

In [2]:
# Source data: the ChEMBL activity export for the assay
# "PUBCHEM_BIOASSAY: Navigating the Kinome."
# (https://www.ebi.ac.uk/chembl/assay_report_card/CHEMBL1963834/)
# The export is a semicolon-delimited CSV, hence the explicit `sep`.
ACTIVITY_CSV = "CHEMBL25-chembl_activity-X98QJiCI4eAUAQSKQevT44ZjymoCjs8alCsnJir8aUU=.csv.gz"
df = pd.read_csv(ACTIVITY_CSV, sep=";")

In [3]:
# Number of permutations used by the MinHashing algorithm. The encoder and
# the LSH Forest below are both constructed from this same value.
perm = 512

# MHFP encoder and LSH Forest, both sized by `perm`
enc = MHFPEncoder(perm)
lf = tm.LSHForest(perm)

# Encode every SMILES string in the dataframe as an MHFP fingerprint.
# Each fingerprint is wrapped in tm.VectorUint, the data type the
# LSH Forest expects for its input vectors.
fingerprints = [
    tm.VectorUint(enc.encode(smiles))
    for smiles in df["Smiles"]
]

# Insert all fingerprints in one batch, then build the index
lf.batch_add(fingerprints)
lf.index()

# Compute the layout: x / y are the point coordinates and s / t are used
# further down as the "from" / "to" ends of the tree edges. The fifth
# return value is not needed here and is discarded.
x, y, s, t, _ = tm.layout_from_lsh_forest(lf)

# Colour the points by active / inactive, which is recorded in the
# "Comment" column of the dataframe: 1 where the value is exactly
# "active", 0 for anything else (including missing values).
active = df["Comment"].eq("active").astype(int).tolist()

# Assemble the scatter payload: layout coordinates, the 0 / 1 activity
# flag as the colour value, and the SMILES strings as point labels.
scatter_data = {"x": x, "y": y, "c": active, "labels": df["Smiles"]}

# Legend entries matching the two values found in `active`
activity_legend = [(0, "Inactive"), (1, "Active")]

# Build the plot: one categorical scatter layer named "assay" ...
faerun = Faerun(view="front", coords=False)
faerun.add_scatter(
    "assay",
    scatter_data,
    point_scale=5,
    colormap="tab10",
    has_legend=True,
    categorical=True,
    legend_labels=activity_legend,
)

# ... plus the tree edges, attached to the "assay" scatter via point_helper
tree_edges = {"from": s, "to": t}
faerun.add_tree("assay_tree", tree_edges, point_helper="assay")

# Use the "smiles" template so the structure is displayed on hover
faerun.plot(template="smiles")