In [1]:
import pandas as pd
import tmap
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit.Chem import AllChem

In [2]:
# Load the ligand table; later cells read its "ligandsm" and "nmetal" columns.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
csv_path = r'C:\Users\Administrator\Desktop\前2000.csv'
dff = pd.read_csv(csv_path)

# Bare last expression so the notebook displays (n_rows, n_columns)
dff.shape

(2000, 2)

In [3]:
# Number of MinHash permutations; the same value is given to the encoder
# here and to the LSH forest in the next cell
perm = 512

# MHFP encoder configured with `perm` permutations
enc = MHFPEncoder(perm)

# Encode every entry of the "ligandsm" column (SMILES strings) and wrap each
# result in tmap.VectorUint, the vector type the fingerprints are stored as
fingerprints = [
    tmap.VectorUint(encoded)
    for encoded in map(enc.encode, dff["ligandsm"])
]

In [4]:
# Initialize the LSH Forest with the same `perm` value that was given to the MHFP encoder
lf = tmap.LSHForest(perm)

# Add all fingerprints to the LSH Forest in one batch, then build the index.
# NOTE(review): presumably index() has to run after every addition and before
# the forest is used for the layout -- confirm against the tmap documentation.
lf.batch_add(fingerprints)
lf.index()

In [5]:
# Compute the layout from the indexed LSH forest.
# x, y are used by the plotting cell as the point coordinates; s, t are used
# there as the "from"/"to" endpoints of the tree edges.
# The fifth return value (a GraphProperties object per the tmap docs) is not
# used. It is bound to a descriptive name rather than `_`, because `_` is
# IPython's "last output" variable and assigning to it in a notebook shadows
# that built-in behaviour for the rest of the session.
x, y, s, t, graph_properties = tmap.layout_from_lsh_forest(lf)

In [8]:
# Data for the scatter layer: layout coordinates, the "nmetal" column as the
# colour value, and the "ligandsm" column as the point labels
scatter_data = {
    "x": x,
    "y": y,
    "c": list(dff["nmetal"].values),
    "labels": dff["ligandsm"],
}

# Now plot the data
faerun = Faerun(view="front", coords=False)

faerun.add_scatter(
    "ESOL_Basic",
    scatter_data,
    point_scale=5,
    colormap=['rainbow'],
    has_legend=True,
    legend_title=['number of metal(MHFP)'],
    categorical=[False],
    shader='smoothCircle',
)

# Add the tree edges; point_helper refers to the scatter layer added above by name
faerun.add_tree(
    "ESOL_Basic_tree",
    {"from": s, "to": t},
    point_helper="ESOL_Basic",
)

# Choose the "smiles" template to display structure on hover
faerun.plot('ESOL_Basic', template="smiles", notebook_height=750)