In [1]:
import pandas as pd
import tmap
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolDescriptors

In [2]:
# Load the ligand table. Later cells read its 'ligandsm' column (SMILES strings)
# and its 'nmetal' column.
# NOTE(review): absolute, machine-specific path — point this at a location
# relative to the notebook/project before sharing.
LIGAND_CSV = r'C:\Users\Administrator\Desktop\前2000.csv'
dff = pd.read_csv(LIGAND_CSV)

# Show (rows, columns) as the cell output.
dff.shape

(2000, 2)

In [4]:
# Fingerprint length; the same value is used to size the MinHash encoder and
# the LSH forest below.
bits = 1024

# Parse every SMILES once. RDKit returns None (rather than raising) for a
# string it cannot parse, so check explicitly: a None would otherwise only
# surface as an opaque error inside the fingerprint call.
mols = [Chem.MolFromSmiles(smi) for smi in dff['ligandsm']]
unparsed_rows = [pos for pos, mol in enumerate(mols) if mol is None]
if unparsed_rows:
    # Fail instead of silently dropping rows: later cells pair these molecules
    # with columns of `dff` by position, so the lists must stay aligned.
    raise ValueError(
        f"{len(unparsed_rows)} SMILES in dff['ligandsm'] could not be parsed by RDKit; "
        f"first offending row positions: {unparsed_rows[:10]}"
    )

# Morgan bit-vector fingerprints with radius 2, `bits` bits long.
ECFP4_fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=bits) for mol in mols]

# Convert each bit vector to the tmap vector type expected by the encoder.
ecfp4_lists = [tmap.VectorUchar(list(fp)) for fp in ECFP4_fps]

# Initialize the Minhash
enc = tmap.Minhash(bits)

# Initialize the LSH Forest
lf_ecfp4 = tmap.LSHForest(bits)

# Add the Fingerprints to the LSH Forest and index
lf_ecfp4.batch_add(enc.batch_from_binary_array(ecfp4_lists))
lf_ecfp4.index()

In [6]:
# Compute the layout from the indexed LSH forest. x / y are the point
# coordinates and s / t the edge endpoints ("from" / "to") passed to faerun in
# the plotting cell. The fifth return value is not used; it is bound to a
# descriptive name because assigning to `_` in IPython/Jupyter overrides the
# built-in "last output" variable.
x, y, s, t, _graph_properties = tmap.layout_from_lsh_forest(lf_ecfp4)

In [9]:
# Ring count per ligand, computed from the molecules already parsed in the
# fingerprint cell (`mols`) instead of parsing every SMILES a second time.
numrings = [rdMolDescriptors.CalcNumRings(mol) for mol in mols]

# Binary flag: 1 for ligands without any ring, 0 otherwise.
is_linear = [int(ring_count == 0) for ring_count in numrings]

# Distinct ring counts present in the data. Kept as the last expression so the
# notebook actually displays it.
set(numrings)

In [15]:
# Now plot the data
faerun = Faerun(view="front", coords=False)
faerun.add_scatter(
    "ESOL_ECFP4",
    {
        "x": x,
        "y": y,
        # Three colour series: metal count, ring count, and the linear flag.
        # Every per-series option below is listed in this same order.
        "c": [list(dff.nmetal.values), numrings, is_linear],
        "labels": dff["ligandsm"],
    },
    point_scale=5,
    # One colormap per series. The original list had only two entries for
    # three series, leaving the colormap of `is_linear` implicit; it is now
    # given the same qualitative map as the other categorical series.
    colormap=["rainbow", "Set1", "Set1"],
    has_legend=True,
    categorical=[False, True, True],
    series_title=["number of metal", "Rings", "is_linear"],
    # Only the binary series needs explicit legend text.
    legend_labels=[None, None, [(0, "No"), (1, "Yes")]],
    shader="smoothCircle",
)

# Draw the tree edges; point_helper names the scatter layer whose points the
# edge indices refer to, so it must match the name given to add_scatter above.
faerun.add_tree("_ECFP4_tree", {"from": s, "to": t}, point_helper="ESOL_ECFP4")

# Choose the "smiles" template to display structure on hover
faerun.plot("ECFP4", template="smiles", notebook_height=750)