In [72]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import tmap
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit.Chem import AllChem

In [77]:
import pickle

# Alternative input file, currently disabled: file = 'ligand_metal_VAE.pkl'

# Pickle file to read, resolved relative to the kernel's working directory.
file = 'ligand_pubchem_VAE_32.pkl'

# NOTE(review): pickle.load can run arbitrary code stored in the file; only open pickles from a trusted source.
with open(file, 'rb') as f:
    # `data` is later consumed positionally (index % 4), so it is presumably a flat
    # sequence of 4-item records — TODO confirm against the code that wrote the file.
    data = pickle.load(f)

In [161]:
# Load the table whose `nmetal` and `ligandsm` columns supply the colours and labels of the final plot.
# NOTE(review): absolute, machine-specific path — the notebook cannot be re-run elsewhere without editing it.
df = pd.read_csv(r'C:\Users\Administrator\Desktop\前2000.csv')
# Show the (rows, columns) shape as the cell output.
df.shape

(2000, 2)

In [74]:
def _every_fourth(items, offset):
    """Return the elements of `items` whose position is congruent to `offset` modulo 4."""
    return [value for position, value in enumerate(items) if position % 4 == offset]


# `data` is read in strides of four: positions 1, 2 and 3 of each stride are
# collected here (position 0 is not used in this cell).
metals = _every_fourth(data, 1)

# The last two selections are converted to float64 NumPy arrays.
mean_emd = np.array(_every_fourth(data, 2), dtype='float64')
std_emd = np.array(_every_fourth(data, 3), dtype='float64')

In [141]:
# Keep only the first 2000 rows of the mean embeddings, then echo the slice as the cell output.
mean_emd2000 = mean_emd[:2000]
mean_emd2000

array([[-2.69825757e-03,  3.52687687e-02, -7.56738931e-02, ...,
        -7.25361146e-03, -3.94158326e-02, -4.29012515e-02],
       [ 5.30459546e-03,  2.90321726e-02,  1.30149826e-01, ...,
        -6.87790103e-03,  2.83216000e-01, -4.67458740e-05],
       [ 5.30182943e-03,  3.67771015e-02, -2.30401196e-02, ...,
        -5.72439283e-03,  7.42072463e-02, -3.62446439e-03],
       ...,
       [-7.74282962e-04, -9.26772133e-03,  3.91541012e-02, ...,
         4.41793352e-03,  1.86816633e-01,  2.19012983e-02],
       [ 2.21711285e-02,  8.91620517e-02,  8.25511456e-01, ...,
        -4.01651412e-02,  4.11146060e-02, -1.85164630e-01],
       [ 2.11997703e-03, -4.61515039e-04, -5.12342602e-02, ...,
         2.64071673e-03,  1.29593253e-01, -4.22082189e-03]])

In [153]:

# Wrap each row of the truncated embedding array in a tmap.VectorFloat.
fingerprints = list(map(tmap.VectorFloat, mean_emd2000))


In [151]:
# Initialize the LSH Forest
# NOTE(review): the vectors added below come from the weighted MinHash API
# (batch_from_weight_array), but this forest is created with tmap's default
# settings. tmap's weighted examples appear to build the forest with
# weighted=True — TODO confirm whether that is needed here.
lf = tmap.LSHForest(32)

# Hash the embedding vectors and add them to the forest.
# NOTE(review): weighted MinHash schemes presumably expect non-negative weights,
# while the embeddings printed earlier contain negative values — TODO confirm
# that the resulting hashes are meaningful.
lf.batch_add(tmap.Minhash(32).batch_from_weight_array(fingerprints))

# Build the index once everything has been added.
lf.index()

In [152]:
# Get the coordinates
# Five values are returned: x and y are used as the scatter coordinates and s and t as the
# "from"/"to" ends of the tree edges in the plotting cell; the fifth value is discarded.
x, y, s, t, _ = tmap.layout_from_lsh_forest(lf)

In [164]:
# Now plot the data
faerun = Faerun(view="front", coords=False)

# Name shared by the scatter layer, the tree's point helper and the final plot.
scatter_name = "ESOL_Basic"

# Per-point payload: layout coordinates, colour values from `nmetal`, hover labels from `ligandsm`.
scatter_data = {
    "x": x,
    "y": y,
    "c": list(df["nmetal"].values),
    "labels": df["ligandsm"],
}
faerun.add_scatter(
    scatter_name,
    scatter_data,
    point_scale=5,
    colormap=['rainbow'],
    has_legend=True,
    legend_title=['number of metal(embedding)'],
    categorical=[False],
    shader='smoothCircle',
)

# Tree edges, given by the "from"/"to" sequences obtained from the layout step.
tree_data = {"from": s, "to": t}
faerun.add_tree("ESOL_Basic_tree", tree_data, point_helper=scatter_name)

# Choose the "smiles" template to display structure on hover
faerun.plot(scatter_name, template="smiles", notebook_height=750)