In [18]:
import os.path as osp
import numpy as np
import pickle as pkl
import warnings
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from bindpredict.featuregen import Featurizer, create_X_Y
from bindpredict.utils.rdkitutils import smiles_to_mol
warnings.filterwarnings('ignore')
import os.path as osp

This notebook contains codes for loading the trained model and test it on the given test set. It takes a `csv` file with the same format as the original dataset as input. Then, Copy your `cvs` file to the `data` folder and the trained models into the `models` folder.

In [23]:
# set the name of your csv file 
compounds = pd.read_csv('./data/test_set.csv', sep=',')
# Remove compounds that have multiple category assigned
compounds = compounds[compounds['L1_class_name'].str.find(',') == -1]
compounds['L1_class_name'] = compounds.L1_class_name.apply(lambda x: x[2:-2])
# Check for invalid smiles
validation_results = standardize(compounds['canonical_smiles'].values)
# add these flags of being a valid or invalid compound to the dataset
compounds.insert(loc=len(compounds.columns), column='Valid', value=validation_results) 
curated_compounds = compounds[compounds['Valid'] == 'Yes']

In [24]:
# create molecular features and fingerprints
compound_smiles = curated_compounds['canonical_smiles'].values
mols = smiles_to_mol(compound_smiles)
mol_featurizer = Featurizer()
features = mol_featurizer.generate(mols)
for key in features.keys():
    curated_compounds.insert(loc=len(curated_compounds.columns), 
                             column=key, 
                             value=features[key])

In [25]:
# Combine features
feature_keys = ['LogP', 'MolWt', 'PSA', 'NumHAcceptors', 'NumHDonors',
                'NumRotatableBonds', 'RingCount', 'ECPF4', 'MACCS']

X_test, y_test = create_X_Y(curated_compounds, feature_keys, 'L1_class_name')

In [22]:
# load the trained model 
model_path = './models/singleClassifier.pkl'
singleClassifier = pkl.load(open(model_path, 'rb'))
# run the test
preds = singleClassifier.predict(X_test)
print(classification_report(y_true=y_test,y_pred=preds))

                      precision    recall  f1-score   support

              Enzyme       0.00      0.00      0.00       0.0
Epigenetic regulator       0.00      0.00      0.00       0.0
         Ion channel       0.00      0.00      0.00       0.0
   Membrane receptor       0.00      0.00      0.00       0.0
Transcription factor       0.00      0.00      0.00       0.0
Unclassified protein       0.00      0.00      0.00       0.0
    anscription fact       0.00      0.00      0.00      34.0
             ansport       0.00      0.00      0.00      17.0
    classified prote       0.00      0.00      0.00      44.0
        creted prote       0.00      0.00      0.00       2.0
 her cytosolic prote       0.00      0.00      0.00      12.0
  her membrane prote       0.00      0.00      0.00       3.0
   her nuclear prote       0.00      0.00      0.00       3.0
    igenetic regulat       0.00      0.00      0.00      31.0
       mbrane recept       0.00      0.00      0.00      82.0
       