# Replicating iwatobipen JAK3 results with DeepChem#
    - Matt Robinson

https://iwatobipen.wordpress.com/2017/05/18/graph-convolution-classification-with-deepchem/

The prolific and well-known chemoinformatics blogger *iwatobipen* released his result using a graph convolutional network using DeepChem. Here we compare his results to those obtained with our own, much simpler gcn.

The dataset is JAK3 inhibitor activity data obtained from CHEMBL and availalbe on iwatobipen's github: 

https://github.com/iwatobipen/deeplearning

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import deepchem as dc
from deepchem.models.tensorgraph.models.graph_models import GraphConvModel

  from numpy.core.umath_tests import inner1d


In [4]:
graph_featurizer = dc.feat.graph_features.ConvMolFeaturizer()
loader = dc.data.data_loader.CSVLoader(tasks=['activity_class'],
                                       smiles_field="CANONICAL_SMILES",
                                       id_field="CMPD_CHEMBLID",
                                       featurizer=graph_featurizer)

dataset = loader.featurize('./jak3_activities.csv')
 
splitter = dc.splits.splitters.RandomSplitter()
trainset, testset = splitter.train_test_split(dataset, frac_train=0.8)

Loading raw samples now.
shard_size: 8192
About to start loading CSV from ./jak3_activities.csv
Loading shard 1 of size 8192.
Featurizing sample 0
TIMING: featurizing shard 0 took 3.504 s
TIMING: dataset construction took 6.570 s
Loading dataset from disk.
TIMING: dataset construction took 5.421 s
Loading dataset from disk.
TIMING: dataset construction took 4.324 s
Loading dataset from disk.


In [5]:
model = GraphConvModel(n_tasks=1, n_classes=2, mode='classification',
                       tensorboard=True,  model_dir='models/',
                       dropout=0.2, graph_conv_layers=[64,64])

In [6]:
%%time
# fit for 50 epochs, as in post
for _ in range(10):
    train_loss = model.fit(trainset, nb_epoch=5)
    print('train_loss: ', train_loss)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


('train_loss: ', 52.21642932891846)
('train_loss: ', 49.081698870658876)
('train_loss: ', 47.03386784791947)
('train_loss: ', 47.48215756416321)
('train_loss: ', 43.89353243112564)
('train_loss: ', 42.1603260755539)
('train_loss: ', 40.23352180719375)
('train_loss: ', 38.77171133756637)
('train_loss: ', 38.65405027866363)
('train_loss: ', 36.0815532207489)
CPU times: user 6min 24s, sys: 1min 2s, total: 7min 27s
Wall time: 6min 24s


In [7]:
train_scores = model.evaluate(
                trainset,
                [dc.metrics.Metric(dc.metrics.accuracy_score),
                 dc.metrics.Metric( dc.metrics.roc_auc_score, np.mean)]
                )
print('train scores')
print(train_scores)

computed_metrics: [0.8486111111111111]
computed_metrics: [0.9008319940417859]
train scores
{'mean-roc_auc_score': 0.9008319940417859, 'accuracy_score': 0.8486111111111111}


In [8]:
test_scores = model.evaluate(
                testset,
                [dc.metrics.Metric(dc.metrics.accuracy_score),
                 dc.metrics.Metric( dc.metrics.roc_auc_score, np.mean)]
                )
print('test scores')
print(test_scores)

computed_metrics: [0.7277777777777777]
computed_metrics: [0.7352307692307692]
test scores
{'mean-roc_auc_score': 0.7352307692307692, 'accuracy_score': 0.7277777777777777}


In [50]:
def evaluate_dc_classifier(model, test_ds, classes=[0,1]):
    "evaluate a DeepChem model for classification"
    
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import roc_auc_score, auc, roc_curve
    from sklearn.preprocessing import label_binarize
    
    def bs_roc_auc_score(y_true, y_prob, n_boostraps=1000):
        "code to bootstrap the auc score: copied from Ogrisel's SO answer"

        n_bootstraps = 1000
        rng_seed = 42  # control reproducibility
        bootstrapped_scores = []

        rng = np.random.RandomState(rng_seed)
        for i in range(n_bootstraps):
            # bootstrap by sampling with replacement on the prediction indices
            indices = rng.randint(0, len(y_prob) - 1, len(y_prob))
            indices = [int(idx) for idx in indices]
            y_true = np.array(y_true)
            y_prob = np.array(y_prob)
            if len(np.unique(y_true[indices])) < 2:
                # We need at least one positive and one negative sample for ROC AUC
                # to be defined: reject the sample
                continue

            score = roc_auc_score(y_true[indices], y_prob[indices])
            bootstrapped_scores.append(score)

        sorted_scores = np.array(bootstrapped_scores)
        sorted_scores.sort()

        # Computing the lower and upper bound of the 90% confidence interval
        # You can change the bounds percentiles to 0.025 and 0.975 to get
        # a 95% confidence interval instead.
        confidence_lower = sorted_scores[int(0.05 * len(sorted_scores))]
        confidence_upper = sorted_scores[int(0.95 * len(sorted_scores))]
        return [confidence_lower, confidence_upper]

    y_true = list(testset.y[:,0].astype(int))
    
    prob_arr = np.array([x[0] for x in model.predict(testset)])
    y_pred = list(np.argmax(prob_arr, axis=1))

    print('accuracy: ', accuracy_score(y_true, y_pred))
    print('classification report: ')
    print(classification_report(y_true, y_pred))

    if len(classes) == 2:
        y_prob = list(prob_arr[:,-1])
        print('roc-auc: ', roc_auc_score(y_true, y_prob))
        print('bootstrapped roc-auc: ', bs_roc_auc_score(y_true, y_prob))

    else:
        y_test = label_binarize(y_true, classes=classes)
        n_classes = y_test.shape[1]

        # Compute ROC curve and ROC area for each class
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        bs_roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test[:, i], prob_arr[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
            bs_roc_auc[i] = bs_roc_auc_score(y_test[:, i], prob_arr[:, i])
        
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), prob_arr.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        bs_roc_auc['micro'] = bs_roc_auc_score(y_test.ravel(), prob_arr.ravel())

        print("micro auc score and score for each class: ")
        for key in roc_auc:
            print(key,' : ', roc_auc[key])
        print("bootstrapped micro auc score and score for each class: ")
        for key in bs_roc_auc:
            print(key,' : ', bs_roc_auc[key])

In [51]:
evaluate_dc_classifier(model, testset, classes=[0,1])

('accuracy: ', 0.7277777777777777)
classification report: 
             precision    recall  f1-score   support

          0       0.76      0.91      0.83       130
          1       0.52      0.26      0.35        50

avg / total       0.69      0.73      0.69       180

('roc-auc: ', 0.7352307692307692)
('bootstrapped roc-auc: ', [0.6692913385826772, 0.8013001695873375])


In [52]:
%%time
# train for additional 100 epochs
for _ in range(20):
    train_loss = model.fit(trainset, nb_epoch=5)
    print('train_loss: ', train_loss)

('train_loss: ', 35.518444657325745)
('train_loss: ', 34.883203136920926)
('train_loss: ', 32.570836985111235)
('train_loss: ', 33.2912921667099)
('train_loss: ', 31.446497976779938)
('train_loss: ', 30.657007586956023)
('train_loss: ', 29.75057816505432)
('train_loss: ', 28.672795355319977)
('train_loss: ', 27.94592696428299)
('train_loss: ', 26.42917002439499)
('train_loss: ', 27.486838233470916)
('train_loss: ', 26.633309388160704)
('train_loss: ', 25.24140247106552)
('train_loss: ', 24.68913254737854)
('train_loss: ', 5.514284610748291)
('train_loss: ', 23.650760960578918)
('train_loss: ', 22.06888137459755)
('train_loss: ', 21.34109171628952)
('train_loss: ', 20.93572798371315)
('train_loss: ', 20.26791494488716)
CPU times: user 12min 12s, sys: 2min 1s, total: 14min 14s
Wall time: 13min 2s


In [53]:
train_scores = model.evaluate(
                trainset,
                [dc.metrics.Metric(dc.metrics.accuracy_score),
                 dc.metrics.Metric( dc.metrics.roc_auc_score, np.mean)]
                )
print('train scores')
print(train_scores)

computed_metrics: [0.9291666666666667]
computed_metrics: [0.9850505664223276]
train scores
{'mean-roc_auc_score': 0.9850505664223276, 'accuracy_score': 0.9291666666666667}


In [54]:
test_scores = model.evaluate(
                testset,
                [dc.metrics.Metric(dc.metrics.accuracy_score),
                 dc.metrics.Metric( dc.metrics.roc_auc_score, np.mean)]
                )
print('test scores')
print(test_scores)

computed_metrics: [0.7555555555555555]
computed_metrics: [0.7764615384615385]
test scores
{'mean-roc_auc_score': 0.7764615384615385, 'accuracy_score': 0.7555555555555555}


In [56]:
evaluate_dc_classifier(model, testset, classes=[0,1])

('accuracy: ', 0.7555555555555555)
classification report: 
             precision    recall  f1-score   support

          0       0.78      0.92      0.84       130
          1       0.61      0.34      0.44        50

avg / total       0.73      0.76      0.73       180

('roc-auc: ', 0.7764615384615384)
('bootstrapped roc-auc: ', [0.7159024103468548, 0.8395388689827076])
