In [None]:
!pip install --pre deepchem

Collecting deepchem
  Downloading deepchem-2.8.1.dev20240710195445-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting rdkit (from deepchem)
  Downloading rdkit-2024.3.3-cp310-cp310-manylinux_2_28_x86_64.whl (33.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.1/33.1 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit, deepchem
Successfully installed deepchem-2.8.1.dev20240710195445 rdkit-2024.3.3


In [None]:
import deepchem as dc
tasks, datasets, transformers = dc.molnet.load_tox21(featurizer="GraphConv")
train_dataset, valid_dataset, test_dataset = datasets

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


In [None]:
n_tasks = len(tasks)

In [None]:
model = dc.models.GraphConvModel(n_tasks, mode='classification')
model.fit(train_dataset, nb_epoch=50)

0.285104866027832

In [None]:
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)

{'roc_auc_score': 0.9685375763685982}


In [None]:
print("Training set score: {}".format(model.evaluate(train_dataset, [metric])))
print("Testing set score: {}".format(model.evaluate(test_dataset, [metric])))

Training set score: {'roc_auc_score': 0.9685375763685982}
Testing set score: {'roc_auc_score': 0.7078875365620526}


In [None]:
from deepchem.models.layers import GraphConv, GraphPool, GraphGather
import tensorflow as tf
import tensorflow.keras.layers as layers

In [None]:
batch_size = 100

In [28]:
class MyGraphConvModel(tf.keras.Model):

  def __init__(self):
    super(MyGraphConvModel, self).__init__()
    self.gc1 = GraphConv(128, activation_fn=tf.nn.tanh)
    self.batch_norm1 = layers.BatchNormalization()
    self.gp1 = GraphPool()

    self.gc2 = GraphConv(128, activation_fn=tf.nn.tanh)
    self.batch_norm2 = layers.BatchNormalization()
    self.gp2 = GraphPool()

    self.dense1 = layers.Dense(256, activation=tf.nn.tanh)
    self.batch_norm3 = layers.BatchNormalization()
    self.readout = GraphGather(batch_size=batch_size, activation_fn=tf.nn.tanh)

    self.dense2 = layers.Dense(n_tasks*2)
    self.logits = layers.Reshape((n_tasks, 2))
    self.softmax = layers.Softmax()

  def call(self, inputs):
    gc1_output = self.gc1(inputs)
    batch_norm1_output = self.batch_norm1(gc1_output)
    gp1_output = self.gp1([batch_norm1_output] + inputs[1:])

    gc2_output = self.gc2([gp1_output] + inputs[1:])
    batch_norm2_output = self.batch_norm1(gc2_output)
    gp2_output = self.gp2([batch_norm2_output] + inputs[1:])

    dense1_output = self.dense1(gp2_output)
    batch_norm3_output = self.batch_norm3(dense1_output)
    readout_output = self.readout([batch_norm3_output] + inputs[1:])

    logits_output = self.logits(self.dense2(readout_output))
    return self.softmax(logits_output)


In [29]:
model = dc.models.KerasModel(MyGraphConvModel(), loss=dc.models.losses.CategoricalCrossEntropy())

In [30]:
test_dataset.X[0]

<deepchem.feat.mol_graphs.ConvMol at 0x797688d330a0>

In [31]:
from deepchem.metrics import to_one_hot
from deepchem.feat.mol_graphs import ConvMol
import numpy as np

def data_generator(dataset, epochs=1):
  for ind, (X_b, y_b, w_b, ids_b) in enumerate(dataset.iterbatches(batch_size, epochs,
                                                                   deterministic=False, pad_batches=True)):
    multiConvMol = ConvMol.agglomerate_mols(X_b)
    inputs = [multiConvMol.get_atom_features(), multiConvMol.deg_slice, np.array(multiConvMol.membership)]
    for i in range(1, len(multiConvMol.get_deg_adjacency_lists())):
      inputs.append(multiConvMol.get_deg_adjacency_lists()[i])
    labels = [to_one_hot(y_b.flatten(), 2).reshape(-1, n_tasks, 2)]
    weights = [w_b]
    yield (inputs, labels, weights)

In [32]:
model.fit_generator(data_generator(train_dataset, epochs=50))

0.25049108505249024

In [33]:
print("Training set score: {}".format(model.evaluate_generator(data_generator(train_dataset), [metric], transformers)))
print("Testing set score: {}".format(model.evaluate_generator(data_generator(test_dataset), [metric], transformers)))

Training set score: {'roc_auc_score': 0.842380454794352}
Testing set score: {'roc_auc_score': 0.6311216566750266}
