In [1]:
!pip install --pre deepchem

Collecting deepchem
  Downloading deepchem-2.8.1.dev20240710195445-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting rdkit (from deepchem)
  Downloading rdkit-2024.3.3-cp310-cp310-manylinux_2_28_x86_64.whl (33.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.1/33.1 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit, deepchem
Successfully installed deepchem-2.8.1.dev20240710195445 rdkit-2024.3.3


In [5]:
import deepchem as dc
print(dc.__version__) #sanity check: show which DeepChem version the kernel actually imported

2.8.1.dev


In [10]:
import tensorflow as tf #import tensorflow

keras_model = tf.keras.Sequential([ #regression network: hidden layer -> dropout -> single output
    tf.keras.layers.Dense(1000, activation='relu'), #hidden layer with 1000 units (units, not inputs - no input width is specified here); activation function = relu
    tf.keras.layers.Dropout(rate=0.5), #50% dropout to reduce overfitting
    tf.keras.layers.Dense(1) #1 output unit; no activation argument is passed, so no sigmoid/swish is applied (Keras Dense defaults to a linear output) and the value is NOT limited to 0-1
])

model = dc.models.KerasModel(keras_model, dc.models.losses.L2Loss()) #make a DeepChem model from the tensorflow model, trained with an L2 loss

In [11]:
#load the Delaney dataset through MoleculeNet, featurized as ECFP fingerprints and split randomly
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer="ECFP", splitter="random")
train_dataset, valid_dataset, test_dataset = datasets #valid_dataset and transformers are not used in the cells shown here
#train for 50 epochs; fit() is the last expression of the cell, so its return value is the number echoed below
#(presumably the training loss - confirm against the DeepChem docs)
model.fit(train_dataset, nb_epoch=50)

0.03364995241165161

In [12]:
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score) #regression metric used by the evaluate() calls below

In [14]:
#score the model on the split it was fit on and on the held-out test split; evaluate() returns a dict keyed by metric name
print('training set score:{} '.format(model.evaluate(train_dataset, [metric])))
print('testing set score:{} '.format(model.evaluate(test_dataset, [metric])))

training set score:{'pearson_r2_score': 0.9773601192050011} 
testing set score:{'pearson_r2_score': 0.6940334482092807} 


In [17]:
import torch

#define a pytorch model
pytorch_model = torch.nn.Sequential( #this is the same architecture as the keras model I just trained
    torch.nn.Linear(1024, 1000), #1024 input features -> 1000 hidden units (the input width is given explicitly here)
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5), #50% dropout, same rate as the keras model
    torch.nn.Linear(1000,1) #single output unit with no activation after it
)

#1024 -> 1000 -> 1 (solubility evaluation)

#convert it into a DeepChem model
#NOTE: this rebinds `model`, replacing the KerasModel created in the earlier cell
model = dc.models.TorchModel(pytorch_model, dc.models.losses.L2Loss()) #l2loss, same loss as the keras model

#train the model
#uses train_dataset from the load_delaney cell above; fit() is not the last expression here, so its return value is not echoed
model.fit(train_dataset, nb_epoch=50)

#evaluate the model
#`metric` is still the pearson_r2_score metric defined in the earlier cell
print('training set score: {}'.format(model.evaluate(train_dataset,[metric])))
print('testing set score: {}'.format(model.evaluate(test_dataset, [metric])))

training set score: {'pearson_r2_score': 0.9784133078846854}
testing set score: {'pearson_r2_score': 0.7037231114273185}


In [20]:
#question: how do I actually run inference on this model?
#I should spend time breaking up the evaluate and inference and rebuilding them today to get a better understanding of how the model runs
#like, do I *have* to use DeepChem for this after running the fingerprint? could I take the data and run with it in vanilla PyTorch?

In [24]:
class ClassificationModel(tf.keras.Model):
  """Binary classifier with one hidden ReLU layer that returns probabilities and logits.

  NOTE: a PyTorch class with the same name is defined later in this notebook;
  whichever cell ran last owns the name `ClassificationModel`.

  Args:
    n_hidden: Number of units in the hidden layer (default 1000).
    dropout_rate: Fraction of hidden activations dropped while training
      (default 0.5). Dropout is skipped when `training` is false.
  """

  def __init__(self, n_hidden=1000, dropout_rate=0.5):
    # Zero-argument super() does not look up the global class name, so it keeps
    # working even after the name is rebound by another cell.
    super().__init__()
    self.dropout_rate = dropout_rate
    self.dense1 = tf.keras.layers.Dense(n_hidden, activation='relu')
    self.dense2 = tf.keras.layers.Dense(1)

  def call(self, inputs, training=False):
    """Run the forward pass.

    Args:
      inputs: Batch of feature vectors; the feature width is not fixed by this class.
      training: When true, dropout is applied to the hidden activations.

    Returns:
      A tuple `(output, logits)`: `logits` is the raw value of the final layer
      and `output` is the sigmoid of those logits.
    """
    y = self.dense1(inputs)
    if training:
      y = tf.nn.dropout(y, self.dropout_rate)
    logits = self.dense2(y)
    output = tf.nn.sigmoid(logits)
    return output, logits

keras_model = ClassificationModel()
#one entry per value returned by call(), in the same order as its (output, logits) tuple:
#the sigmoid output is tagged 'prediction' and the logits are tagged 'loss'
output_types = ['prediction', 'loss']
#presumably SigmoidCrossEntropy expects logits rather than probabilities, hence the separate 'loss' output - confirm in the DeepChem docs
model = dc.models.KerasModel(keras_model, dc.models.losses.SigmoidCrossEntropy(), output_types=output_types)

In [26]:
#load the BACE classification dataset through MoleculeNet, featurized as ECFP fingerprints with a scaffold split
#NOTE: this rebinds tasks/datasets/transformers and the train/valid/test names that previously held the Delaney data
tasks, datasets, transformers = dc.molnet.load_bace_classification(featurizer="ECFP", splitter="scaffold")
train_dataset, valid_dataset, test_dataset = datasets
#train for 100 epochs; fit() is not the last expression in this cell, so its return value is not echoed
model.fit(train_dataset, nb_epoch=100)

#switch the metric to ROC AUC for classification (rebinds `metric`, which was pearson_r2_score for the regression models)
metric=dc.metrics.Metric(dc.metrics.roc_auc_score)
print('training set score:', model.evaluate(train_dataset, [metric]))
print('test set score:', model.evaluate(test_dataset, [metric]))

training set score: {'roc_auc_score': 0.9996228260110358}
test set score: {'roc_auc_score': 0.7626811594202899}


In [28]:
class ClassificationModel(torch.nn.Module):
  """Binary classifier with one hidden ReLU layer that returns probabilities and logits.

  NOTE: this reuses the name of the Keras class defined earlier in the notebook;
  running this cell rebinds `ClassificationModel` to the PyTorch version.

  Args:
    n_features: Width of each input vector (default 1024).
    n_hidden: Number of units in the hidden layer (default 1000).
    dropout_rate: Probability of zeroing a hidden activation (default 0.5).
      Dropout is only active while the module is in training mode.
  """

  def __init__(self, n_features=1024, n_hidden=1000, dropout_rate=0.5):
    # Zero-argument super() does not look up the global class name, so it keeps
    # working even after the name is rebound by another cell.
    super().__init__()
    self.dropout_rate = dropout_rate
    self.dense1 = torch.nn.Linear(n_features, n_hidden)
    self.dense2 = torch.nn.Linear(n_hidden, 1)

  def forward(self, inputs):
    """Run the forward pass.

    Args:
      inputs: Tensor whose last dimension has `n_features` entries.

    Returns:
      A tuple `(output, logits)`: `logits` is the raw value of the final layer
      and `output` is the sigmoid of those logits.
    """
    y = torch.nn.functional.relu(self.dense1(inputs))
    # training=self.training ties dropout to the module's train()/eval() mode.
    y = torch.nn.functional.dropout(y, p=self.dropout_rate, training=self.training)
    logits = self.dense2(y)
    output = torch.sigmoid(logits)
    return output, logits

In [30]:
torch_model = ClassificationModel()
#one entry per value returned by forward(), in the same order as its (output, logits) tuple:
#the sigmoid output is tagged 'prediction' and the logits are tagged 'loss'
output_types = ['prediction', 'loss']
#NOTE: this rebinds `model`, replacing the Keras classification model trained above
model = dc.models.TorchModel(torch_model, dc.models.losses.SigmoidCrossEntropy(), output_types=output_types)

In [31]:
#same load_bace_classification call as in the Keras cell above (ECFP features, scaffold split); repeated so this cell does not depend on that one
tasks, datasets, transformers = dc.molnet.load_bace_classification(featurizer="ECFP", splitter="scaffold")
train_dataset, valid_dataset, test_dataset = datasets
#train the TorchModel for 100 epochs, the same number used for the Keras classifier
model.fit(train_dataset, nb_epoch=100)
metric = dc.metrics.Metric(dc.metrics.roc_auc_score) #ROC AUC, used by the evaluate() calls in the next cell

In [32]:
#score the torch classifier on the training split and on the held-out test split with the ROC AUC metric
print('training set score: {}'.format(model.evaluate(train_dataset, [metric])))
print('test set score: {}'.format(model.evaluate(test_dataset, [metric])))

training set score: {'roc_auc_score': 0.9996284137738353}
test set score: {'roc_auc_score': 0.7622282608695652}


In [34]:
#NOTE: these are reading notes kept as a bare string expression; because it is the only expression in the cell,
#Jupyter echoes its repr (with literal \n escapes) as the cell output. A Markdown cell would render the list properly.
""" Other Torch / Keras stuff

Other Features
KerasModel and TorchModel have lots of other features. Here are some of the more important ones.

Automatically saving checkpoints during training.
Logging progress to the console, to TensorBoard , or to Weights & Biases .
Custom loss functions that you define with a function of the form f(outputs, labels, weights) .
Early stopping using the ValidationCallback class.
Loading parameters from pre-trained models.
Estimating uncertainty in model outputs.
Identifying important features through saliency mapping.
By wrapping your own models in a KerasModel or TorchModel , you get immediate access to all these features. See the API documentation for full details on them.

"""

' Other Torch / Keras stuff\n\nOther Features\nKerasModel and TorchModel have lots of other features. Here are some of the more important ones.\n\nAutomatically saving checkpoints during training.\nLogging progress to the console, to TensorBoard , or to Weights & Biases .\nCustom loss functions that you define with a function of the form f(outputs, labels, weights) .\nEarly stopping using the ValidationCallback class.\nLoading parameters from pre-trained models.\nEstimating uncertainty in model outputs.\nIdentifying important features through saliency mapping.\nBy wrapping your own models in a KerasModel or TorchModel , you get immediate access to all these features. See the API documentation for full details on them.\n\n'