# Method 1: GraphConvModel on BACE regression (MolNet loader, index split)

In [5]:
import numpy as np
from deepchem.models import GraphConvModel
import tensorflow as tf
import deepchem as dc
from deepchem.molnet import load_bace_regression

In [6]:
# Fix the NumPy and TensorFlow seeds before any data or model is created, so
# that repeated Restart-and-Run-All executions start from the same random state.
np.random.seed(123)
tf.random.set_seed(123)

# Load the BACE regression dataset with graph-convolution features and an
# index-based train/validation/test split.
bace_tasks, bace_datasets, transformers = load_bace_regression(
    featurizer='GraphConv', splitter='index')
train_dataset, valid_dataset, test_dataset = bace_datasets

# Evaluation metric: Pearson r^2, with np.mean passed as the second argument
# (the reported key is 'mean-pearson_r2_score').
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols (not referenced by any visible cell; kept in
# case other code reads it)
n_feat = 75
# Batch size of models
batch_size = 128
# One regression output per task in bace_tasks.
model = GraphConvModel(
    len(bace_tasks), batch_size=batch_size, mode='regression')

In [11]:
# Train the graph-conv model on the training split for 20 epochs. The call is
# left as the cell's final expression so that its return value is displayed.
model.fit(
    train_dataset,
    nb_epoch=20,
)

0.061969642639160154

In [12]:
# Note: scores are not reproducible from run to run unless the random seeds
# are fixed before the data is loaded and the model is built.

print("Evaluating model")
# Score every split first (train, then validation, then test), passing the
# loader's transformers to evaluate(), and only then print the results.
train_scores, valid_scores, test_scores = [
    model.evaluate(split, [metric], transformers)
    for split in (train_dataset, valid_dataset, test_dataset)
]

for heading, scores in (
        ("Train scores", train_scores),
        ("Validation scores", valid_scores),
        ("Test scores", test_scores)):
    print(heading)
    print(scores)
    print() if False else None  # no blank separator: keep output identical

Evaluating model
Train scores
{'mean-pearson_r2_score': 0.9136103642740266}
Validation scores
{'mean-pearson_r2_score': 0.0011951266972749723}
Test scores
{'mean-pearson_r2_score': 0.2299841487297963}


# Method 2: Keras dense network on ECFP fingerprints (MolNet loader, random split)

In [16]:
import deepchem as dc
import tensorflow as tf
from deepchem.molnet import load_bace_regression

# Build the network layer by layer: a 1000-unit ReLU hidden layer, 50% dropout,
# and a single linear output unit for the regression target.
keras_model = tf.keras.Sequential()
keras_model.add(tf.keras.layers.Dense(1000, activation='relu'))
keras_model.add(tf.keras.layers.Dropout(rate=0.5))
keras_model.add(tf.keras.layers.Dense(1))

# Wrap the Keras network in a DeepChem model that is trained with an L2 loss.
model = dc.models.KerasModel(
    keras_model,
    dc.models.losses.L2Loss(),
)

In [17]:
# Load BACE regression with ECFP features and a random split; reload=False is
# passed through to the loader unchanged.
tasks, datasets, transformers = load_bace_regression(
    featurizer='ECFP',
    splitter='random',
    reload=False,
)

In [18]:
# Unpack the three splits, train for 50 epochs, then report Pearson r^2 on each
# split. evaluate() is called without transformers here.
train_dataset, valid_dataset, test_dataset = datasets
model.fit(train_dataset, nb_epoch=50)
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
for split_label, split_data in (
        ('training', train_dataset),
        ('validation', valid_dataset),
        ('test', test_dataset)):
    print(split_label + ' set score:', model.evaluate(split_data, [metric]))

training set score: {'pearson_r2_score': 0.9777418951115199}
validation set score: {'pearson_r2_score': 0.7191636615291138}
test set score: {'pearson_r2_score': 0.8024871094356235}


# Method 3: PyTorch dense network on the same ECFP split as method 2

In [19]:
import torch

# Feed-forward regressor: 1024 inputs -> 1000 hidden units (ReLU, 50% dropout)
# -> 1 output. Layers are created in the same order as before so that parameter
# names and initialisation order are unchanged.
pytorch_model = torch.nn.Sequential(
    torch.nn.Linear(in_features=1024, out_features=1000),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(in_features=1000, out_features=1),
)

In [20]:
# Wrap the PyTorch network in a DeepChem model trained with an L2 loss.
# NOTE: train_dataset / valid_dataset / test_dataset and `metric` are the ones
# left in the kernel by the method 2 cells; run those first.
model = dc.models.TorchModel(
    pytorch_model,
    dc.models.losses.L2Loss(),
)

model.fit(train_dataset, nb_epoch=50)
for split_label, split_data in (
        ('training', train_dataset),
        ('validation', valid_dataset),
        ('test', test_dataset)):
    print(split_label + ' set score:', model.evaluate(split_data, [metric]))

training set score: {'pearson_r2_score': 0.9771130105769968}
validation set score: {'pearson_r2_score': 0.710429314128445}
test set score: {'pearson_r2_score': 0.7811302408990711}


# Method 4: GraphConvModel on BACE loaded from CSV (manual normalization, index split)

In [37]:
import deepchem as dc
import numpy as np
from deepchem.models import GraphConvModel
import tensorflow as tf

In [38]:
# Single regression task: the 'pIC50' column of the CSV.
bace_tasks = ['pIC50']
# Graph-convolution featurizer (produces the ConvMol objects GraphConvModel consumes).
featurizer = dc.feat.ConvMolFeaturizer()
dataset_file = 'bace.csv'
# The 'mol' column holds the SMILES strings. `feature_field` replaces the
# deprecated `smiles_field` argument, which emits a deprecation warning and is
# scheduled for removal from DeepChem.
loader = dc.data.CSVLoader(
    tasks=bace_tasks, feature_field='mol', featurizer=featurizer)


smiles_field is deprecated and will be removed in a future version of DeepChem.Use feature_field instead.


In [39]:
# Read bace.csv through the loader and featurize it into a DeepChem dataset.
dataset = loader.featurize(
    dataset_file,
)



In [40]:
# Convert the featurized dataset to a pandas DataFrame for inspection
# (columns shown in the output: X, y, w, ids).
# NOTE(review): in the rows displayed, y decreases monotonically from the first
# row to the last, i.e. the file looks sorted by activity — relevant to any
# split that relies on row order.
df = dataset.to_dataframe()
# Bare expression so the notebook renders the frame (pandas truncates the middle rows).
df

Unnamed: 0,X,y,w,ids
0,<deepchem.feat.mol_graphs.ConvMol object at 0x...,9.154901,1.0,O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c...
1,<deepchem.feat.mol_graphs.ConvMol object at 0x...,8.853872,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(...
2,<deepchem.feat.mol_graphs.ConvMol object at 0x...,8.698970,1.0,S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...
3,<deepchem.feat.mol_graphs.ConvMol object at 0x...,8.698970,1.0,S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c...
4,<deepchem.feat.mol_graphs.ConvMol object at 0x...,8.698970,1.0,S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...
...,...,...,...,...
1508,<deepchem.feat.mol_graphs.ConvMol object at 0x...,3.000000,1.0,Clc1cc2nc(n(c2cc1)C(CC(=O)NCC1CCOCC1)CC)N
1509,<deepchem.feat.mol_graphs.ConvMol object at 0x...,3.000000,1.0,Clc1cc2nc(n(c2cc1)C(CC(=O)NCc1ncccc1)CC)N
1510,<deepchem.feat.mol_graphs.ConvMol object at 0x...,2.953115,1.0,Brc1cc(ccc1)C1CC1C=1N=C(N)N(C)C(=O)C=1
1511,<deepchem.feat.mol_graphs.ConvMol object at 0x...,2.733298,1.0,O=C1N(C)C(=NC(=C1)C1CC1c1cc(ccc1)-c1ccccc1)N


In [41]:
# One-element transformer pipeline: a normalizer for y, constructed from the
# full featurized dataset.
transformers = [
    dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset),
]

In [42]:
print('about to transform data')
# Apply each transformer in turn. NOTE: `dataset` is rebound to the transformed
# result, so re-running this cell without re-running the featurize cell would
# transform already-transformed data.
for transformer in transformers:
    dataset = transformer.transform(dataset)

about to transform data


In [43]:
# Splitter used to carve the dataset into train/validation/test.
# NOTE(review): IndexSplitter presumably splits by row position — confirm
# against the DeepChem docs. The DataFrame displayed earlier appears sorted by
# y, so the three splits may cover different activity ranges.
splitter = dc.splits.IndexSplitter()

In [44]:
# Split the transformed dataset into its three parts with the splitter's
# default fractions.
(train_dataset,
 valid_dataset,
 test_dataset) = splitter.train_valid_test_split(dataset)

In [45]:
# Fix the NumPy and TensorFlow seeds before the model is built so that weight
# initialisation and training start from the same random state on every run.
np.random.seed(123)
tf.random.set_seed(123)

# Evaluation metric: Pearson r^2, with np.mean passed as the second argument
# (the reported key is 'mean-pearson_r2_score').
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols (not referenced by any visible cell; kept in
# case other code reads it)
n_feat = 75
# Batch size of models
batch_size = 128
# One regression output per task in bace_tasks.
model = GraphConvModel(
    len(bace_tasks), batch_size=batch_size, mode='regression')

In [46]:
# Train the graph-conv model on the training split for 100 epochs. The call is
# left as the cell's final expression so that its return value is displayed.
model.fit(
    train_dataset,
    nb_epoch=100,
)







0.03212337017059326

In [47]:
print("Evaluating model")
# Evaluate each split exactly once and print those results. The original cell
# computed these three values, discarded them, and then ran evaluate() a second
# time without the transformers for the printed scores.
# Passing the transformers matches the other GraphConv sections of this
# notebook. Pearson r^2 is invariant under the affine y-normalization, so the
# reported numbers should be the same either way.
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
test_scores = model.evaluate(test_dataset, [metric], transformers)

print('training set score:', train_scores)
print('validation set score:', valid_scores)
print('test set score:', test_scores)

Evaluating model
training set score: {'mean-pearson_r2_score': 0.9458961165826169}
validation set score: {'mean-pearson_r2_score': 0.0022668726183954326}
test set score: {'mean-pearson_r2_score': 0.15455300321141655}


# Method 5: GraphConvModel on BACE regression (MolNet loader, random split)

In [48]:
import numpy as np
from deepchem.models import GraphConvModel
import tensorflow as tf
import deepchem as dc
from deepchem.molnet import load_bace_regression

In [49]:
# Fix the NumPy and TensorFlow seeds before any data or model is created, so
# that the random split, weight initialisation and training start from the same
# random state on every run.
# NOTE(review): if the loader reuses a cached copy of the split dataset, the
# seed will not affect the split — confirm against the loader's reload behaviour.
np.random.seed(123)
tf.random.set_seed(123)

# Load the BACE regression dataset with graph-convolution features and a
# random train/validation/test split.
bace_tasks, bace_datasets, transformers = load_bace_regression(
    featurizer='GraphConv', splitter='random')
train_dataset, valid_dataset, test_dataset = bace_datasets

# Evaluation metric: Pearson r^2, with np.mean passed as the second argument
# (the reported key is 'mean-pearson_r2_score').
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols (not referenced by any visible cell; kept in
# case other code reads it)
n_feat = 75
# Batch size of models
batch_size = 128
# One regression output per task in bace_tasks.
model = GraphConvModel(
    len(bace_tasks), batch_size=batch_size, mode='regression')

In [50]:
# Train the graph-conv model on the training split for 100 epochs. The call is
# left as the cell's final expression so that its return value is displayed.
model.fit(
    train_dataset,
    nb_epoch=100,
)







0.04257047653198242

In [51]:
# Note: scores are not reproducible from run to run unless the random seeds
# are fixed before the data is loaded and the model is built.

print("Evaluating model")
# Score every split first (train, then validation, then test), passing the
# loader's transformers to evaluate(), and only then print the results.
train_scores, valid_scores, test_scores = [
    model.evaluate(split, [metric], transformers)
    for split in (train_dataset, valid_dataset, test_dataset)
]

for heading, scores in (
        ("Train scores", train_scores),
        ("Validation scores", valid_scores),
        ("Test scores", test_scores)):
    print(heading)
    print(scores)

Evaluating model
Train scores
{'mean-pearson_r2_score': 0.953137496363069}
Validation scores
{'mean-pearson_r2_score': 0.5913067558534401}
Test scores
{'mean-pearson_r2_score': 0.590841922208634}
