# Train a neural network to predict solubility of a molecule using Deepchem

In [20]:
import deepchem as dc

In [21]:
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='GraphConv')

In [22]:
train, valid, test = datasets

In [23]:
print(train.X.shape)

(902,)


In [24]:
print(train.y.shape)

(902, 1)


In [25]:
print(train.w.shape)

(902, 1)


In [26]:
print(test.X.shape)

(113,)


In [27]:
model = dc.models.GraphConvModel(n_tasks=1, mode='regression', graph_conv_layers=[128,128], dropout=0.20)

In [28]:
model.fit(train, nb_epoch=100)

0.07936591148376465

In [30]:
metrics = dc.metrics.Metric(dc.metrics.pearson_r2_score)
train_scores = model.evaluate(train, [metrics], transformers)
print(train_scores)

test_score = model.evaluate(test, [metrics], transformers)
print(test_score)

{'pearson_r2_score': 0.9374500190907639}
{'pearson_r2_score': 0.6986325676356265}


In [31]:
smiles = ['Nc1cccc(O)c1', 'CC1CCCC(C)C1']

In [32]:
from rdkit import Chem

In [33]:
mol = [Chem.MolFromSmiles(molecules) for molecules in smiles]
mol

[<rdkit.Chem.rdchem.Mol at 0x172100e0740>,
 <rdkit.Chem.rdchem.Mol at 0x172100e0200>]

In [34]:
featurizer = dc.feat.ConvMolFeaturizer()
featurized_mol = featurizer.featurize(mol)

In [35]:
prediction = model.predict_on_batch(featurized_mol)
print(prediction)

[[ 1.8283415 ]
 [-0.61796635]]
