In [1]:
# Logging/warning configuration has to happen before the heavy imports:
# TF_CPP_MIN_LOG_LEVEL is only honoured if it is set before TensorFlow's
# native runtime is loaded, and the warnings filter should already be active
# while the libraries below emit their import-time warnings.
# NOTE(review): assumes deepchem (and possibly util) pull in TensorFlow on
# import — confirm for the installed versions.
import os
import warnings

warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# The star import is kept ahead of the aliased imports so that pd/np/dc/plt
# below still take precedence over any same-named objects exported by util.
from util import *
import pandas as pd
import numpy as np
import deepchem as dc
from matplotlib import pyplot as plt

In [2]:
# Read esol.csv (path resolved against the current working directory) into a
# pandas DataFrame.
data = pd.read_csv(filepath_or_buffer="esol.csv")

## Graph Convolutional Models

In [None]:
# Load the Delaney data with the 'GraphConv' featurizer. No splitter is
# requested from the loader; the split is done explicitly below on dataset[0].
_, dataset, transformers = dc.molnet.load_delaney(
    splitter=None,
    featurizer='GraphConv',
)

# Random train/test partition with 80% of the samples used for training; the
# fixed seed keeps the partition repeatable across runs.
splitter = dc.splits.RandomSplitter()
train_set, test_set = splitter.train_test_split(
    dataset[0],
    seed=0,
    frac_train=0.8,
)

In [None]:
# Candidate values for each hyperparameter handed to the grid search.
hyper_params = dict(
    batch_size=[64, 32],
    graph_conv_layers=[[64, 64], [128, 128]],
    dense_layer_size=[256, 128],
    dropout=[0.0],
)

# grid_search_graph_conv is not defined in this notebook (presumably it comes
# from the `util` star import — confirm). Its four return values are the
# settings used to build the final GraphConvModel.
(batch_size, conv_layers, layer_sizes, dropout_rate) = grid_search_graph_conv(
    train_set,
    hyper_params,
    transformers,
)

In [5]:
# Instantiate the graph-convolution regressor (single task) from the
# grid-search results and train it on the training split.
model = dc.models.GraphConvModel(
    1,
    mode='regression',
    batch_size=batch_size,
    graph_conv_layers=conv_layers,
    dense_layer_size=layer_sizes,
    dropout=dropout_rate,
)
model.fit(train_set, nb_epoch=100)

# Evaluate with RMS error on the training split first, then the test split.
# `transformers` is forwarded to evaluate() (presumably so scores are reported
# on the untransformed target scale — confirm).
metric = dc.metrics.Metric(dc.metrics.rms_score, np.mean)
train_scores, test_scores = (
    model.evaluate(split, [metric], transformers)
    for split in (train_set, test_set)
)

# Each call prints the label and the score dict on separate lines.
print("Train scores", train_scores, sep="\n")
print("Test scores", test_scores, sep="\n")

Train scores
{'mean-rms_score': 0.2364051547554253}
Test scores
{'mean-rms_score': 0.8407922699434737}


## Message Passing Neural Network

In [2]:
# Reload the Delaney data, this time with the 'Weave' featurizer used for the
# message-passing model. No splitter is requested from the loader; the split
# is done explicitly below on dataset[0].
_, dataset, transformers = dc.molnet.load_delaney(
    splitter=None,
    featurizer='Weave',
)

# Random train/test partition with 80% of the samples used for training; the
# fixed seed keeps the partition repeatable across runs.
splitter = dc.splits.RandomSplitter()
train_set, test_set = splitter.train_test_split(
    dataset[0],
    seed=0,
    frac_train=0.8,
)

In [None]:
# Candidate values for each hyperparameter handed to the grid search; every
# list here has a single entry, so only one combination is explored.
hyper_params = dict(
    batch_size=[64],
    n_atom_feat=[75],
    n_pair_feat=[14],
    n_hidden=[100],
)

# grid_search_mpnn is not defined in this notebook (presumably it comes from
# the `util` star import — confirm). Its four return values are the settings
# used to build the final MPNNModel.
(batch_size, n_atom_feat, n_pair_feat, n_hidden) = grid_search_mpnn(
    train_set,
    hyper_params,
    transformers,
)

In [None]:
# Instantiate the message-passing regressor (single task) from the grid-search
# results and train it on the training split.
model = dc.models.MPNNModel(
    1,
    mode='regression',
    batch_size=batch_size,
    use_queue=False,
    n_atom_feat=n_atom_feat,
    n_pair_feat=n_pair_feat,
    n_hidden=n_hidden,
    learning_rate=0.0001,
    T=3,
    M=5,
)
model.fit(train_set, nb_epoch=50, checkpoint_interval=100)

# Evaluate with RMS error on the training split first, then the test split.
# `transformers` is forwarded to evaluate() (presumably so scores are reported
# on the untransformed target scale — confirm).
metric = dc.metrics.Metric(dc.metrics.rms_score, np.mean)
train_scores, test_scores = (
    model.evaluate(split, [metric], transformers)
    for split in (train_set, test_set)
)

# Each call prints the label and the score dict on separate lines.
print("Train scores", train_scores, sep="\n")
print("Test scores", test_scores, sep="\n")

## Random Forest Regressor

In [None]:
# Reload the Delaney data with the loader's default featurizer (no
# `featurizer` argument is passed). No splitter is requested from the loader;
# the split is done explicitly below on dataset[0].
_, dataset, transformers = dc.molnet.load_delaney(
    splitter=None,
)

# Random train/test partition with 80% of the samples used for training; the
# fixed seed keeps the partition repeatable across runs.
splitter = dc.splits.RandomSplitter()
train_set, test_set = splitter.train_test_split(
    dataset[0],
    seed=0,
    frac_train=0.8,
)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Candidate values for each RandomForestRegressor hyperparameter handed to the
# grid search.
hyper_params = {
    'n_estimators': [50, 100, 250, 500],
    'criterion': ['squared_error', 'absolute_error'],
}

# Unpack into names that match the two hyperparameters searched here, instead
# of reusing the four MPNN variable names.
# NOTE(review): assumes grid_search_random_forest returns one best value per
# key of `hyper_params`, in key order, like the other grid_search_* helpers
# appear to — confirm against util.
n_estimators, criterion = grid_search_random_forest(train_set, hyper_params, transformers)

# Build the forest from the selected hyperparameters rather than hard-coded
# values, so the grid search actually determines the final model. random_state
# is fixed so the fitted forest is repeatable.
sklearn_model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion, random_state=0)
# Wrap the scikit-learn estimator in DeepChem's SklearnModel.
model = dc.models.SklearnModel(sklearn_model)