In [1]:
import numpy as np
import tensorflow as tf
import deepchem as dc
from sklearn.kernel_ridge import KernelRidge

from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals


In [2]:
dataset_file = "../../deepchem/datasets/gdb1k.sdf"
smiles_field = "smiles"
mol_field = "mol"
featurizer = dc.feat.CoulombMatrixEig(23, remove_hydrogens=False)

loader = dc.data.SDFLoader(
      tasks=["atomization_energy"], smiles_field="smiles",
      featurizer=featurizer,
      mol_field="mol")
dataset = loader.featurize(dataset_file)


Loading raw samples now.
shard_size: 8192
Reading structures from ../../deepchem/datasets/gdb1k.sdf.
Currently featurizing feature_type: CoulombMatrixEig
Featurizing sample 0
TIMING: featurizing shard 0 took 0.936 s
TIMING: dataset construction took 1.251 s
Loading dataset from disk.


In [10]:
# Split dataset
random_splitter = dc.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = random_splitter.train_valid_test_split(dataset)

# Apply normalisation transformer
transformers = [
    dc.trans.NormalizationTransformer(transform_X=True, dataset=train_dataset),
    dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]

for dataset in [train_dataset, valid_dataset, test_dataset]:
    for transformer in transformers:
        dataset = transformer.transform(dataset)

TIMING: dataset construction took 0.014 s
Loading dataset from disk.
TIMING: dataset construction took 0.013 s
Loading dataset from disk.
TIMING: dataset construction took 0.012 s
Loading dataset from disk.
TIMING: dataset construction took 0.014 s
Loading dataset from disk.
TIMING: dataset construction took 0.010 s
Loading dataset from disk.
TIMING: dataset construction took 0.011 s
Loading dataset from disk.
TIMING: dataset construction took 0.016 s
Loading dataset from disk.
TIMING: dataset construction took 0.009 s
Loading dataset from disk.
TIMING: dataset construction took 0.009 s
Loading dataset from disk.


  X = np.nan_to_num((X - self.X_means) / self.X_stds)


In [4]:
def krr_model_builder(model_params, model_dir):
    sklearn_model = KernelRidge(**model_params)
    return dc.models.SklearnModel(sklearn_model, model_dir)

params_dict = {
    "kernel": ["laplacian", "linear"],
    "alpha": [0.0001, 0.001, 0.01, 0.01],
    "gamma": [0.0001, 0.001, 0.01, 0.1]
}

metric = dc.metrics.Metric(dc.metrics.mean_absolute_error)

optimizer = dc.hyper.HyperparamOpt(krr_model_builder)
best_krr, best_krr_hyperparams, all_krr_results = optimizer.hyperparam_search(
    params_dict, train_dataset, valid_dataset, transformers,
    metric=metric)




Fitting model 1/32
hyperparameters: {'kernel': 'laplacian', 'alpha': 0.0001, 'gamma': 0.0001}
computed_metrics: [87980.70585905973]
Model 1/32, Metric mean_absolute_error, Validation set 0: 87980.705859
	best_validation_score so far: 87980.705859
Fitting model 2/32
hyperparameters: {'kernel': 'laplacian', 'alpha': 0.0001, 'gamma': 0.001}
computed_metrics: [92282.21589433127]
Model 2/32, Metric mean_absolute_error, Validation set 1: 92282.215894
	best_validation_score so far: 92282.215894
Fitting model 3/32
hyperparameters: {'kernel': 'laplacian', 'alpha': 0.0001, 'gamma': 0.01}
computed_metrics: [90250.99087160644]
Model 3/32, Metric mean_absolute_error, Validation set 2: 90250.990872
	best_validation_score so far: 92282.215894
Fitting model 4/32
hyperparameters: {'kernel': 'laplacian', 'alpha': 0.0001, 'gamma': 0.1}
computed_metrics: [104291.15005851959]
Model 4/32, Metric mean_absolute_error, Validation set 3: 104291.150059
	best_validation_score so far: 104291.150059
Fitting model 5

In [9]:
## Keras implementation 
from tensorflow import keras

# Sigmoid activation function 

model = tf.keras.Sequential([
# Adds a densely-connected layer with 10 units to the model:
keras.layers.Dense(400, activation='sigmoid', input_shape=(23,)),
keras.layers.Dense(100, activation='sigmoid'),
keras.layers.Dense(100, activation='sigmoid'),
# Add a softmax layer with 10 output units:
keras.layers.Dense(1, activation='sigmoid')])

# Configure a model for mean-squared error regression.
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss='mse',       # mean squared error
              metrics=['mae', 'mse'])  # mean absolute error

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

Instructions for updating:
Use tf.cast instead.


In [11]:
train_dataset

<deepchem.data.datasets.DiskDataset at 0x1a2db83390>

In [None]:
train_dataset, valid_dataset, test_dataset

history = model.fit(X_train, y_train, validation_split = 0.2, epochs=30, 
                    callbacks=[early_stop], batch_size=10, verbose=0)

In [5]:
from deepchem.molnet import load_qm7_from_mat
from deepchem.models.tensorgraph.optimizers import ExponentialDecay

print(tf.__version__)

qm7_tasks, datasets, transformers = load_qm7_from_mat(
    split='stratified', move_mean=True, reload=False)


train_dataset, valid_dataset, test_dataset = datasets

fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)]

rate = 0.001
dc_model = dc.models.MultitaskFitTransformRegressor(
    n_tasks=1,
    n_features=[23, 23],
    learning_rate=rate,
    momentum=.8,
    batch_size=25,
    weight_init_stddevs=[1 / np.sqrt(400), 1 / np.sqrt(100), 1 / np.sqrt(100)],
    bias_init_consts=[0., 0., 0.],
    layer_sizes=[400, 100, 100],
    dropouts=[0.01, 0.01, 0.01],
    fit_transformers=fit_transformers,
    n_evals=10,
    seed=123)




1.13.1
TIMING: dataset construction took 0.651 s
Loading dataset from disk.
TIMING: dataset construction took 0.515 s
Loading dataset from disk.
TIMING: dataset construction took 0.225 s
Loading dataset from disk.
TIMING: dataset construction took 0.213 s
Loading dataset from disk.
TIMING: dataset construction took 0.434 s
Loading dataset from disk.
TIMING: dataset construction took 0.057 s
Loading dataset from disk.
TIMING: dataset construction took 0.055 s
Loading dataset from disk.
Instructions for updating:
Colocations handled automatically by placer.


In [7]:
# Fit trained model
dc_model.fit(train_dataset, nb_epoch=50)

train_scores = dc_model.evaluate(train_dataset, metric, transformers)
valid_scores = dc_model.evaluate(valid_dataset, metric, transformers)

InvalidArgumentError: You must feed a value for placeholder tensor 'input_2' with dtype float and shape [?]
	 [[node input_2 (defined at /Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/deepchem/models/tensorgraph/fcnet.py:221) ]]

Caused by op 'input_2', defined at:
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 499, in start
    self.io_loop.start()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/asyncio/base_events.py", line 1425, in _run_once
    handle._run()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/asyncio/events.py", line 127, in _run
    self._callback(*self._args)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-8ee6a435eb18>", line 27, in <module>
    seed=123)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/deepchem/models/tensorgraph/fcnet.py", line 346, in __init__
    n_tasks, n_features, batch_size=batch_size, **kwargs)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/deepchem/models/tensorgraph/fcnet.py", line 221, in __init__
    dropout_switch = Input(shape=tuple())
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/keras/engine/input_layer.py", line 231, in Input
    input_tensor=tensor)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/keras/engine/input_layer.py", line 107, in __init__
    name=self.name)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/keras/backend.py", line 876, in placeholder
    x = array_ops.placeholder(dtype, shape=shape, name=name)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 2077, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 5791, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_2' with dtype float and shape [?]
	 [[node input_2 (defined at /Users/nathalie.willems/anaconda3/envs/deepchem/lib/python3.5/site-packages/deepchem/models/tensorgraph/fcnet.py:221) ]]
