In [1]:
!pip install --pre deepchem
!pip install hyperopt

Collecting deepchem
  Downloading deepchem-2.8.1.dev20240724182210-py3-none-any.whl.metadata (2.0 kB)
Collecting rdkit (from deepchem)
  Downloading rdkit-2024.3.3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.9 kB)
Downloading deepchem-2.8.1.dev20240724182210-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rdkit-2024.3.3-cp310-cp310-manylinux_2_28_x86_64.whl (33.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.1/33.1 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit, deepchem
Successfully installed deepchem-2.8.1.dev20240724182210 rdkit-2024.3.3


In [4]:
import deepchem as dc
# Load the MoleculeNet HIV data with ECFP features and a scaffold split.
# The loader returns (task names, dataset splits, transformers).
hiv_bundle = dc.molnet.load_hiv(split='scaffold', featurizer='ECFP')
tasks, datasets, transformers = hiv_bundle

# `datasets` holds the three splits in train / validation / test order.
train_dataset, valid_dataset, test_dataset = datasets



In [6]:
from hyperopt import hp, fmin, tpe, Trials

In [7]:
# Hyperparameter search space sampled by hyperopt.
# Each key matches a keyword that the objective reads from its `args` dict;
# the first argument of every hp.* call is the label hyperopt reports results
# under (note the dropout label is 'dropout' while its key is 'dropouts').
search_space = dict(
    # Categorical choice between four hidden-layer layouts.
    layer_sizes=hp.choice('layer_sizes', [[500], [1000], [2000], [1000, 1000]]),
    # Uniformly sampled in [0.2, 0.5].
    dropouts=hp.uniform('dropout', low=0.2, high=0.5),
    # Uniformly sampled in [0.0001, 0.001].
    learning_rate=hp.uniform('learning_rate', low=0.0001, high=0.001),
)

In [13]:
import tempfile

# Validation metric shared by the callback and the final evaluation.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)

def fm(args):
  """Objective for hyperopt: train one classifier and return its loss.

  Parameters
  ----------
  args : dict
    One sample drawn from the search space. The keys 'layer_sizes',
    'dropouts' and 'learning_rate' are read and forwarded to the model.

  Returns
  -------
  The validation ROC AUC of the restored checkpoint multiplied by -1, so
  that a minimiser such as hyperopt's fmin effectively maximises ROC AUC.
  """
  # Checkpoints written during this trial are only needed until the final
  # evaluation below, so keep them in a directory that is removed on exit
  # (also when training raises) instead of leaking one directory per trial.
  with tempfile.TemporaryDirectory() as save_dir:
    model = dc.models.MultitaskClassifier(
        n_tasks=len(tasks),
        n_features=1024,  # assumes 1024-wide ECFP feature vectors -- TODO confirm
        layer_sizes=args['layer_sizes'],
        dropouts=args['dropouts'],
        learning_rate=args['learning_rate'])

    # Score the validation set every 1000 steps and checkpoint into save_dir.
    # save_on_minimum=False because a higher ROC AUC is better.
    validation = dc.models.ValidationCallback(
        valid_dataset, 1000, [metric],
        save_dir=save_dir,
        transformers=transformers,
        save_on_minimum=False)

    model.fit(train_dataset, nb_epoch=25, callbacks=validation)

    # Reload the checkpoint kept by the callback and score that model rather
    # than the weights from the last training step.
    model.restore(model_dir=save_dir)
    valid_score = model.evaluate(valid_dataset, [metric], transformers)

  return -1*valid_score['roc_auc_score']


In [14]:
# Run the TPE search: 15 evaluations of the objective `fm` over `search_space`.
# `trials` collects the per-evaluation records; `best` receives fmin's result.
trials = Trials()
best = fmin(
    fn=fm,
    space=search_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

  0%|          | 0/15 [00:00<?, ?trial/s, best loss=?]Step 1000 validation: roc_auc_score=0.743966
Step 2000 validation: roc_auc_score=0.767642
Step 3000 validation: roc_auc_score=0.777662
Step 4000 validation: roc_auc_score=0.784627
Step 5000 validation: roc_auc_score=0.785042
Step 6000 validation: roc_auc_score=0.777364
Step 7000 validation: roc_auc_score=0.791405
Step 8000 validation: roc_auc_score=0.777883
  7%|▋         | 1/15 [02:38<37:05, 158.94s/trial, best loss: -0.7914048721340388]Step 1000 validation: roc_auc_score=0.726446
Step 2000 validation: roc_auc_score=0.774004
Step 3000 validation: roc_auc_score=0.773903
Step 4000 validation: roc_auc_score=0.775721
Step 5000 validation: roc_auc_score=0.772847
Step 6000 validation: roc_auc_score=0.767186
Step 7000 validation: roc_auc_score=0.761912
Step 8000 validation: roc_auc_score=0.758989
 13%|█▎        | 2/15 [07:25<50:40, 233.87s/trial, best loss: -0.7914048721340388]Step 1000 validation: roc_auc_score=0.759919
Step 2000 valida

In [15]:
# Show the best assignment found by the search, keyed by hyperopt label.
# NOTE(review): for the hp.choice parameter the reported value appears to be
# the index into the list of options rather than the option itself -- confirm.
print(f"Best: {best}")

Best: {'dropout': 0.23306727396399113, 'layer_sizes': 1, 'learning_rate': 0.000959210283134042}
