## Install deepchem

### Colab

In [None]:
# Download DeepChem's Colab install script and save it as conda_installer.py
# (-L follows redirects, -o names the output file).
# NOTE(review): the script is fetched from the master branch, so it is not pinned to a release.
!curl -Lo conda_installer.py https://raw.githubusercontent.com/deepchem/deepchem/master/scripts/colab_install.py
import conda_installer
# Run the downloaded script's install() entry point; what it installs is defined by that script.
conda_installer.install()
# List the conda environments of the Miniconda installation under /root/miniconda.
!/root/miniconda/bin/conda info -e

In [None]:
# Install the pinned DeepChem nightly build; --pre lets pip consider pre-release versions.
!pip install --pre deepchem-nightly==2.3.0
import deepchem
# Last expression of the cell: the installed version is shown as the cell output.
deepchem.__version__

In [None]:
import tensorflow

# Fail fast when the runtime has no GPU attached: the reported device name
# must be exactly the first GPU, otherwise the cell aborts with SystemError.
device_name = tensorflow.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')

# Last expression of the cell: the TensorFlow version is shown as the cell output.
tensorflow.__version__

### Locally

Follow the instructions here to set up your conda environment with DeepChem: https://deepchem.readthedocs.io/en/latest/installation.html#conda-installation

Additionally, install the requirements:

pip install -r requirements.txt

Test1

## Initialize

In [None]:
import itertools
from dataclasses import dataclass
from typing import Union, List, Tuple, NoReturn, Optional
import random
import os

import deepchem as dc
from deepchem.models import KerasModel
from tqdm import tqdm

# Absolute path of the project folder; it lives under /content/drive, the
# mount point used for Google Drive in this notebook.
ROOT_DIR = "/content/drive/My Drive/Colab Notebooks/deep learning for the life sciences/chapter_5_protein_binding"
# Dataset folder inside the project folder.
DATA_DIR = os.path.join(ROOT_DIR, "data")

In [None]:
# Export the data folder through the DEEPCHEM_DATA_DIR environment variable.
# The value is taken from DATA_DIR instead of repeating the absolute Drive
# path, so the two can no longer drift apart when the project folder moves.
os.environ["DEEPCHEM_DATA_DIR"] = DATA_DIR
# Echo the assignment, mirroring the confirmation line the %env magic prints.
print(f"env: DEEPCHEM_DATA_DIR={os.environ['DEEPCHEM_DATA_DIR']}")

In [None]:
# With no arguments, %env lists the environment variables of the kernel process.
# NOTE(review): this output is stored in the notebook — check it for secrets before sharing.
%env

In [None]:
import os
# Sanity check: show the current value as the cell output.
# Indexing os.environ raises KeyError when the variable is not set.
os.environ['DEEPCHEM_DATA_DIR']

## Load the data

### Cached data

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; ROOT_DIR and DATA_DIR point below this mount point.
drive.mount('/content/drive')

In [None]:
from pathlib import Path
import os

# Sanity check after mounting: print the entries of the data folder first,
# then the entries of the Drive mount point.
for directory in (DATA_DIR, '/content/drive'):
    print(os.listdir(directory))

In [None]:
# To unmount Google Drive, uncomment and run the two lines below.
# from google.colab import drive
# drive.flush_and_unmount()

In [None]:
# Load the PDBBind "core" subset with the "grid" featurizer and a random split
# seeded with 100. Raw data is looked up under DATA_DIR and results are stored
# under DATA_DIR/from-pdbbind.
# NOTE(review): reload=True presumably reuses previously featurized data found
# in save_dir — confirm against the installed DeepChem version.
pdbbind_tasks, pdbbind_datasets, transformers = dc.molnet.load_pdbbind(
    featurizer="grid",
    split="random",
    subset="core",
    data_dir=DATA_DIR,
    save_dir=os.path.join(DATA_DIR, "from-pdbbind"),
    split_seed=100,
    reload=True,
)
# The datasets come back as a 3-tuple, unpacked here as train / validation / test.
train_dataset, valid_dataset, test_dataset = pdbbind_datasets

In [None]:
@dataclass
class OutputModel:
    """Bundle a regressor with the hyperparameters and scores of its run.

    Attributes:
        model: The regressor this record describes.
        layers: Layer sizes the model was created with.
        dropout: Dropout setting the model was created with, either a single
            rate or a list of rates.
        seed: Random seed used for the run.
        train_score: Score on the training set, or ``None`` when the model
            has not been evaluated yet.
        test_score: Score on the test set, or ``None`` when the model has not
            been evaluated yet.
    """

    model: dc.models.MultitaskRegressor
    layers: List[int]
    dropout: Union[float, List[float]]
    seed: int
    # The scores are Optional, so they default to None: a record can be
    # created before evaluation and filled in afterwards.
    train_score: Optional[float] = None
    test_score: Optional[float] = None


def create_model(
    layer_sizes: List[int], dropouts: Union[float, List[float]], learning_rate: float = 0.0003
) -> dc.models.MultitaskRegressor:
    """Construct a MultitaskRegressor sized for the loaded PDBBind data.

    The number of tasks comes from the module-level ``pdbbind_tasks`` and the
    number of input features from the second dimension of
    ``train_dataset.X``. The model directory is placed under
    ``ROOT_DIR/model_dumps`` and its name encodes the layer sizes, the dropout
    setting and the learning rate.

    Args:
        layer_sizes: Layer sizes forwarded to the regressor.
        dropouts: Dropout setting forwarded to the regressor.
        learning_rate: Learning rate forwarded to the regressor.

    Returns:
        The newly constructed regressor; no training is done here.
    """
    layer_tag = "-".join(str(size) for size in layer_sizes)
    dump_dir = f"{ROOT_DIR}/model_dumps/pdbbind_nn__{layer_tag}_dropout_{dropouts}_learningrate_{learning_rate}"
    return dc.models.MultitaskRegressor(
        n_tasks=len(pdbbind_tasks),
        n_features=train_dataset.X.shape[1],
        layer_sizes=layer_sizes,
        dropouts=dropouts,
        learning_rate=learning_rate,
        model_dir=dump_dir,
    )


def evaluate_model(model: dc.models.MultitaskRegressor) -> Tuple[float, float]:
    """Score ``model`` on the training and test splits.

    Both evaluations use the ``pearson_r2_score`` metric together with the
    module-level ``transformers``; the training split is evaluated first.

    Args:
        model: The regressor to evaluate.

    Returns:
        A ``(train, test)`` pair holding the ``"pearson_r2_score"`` entry of
        each evaluation result.
    """
    score_key = "pearson_r2_score"
    r2_metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
    on_train, on_test = (
        model.evaluate(split, [r2_metric], transformers)[score_key]
        for split in (train_dataset, test_dataset)
    )
    return on_train, on_test


def visualize_model_output(model: OutputModel) -> None:
    """Print a report for one run, followed by the model summary.

    The report shows the class name of the wrapped regressor, the training
    and test scores, and the hyperparameters (layers, dropout, seed). After
    that, ``summary()`` is called on the regressor's ``.model`` attribute and
    a closing banner is printed.

    Args:
        model: The run record to display.

    Returns:
        None. The function only prints; it was previously annotated
        ``NoReturn``, which is reserved for functions that never return.
    """
    banner = "############################################################"
    # Each entry is one printed line. The trailing spaces after
    # "Hyperparameters:" and the layers value are part of the existing output
    # format and are kept on purpose.
    report_lines = [
        banner,
        f"Visualizing {model.model.__class__.__name__} model",
        f"  Training score: {model.train_score}",
        f"  Test score: {model.test_score}",
        "  Hyperparameters: ",
        f"    Layers: {model.layers} ",
        f"    Dropout: {model.dropout}",
        f"    Seed: {model.seed}",
        "",
        "Model summary:",
        "",
    ]
    print("\n".join(report_lines))
    # NOTE(review): .model on the regressor is presumably the underlying Keras
    # model — confirm; only its summary() method is relied on here.
    model.model.model.summary()
    print(banner)

In [None]:
# Layer configurations to try; the commented-out entries are alternatives
# that are currently switched off.
layers_list = [
    # [125, 62],
    # [250, 125],
    # [500, 250],
    # [1000, 500],
    # [2000, 1000],
    # [4000, 2000],
    [2000, 1000, 500, 250],
]
dropouts_list = [0.2, 0.5, 0.8]
seeds = [0, 10, 100, 1000]

# Full grid of (layers, dropout, seed) tuples. The seed varies fastest and the
# layer configuration slowest, the same order a Cartesian product yields.
hyperparameters = [
    (layers, dropout, seed)
    for layers in layers_list
    for dropout in dropouts_list
    for seed in seeds
]

# Last expression of the cell: show the three search axes as the cell output.
(layers_list, dropouts_list, seeds)

In [None]:
nn_models = []

In [None]:
for i in tqdm(range(len(hyperparameters))):
    if i < len(models):
        continue
    layers, dropout, seed = hyperparameters[i]
    random.seed(seed)
    model = create_model(layer_sizes=layers, dropouts=dropout)
    model.fit(train_dataset, nb_epoch=50)
    train_score, test_score = evaluate_model(model=model)
    models.append(
        OutputModel(
            model=model,
            layers=layers,
            dropout=dropout,
            seed=seed,
            train_score=train_score,
            test_score=test_score,
        )
    )

In [None]:
for model in models:
    visualize_model_output(model)