# Hyperparameter tuning with Ray Tune and visualization using TensorBoard and Weights & Biases

## Initial imports

In [2]:
import os
import numpy as np
import pandas as pd
import torch
from torch.optim import SGD, lr_scheduler

from pytorch_widedeep import Trainer
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.models import TabMlp, WideDeep
from torchmetrics import F1Score as F1_torchmetrics
from torchmetrics import Accuracy as Accuracy_torchmetrics
from torchmetrics import Precision as Precision_torchmetrics
from torchmetrics import Recall as Recall_torchmetrics
from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score
from pytorch_widedeep.initializers import XavierNormal
from pytorch_widedeep.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    RayTuneReporter,
)
from pytorch_widedeep.datasets import load_bio_kdd04

from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune import JupyterNotebookReporter
from ray.tune.integration.wandb import WandbLoggerCallback, wandb_mixin
import wandb

import tracemalloc

# Start tracing Python memory allocations for the rest of the session.
# NOTE(review): no visible cell reads the trace back (no snapshot/statistics
# call) — confirm this is still needed.
tracemalloc.start()

# show up to 200 columns and 300 rows when pandas objects are displayed
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 300)

In [2]:
# Load the bio_kdd04 dataset shipped with pytorch-widedeep as a DataFrame
df = load_bio_kdd04(as_frame=True)
# Preview the first rows
df.head()

Unnamed: 0,EXAMPLE_ID,BLOCK_ID,target,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77
0,279,261532,0,52.0,32.69,0.3,2.5,20.0,1256.8,-0.89,0.33,11.0,-55.0,267.2,0.52,0.05,-2.36,49.6,252.0,0.43,1.16,-2.06,-33.0,-123.2,1.6,-0.49,-6.06,65.0,296.1,-0.28,-0.26,-3.83,-22.6,-170.0,3.06,-1.05,-3.29,22.9,286.3,0.12,2.58,4.08,-33.0,-178.9,1.88,0.53,-7.0,-44.0,1987.0,-5.41,0.95,-4.0,-57.0,722.9,-3.26,-0.55,-7.5,125.5,1547.2,-0.36,1.12,9.0,-37.0,72.5,0.47,0.74,-11.0,-8.0,1595.1,-1.64,2.83,-2.0,-50.0,445.2,-0.35,0.26,0.76
1,279,261533,0,58.0,33.33,0.0,16.5,9.5,608.1,0.5,0.07,20.5,-52.5,521.6,-1.08,0.58,-0.02,-3.2,103.6,-0.95,0.23,-2.87,-25.9,-52.2,-0.21,0.87,-1.81,10.4,62.0,-0.28,-0.04,1.48,-17.6,-198.3,3.43,2.84,5.87,-16.9,72.6,-0.31,2.79,2.71,-33.5,-11.6,-1.11,4.01,5.0,-57.0,666.3,1.13,4.38,5.0,-64.0,39.3,1.07,-0.16,32.5,100.0,1893.7,-2.8,-0.22,2.5,-28.5,45.0,0.58,0.41,-19.0,-6.0,762.9,0.29,0.82,-3.0,-35.0,140.3,1.16,0.39,0.73
2,279,261534,0,77.0,27.27,-0.91,6.0,58.5,1623.6,-1.4,0.02,-6.5,-48.0,621.0,-1.2,0.14,-0.2,73.6,609.1,-0.44,-0.58,-0.04,-23.0,-27.4,-0.72,-1.04,-1.09,91.1,635.6,-0.88,0.24,0.59,-18.7,-7.2,-0.6,-2.82,-0.71,52.4,504.1,0.89,-0.67,-9.3,-20.8,-25.7,-0.77,-0.85,0.0,-20.0,2259.0,-0.94,1.15,-4.0,-44.0,-22.7,0.94,-0.98,-19.0,105.0,1267.9,1.03,1.27,11.0,-39.5,82.3,0.47,-0.19,-10.0,7.0,1491.8,0.32,-1.29,0.0,-34.0,658.2,-0.76,0.26,0.24
3,279,261535,0,41.0,27.91,-0.35,3.0,46.0,1921.6,-1.36,-0.47,-32.0,-51.5,560.9,-0.29,-0.1,-1.11,124.3,791.6,0.0,0.39,-1.85,-21.7,-44.9,-0.21,0.02,0.89,133.9,797.8,-0.08,1.06,-0.26,-16.4,-74.1,0.97,-0.8,-0.41,66.9,955.3,-1.9,1.28,-6.65,-28.1,47.5,-1.91,1.42,1.0,-30.0,1846.7,0.76,1.1,-4.0,-52.0,-53.9,1.71,-0.22,-12.0,97.5,1969.8,-1.7,0.16,-1.0,-32.5,255.9,-0.46,1.57,10.0,6.0,2047.7,-0.98,1.53,0.0,-49.0,554.2,-0.83,0.39,0.73
4,279,261536,0,50.0,28.0,-1.32,-9.0,12.0,464.8,0.88,0.19,8.0,-51.5,98.1,1.09,-0.33,-2.16,-3.9,102.7,0.39,-1.22,-3.39,-15.2,-42.2,-1.18,-1.11,-3.55,8.9,141.3,-0.16,-0.43,-4.15,-12.9,-13.4,-1.32,-0.98,-3.69,8.8,136.1,-0.3,4.13,1.89,-13.0,-18.7,-1.37,-0.93,0.0,-1.0,810.1,-2.29,6.72,1.0,-23.0,-29.7,0.58,-1.1,-18.5,33.5,206.8,1.84,-0.13,4.0,-29.0,30.1,0.8,-0.24,5.0,-14.0,479.5,0.68,-0.59,2.0,-36.0,-6.9,2.02,0.14,-0.23


In [3]:
# Class balance of the binary target: in the counts shown below the positive
# class (1) makes up less than 1% of the rows, i.e. the data is heavily
# imbalanced.
df["target"].value_counts()

0    144455
1      1296
Name: target, dtype: int64

In [4]:
# Drop the identifier columns we won't need in this example.
# Rebinding `df` (instead of mutating it with `inplace=True`) together with
# `errors="ignore"` keeps this cell idempotent: running it again once the
# columns are already gone is a no-op instead of a KeyError.
df = df.drop(columns=["EXAMPLE_ID", "BLOCK_ID"], errors="ignore")

In [5]:
# Stratified 80/10/10 split: hold out 20% of the rows, then cut the hold-out
# in half to obtain the validation and test sets. Both splits stratify on the
# target and share the same fixed seed.
split_seed = 1
df_train, df_holdout = train_test_split(
    df,
    test_size=0.2,
    stratify=df["target"],
    random_state=split_seed,
)
df_valid, df_test = train_test_split(
    df_holdout,
    test_size=0.5,
    stratify=df_holdout["target"],
    random_state=split_seed,
)

## Preparing the data

In [6]:
# Every column except the label is used as a continuous feature
continuous_cols = df.columns.drop("target").tolist()

In [7]:
# deeptabular: fit the preprocessor (with scaling enabled) on the training
# split only, then apply the fitted transformation to the other two splits
tab_preprocessor = TabPreprocessor(scale=True, continuous_cols=continuous_cols)
X_tab_train = tab_preprocessor.fit_transform(df_train)
X_tab_valid, X_tab_test = (
    tab_preprocessor.transform(split) for split in (df_valid, df_test)
)

# target: label arrays for each split
y_train, y_valid, y_test = (
    split["target"].values for split in (df_train, df_valid, df_test)
)

## Define the model

In [8]:
# Funnel-shaped MLP: five layer widths evenly spaced from twice the number of
# input features down towards (but excluding) the single output unit
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = 1
hidden_layers = np.linspace(
    start=2 * input_layer,
    stop=output_layer,
    num=5,
    endpoint=False,
    dtype=int,
).tolist()

In [9]:
# Tabular MLP component, built from the preprocessor's column metadata and the
# hidden layer sizes held in `hidden_layers`
deeptabular = TabMlp(
    mlp_hidden_dims=hidden_layers,
    column_idx=tab_preprocessor.column_idx,
    continuous_cols=tab_preprocessor.continuous_cols,
)
# WideDeep wrapper with only the deeptabular component set
model = WideDeep(deeptabular=deeptabular)
# Display the module structure
model

WideDeep(
  (deeptabular): Sequential(
    (0): TabMlp(
      (cat_and_cont_embed): DiffSizeCatAndContEmbeddings(
        (cont_norm): BatchNorm1d(74, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (tab_mlp): MLP(
        (mlp): Sequential(
          (dense_layer_0): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=74, out_features=148, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_1): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=148, out_features=118, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_2): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=118, out_features=89, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_3): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_featu

In [10]:
# Metrics from torchmetrics
# NOTE(review): precision is configured with average="micro" while the other
# three use average=None; in the recorded `analysis.results` output precision
# is logged as a scalar and the others as one-element lists — confirm the
# difference is intentional.
accuracy = Accuracy_torchmetrics(average=None, num_classes=1)
precision = Precision_torchmetrics(average="micro", num_classes=1)
f1 = F1_torchmetrics(average=None, num_classes=1)
recall = Recall_torchmetrics(average=None, num_classes=1)

In [11]:
# # Alternative metrics from pytorch-widedeep (uncomment this cell to use them instead of the torchmetrics ones)
# accuracy = Accuracy(top_k=2)
# precision = Precision(average=False)
# recall = Recall(average=True)
# f1 = F1Score(average=False)

In [13]:
# Search space and experiment settings handed to Ray Tune
config = {
    # grid search over the listed batch sizes
    "batch_size": tune.grid_search([1000, 5000]),
    # Weights & Biases settings; also passed to WandbLoggerCallback when the
    # search is launched
    "wandb": {
        "project": "test",
        # os.path.join instead of manual "/" concatenation so the path is
        # built with the platform's separator
        "api_key_file": os.path.join(os.getcwd(), "wandb_api.key"),
    },
}

# Optimizers
deep_opt = SGD(model.deeptabular.parameters(), lr=0.1)
# LR Scheduler
deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)


@wandb_mixin
def training_function(config, X_train, X_val):
    """Train the notebook-level ``model`` for a single Ray Tune trial.

    Parameters
    ----------
    config: dict
        Trial configuration provided by Ray Tune. ``config["batch_size"]`` is
        required; ``config["n_epochs"]`` is optional and defaults to 5.
    X_train: dict
        Training data, forwarded unchanged to ``Trainer.fit``.
    X_val: dict
        Validation data, forwarded unchanged to ``Trainer.fit``.

    Notes
    -----
    The model, optimizer, LR scheduler and metrics are the notebook-level
    objects (``model``, ``deep_opt``, ``deep_sch``, ``accuracy`` ...); they are
    not created inside this function.
    """
    # Hyperparameters
    batch_size = config["batch_size"]
    # The epoch count used to be hard-coded in the fit call; reading it from
    # the config (same default) lets it be tuned without editing this function.
    n_epochs = config.get("n_epochs", 5)

    early_stopping = EarlyStopping()
    # wb=wandb hands the wandb module to the checkpoint callback
    model_checkpoint = ModelCheckpoint(save_best_only=True, wb=wandb)

    trainer = Trainer(
        model,
        objective="binary_focal_loss",
        callbacks=[RayTuneReporter, early_stopping, model_checkpoint],
        lr_schedulers={"deeptabular": deep_sch},
        initializers={"deeptabular": XavierNormal},
        optimizers={"deeptabular": deep_opt},
        metrics=[accuracy, precision, recall, f1],
        verbose=0,
    )

    trainer.fit(
        X_train=X_train,
        X_val=X_val,
        n_epochs=n_epochs,
        batch_size=batch_size,
    )


# Train / validation data in the dict form handed to the training function
X_train = {"X_tab": X_tab_train, "target": y_train}
X_val = {"X_tab": X_tab_valid, "target": y_valid}

# ASHA scheduler minimising the validation loss reported by each trial.
# NOTE(review): the recorded trials finish after 5 training iterations, which
# is below grace_period=10, so it looks like the scheduler never gets the
# chance to stop a trial early — confirm these settings are intended.
asha_scheduler = AsyncHyperBandScheduler(
    metric="_metric/val_loss",
    mode="min",
    time_attr="training_iteration",
    grace_period=10,
    max_t=100,
    reduction_factor=3,
    brackets=1,
)

# Bind the datasets to the trainable so Ray Tune only has to supply `config`
trainable = tune.with_parameters(training_function, X_train=X_train, X_val=X_val)
# Progress table that overwrites itself in the notebook output
notebook_reporter = JupyterNotebookReporter(overwrite=True)
# Weights & Biases logger configured from the same settings stored in `config`
wandb_settings = config["wandb"]
wandb_logger = WandbLoggerCallback(
    project=wandb_settings["project"],
    api_key_file=wandb_settings["api_key_file"],
    log_config=True,
)

analysis = tune.run(
    trainable,
    config=config,
    scheduler=asha_scheduler,
    resources_per_trial={"cpu": 1, "gpu": 0},
    progress_reporter=notebook_reporter,
    callbacks=[wandb_logger],
)

Trial name,status,loc,batch_size,iter,total time (s)
training_function_99db5_00000,TERMINATED,,1000,5,18.2696
training_function_99db5_00001,TERMINATED,,5000,5,19.0039


[2m[36m(pid=4633)[0m 2022-01-03 15:51:46,590	ERROR worker.py:428 -- SystemExit was raised from the worker
[2m[36m(pid=4633)[0m Traceback (most recent call last):
[2m[36m(pid=4633)[0m   File "python/ray/_raylet.pyx", line 684, in ray._raylet.task_execution_handler
[2m[36m(pid=4633)[0m   File "python/ray/_raylet.pyx", line 524, in ray._raylet.execute_task
[2m[36m(pid=4633)[0m   File "python/ray/_raylet.pyx", line 561, in ray._raylet.execute_task
[2m[36m(pid=4633)[0m   File "python/ray/_raylet.pyx", line 568, in ray._raylet.execute_task
[2m[36m(pid=4633)[0m   File "python/ray/_raylet.pyx", line 572, in ray._raylet.execute_task
[2m[36m(pid=4633)[0m   File "python/ray/_raylet.pyx", line 522, in ray._raylet.execute_task.function_executor
[2m[36m(pid=4633)[0m   File "/Users/javier/.pyenv/versions/3.7.7/envs/widedeep37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 579, in actor_method_executor
[2m[36m(pid=4633)[0m     return method(__ray_acto

[2m[36m(ImplicitFunc pid=4636)[0m 


[2m[36m(pid=4636)[0m wandb: Waiting for W&B process to finish, PID 4816... (success).
[2m[36m(pid=4636)[0m wandb: - 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: \ 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: | 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: / 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: - 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: \ 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: | 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: / 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: - 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: \ 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: | 0.00MB of 0.00MB uploaded (0.00MB deduped)
[2m[36m(pid=4636)[0m wandb: / 0.00MB of 0.00MB uploaded (0.00MB deduped)

In [21]:
# Per-trial results of the search, keyed by trial id (see the output below)
analysis.results

{'e1f7e_00000': {'_metric': {'train_loss': 0.005808346435172945,
   'train_Accuracy': [0.99256432056427],
   'train_Precision': 0.9885057210922241,
   'train_Recall': [0.1658630669116974],
   'train_F1': [0.28406277298927307],
   'val_loss': 0.004733487094442049,
   'val_Accuracy': [0.9952658414840698],
   'val_Precision': 1.0,
   'val_Recall': [0.4651162922382355],
   'val_F1': [0.6349206566810608]},
  'time_this_iter_s': 4.078662157058716,
  'done': True,
  'timesteps_total': None,
  'episodes_total': None,
  'training_iteration': 5,
  'experiment_id': '1e85b31098a64f678ebfdd2285959ea5',
  'date': '2022-01-03_15-46-31',
  'timestamp': 1641221191,
  'time_total_s': 19.46595311164856,
  'pid': 3979,
  'hostname': 'infinito.bbrouter',
  'node_ip': '192.168.18.39',
  'config': {'batch_size': 1000,
   'wandb': {'project': 'test',
    'api_key_file': '/Users/javier/Projects/pytorch-widedeep/examples/notebooks/wandb_api.key'}},
  'time_since_restore': 19.46595311164856,
  'timesteps_since_r

Using Weights & Biases logging you can create [parallel coordinates graphs](https://docs.wandb.ai/ref/app/features/panels/parallel-coordinates) that map parameter combinations to the best (lowest) loss achieved during the training of the networks.

![WNB](figures/wnb.png "parallel coordinates")

Local visualization of the Ray Tune results using TensorBoard:

In [23]:
# Load the TensorBoard notebook extension and point it at ~/ray_results.
# NOTE(review): the tune.run call in this notebook is not given an explicit
# output directory, so this presumably relies on Ray Tune's default results
# location — confirm.
%load_ext tensorboard
%tensorboard --logdir ~/ray_results