In [1]:
from models import BaseModule
from utils import get_runs

import ipywidgets as widgets

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split

from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger

import wandb

In [2]:
# Notebook parameters, exposed as interactive widgets.

# which dataset to load
dataset_name = widgets.Dropdown(
    description='Dataset:',
    options=['Iris', 'synthetic'],
)

# which architecture to train
model_name = widgets.Dropdown(
    description='Model:',
    options=['linear', 'MLP'],
)

# training length: 10 to 100 epochs, in steps of 10
num_epochs = widgets.IntSlider(
    description='# of epochs:',
    min=10, max=100, step=10,
    orientation='horizontal',
)

# False -> hyperparameter mode, True -> test mode
test_mode = widgets.ToggleButtons(
    description='Mode:',
    options=[('Hyperparameters', False), ('Test', True)],
)

# render the four controls, in this order
display(dataset_name, model_name, num_epochs, test_mode)

Dropdown(description='Dataset:', options=('Iris', 'synthetic'), value='Iris')

Dropdown(description='Model:', options=('linear', 'MLP'), value='linear')

IntSlider(value=10, description='# of epochs:', min=10, step=10)

ToggleButtons(description='Mode:', options=(('Hyperparameters', False), ('Test', True)), value=False)

In [3]:
# Weights & Biases settings
PROJECT = 'merck-training'

# Sweep definition: grid search over batch size and learning rate, with
# 'val_acc' as the metric to maximize.
CONFIG = dict(
    method='grid',
    metric=dict(name='val_acc', goal='maximize'),
    parameters=dict(
        batch_size=dict(values=[64, 128]),
        learning_rate=dict(values=[3e-3, 1e-3, 3e-4, 1e-4]),
    ),
)

# W&B sweep paths, indexed as SWEEP_PATHS[dataset][model]
SWEEP_PATHS = dict(
    Iris=dict(
        MLP='boctrl-c/merck-training/sweeps/z92u55iu',
        linear='boctrl-c/merck-training/sweeps/l7247jdg',
    ),
    synthetic=dict(
        MLP='boctrl-c/merck-training/sweeps/yak28k5i',
        linear='boctrl-c/merck-training/sweeps/req7e9fo',
    ),
)

In [4]:
# Load the selected dataset into a feature matrix X and a label vector y,
# split it into train/test, and standardize the features.
if dataset_name.value == 'Iris':
    # Iris ships with scikit-learn; the DataFrame adds a readable species column
    sklearn_data = load_iris()
    data = pd.DataFrame(sklearn_data.data, columns=sklearn_data.feature_names)
    data['species'] = pd.Categorical.from_codes(sklearn_data.target, sklearn_data.target_names)
    display(data.head())

    # every column except the trailing 'species' is a feature
    X = data.iloc[:, :-1].to_numpy()
    y = sklearn_data.target

elif dataset_name.value == 'synthetic':
    c_names = [
        'feature1_t0',
        'feature2_t0',
        'feature3_t0',
        'feature1_t1',
        'feature2_t1',
        'feature3_t1',
        'feature1_t2',
        'feature2_t2',
        'feature3_t2',
        'score_t0',
        'score_t1',
        'score_t2',
        'score',
        'label'
    ]
    # skiprows=1 skips the file's first line (presumably its own header -- confirm)
    data = pd.read_csv('data.csv', names=c_names, skiprows=1)
    display(data.head())

    # features: everything but the last two columns ('score', 'label');
    # target: the last column ('label')
    X = data.iloc[:, :-2].to_numpy()
    y = data.iloc[:, -1].to_numpy()

else:
    # fail loudly instead of silently reusing X / y left over from an earlier run
    raise ValueError('unknown dataset: {!r}'.format(dataset_name.value))

num_features = X.shape[-1]
num_classes = len(np.unique(y))
print('# of features: {}, # of classes: {}'.format(num_features, num_classes))

# split data into train and test sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# to PyTorch tensors: float features, integer class labels
X_train = torch.tensor(X_train).float()
X_test = torch.tensor(X_test).float()
y_train = torch.tensor(y_train).long()
y_test = torch.tensor(y_test).long()

# compute features' mean and std on the train split only, so that no
# statistics of the test split enter the normalization
mean_train = X_train.mean(dim=0, keepdim=True)
std_train = X_train.std(dim=0, keepdim=True)

# a feature that is constant on the train split has zero std; dividing by it
# would fill the column with NaN/inf, so such columns are divided by 1 instead
std_train = torch.where(std_train > 0, std_train, torch.ones_like(std_train))

# normalize both splits with the train statistics
X_train = (X_train - mean_train)/std_train
X_test = (X_test - mean_train)/std_train

Unnamed: 0,feature1_t0,feature2_t0,feature3_t0,feature1_t1,feature2_t1,feature3_t1,feature1_t2,feature2_t2,feature3_t2,score_t0,score_t1,score_t2,score,label
0,1.275952,9.548464,20.857206,-1.640917,9.879204,19.796847,1.286034,7.853225,20.916851,-12.885027,-13.454494,-10.525529,-13.805164,1.0
1,2.209974,11.65787,18.994037,-1.583167,11.1852,20.050245,-0.040625,10.566484,19.212929,-11.824399,-13.999659,-11.552003,-10.581159,1.0
2,-0.928405,11.05774,22.520939,1.421343,11.178265,20.241026,-0.029118,10.790355,20.34029,-11.021825,-12.921086,-14.071246,-10.88728,1.0
3,0.58343,9.428129,19.557302,0.102584,9.106648,14.846111,-1.09392,10.77897,20.88442,-15.48535,-15.343734,-12.31357,-14.453414,0.0
4,0.142398,9.179987,20.252988,-1.483179,9.365358,19.794396,1.470028,12.056651,20.820639,-13.315184,-13.604042,-14.272901,-15.524159,0.0


# of features: 12, # of classes: 2


In [5]:
# wrap the tensors as datasets of (features, label) pairs
train_set = TensorDataset(X_train, y_train)
test_set = TensorDataset(X_test, y_test)

if test_mode.value:
    # test mode: the whole training split is kept for fitting
    print('train set size: {}\ntest set size: {}'.format(
        *map(len, (train_set, test_set))
    ))
else:
    # hyperparameter mode: carve an 80/20 train/validation split out of the
    # training set, with a fixed seed so the split is the same on every run
    dataset = train_set
    train_set, val_set = random_split(
        dataset,
        [.8, .2],
        generator=torch.Generator().manual_seed(1),
    )
    print('train set size: {}\nval set size: {}\ntest set size: {}'.format(
        *map(len, (train_set, val_set, test_set))
    ))

train set size: 3760
test set size: 940


In [6]:
# Hyperparameter mode only: log in to W&B, then create the sweep described
# by CONFIG under PROJECT. Test mode creates no sweep.
if not test_mode.value:
    wandb.login()
    sweep_id = wandb.sweep(
        CONFIG,
        project=PROJECT,
    )

def train():
    """Train one model, either as a W&B sweep run or as the final test run.

    The mode is read once from the ``test_mode`` widget:

    * Hyperparameter mode (``test_mode.value`` is falsy): opens a W&B run,
      reads ``batch_size`` and ``learning_rate`` from ``wandb.config`` and
      fits on ``train_set`` while validating on ``val_set``. The run is
      closed when training ends; if anything raises, it is closed with a
      non-zero exit code and the exception is re-raised.
    * Test mode: picks the run with the highest ``val_acc`` among those
      returned by ``get_runs`` for the selected dataset/model sweep, fits on
      ``train_set`` with its hyperparameters, then evaluates on ``test_set``.

    Takes no arguments, so it can be passed as ``function=`` to
    ``wandb.agent``, and returns nothing. Both modes request GPU 0.

    Raises:
        ValueError: if ``model_name.value`` is neither ``'linear'`` nor
            ``'MLP'``.
    """
    is_test = test_mode.value

    # only hyperparameter-mode runs are tracked as W&B runs
    run = None if is_test else wandb.init()

    try:
        if is_test:
            # best run of the stored sweep = highest validation accuracy
            runs = get_runs(SWEEP_PATHS[dataset_name.value][model_name.value])
            best = runs.sort_values(['val_acc'], ascending=[False]).iloc[0]

            # int()/float() accept numpy and Python scalars alike. If the
            # frame is all-numeric, .iloc[0] upcasts the row to float, and
            # DataLoader does not accept a float batch size.
            bs = int(best['batch_size'])
            lr = float(best['learning_rate'])
        else:
            # retrieve hyperparameters from the current sweep run
            config = wandb.config
            bs = config['batch_size']
            lr = config['learning_rate']

        # initialize the model
        if model_name.value == 'linear':  # linear model
            net = nn.Linear(num_features, num_classes)
        elif model_name.value == 'MLP':  # Multilayer perceptron (MLP)
            net = nn.Sequential(
                nn.Linear(num_features, 128),  # layer of neurons
                nn.ReLU(),  # activation function
                nn.Linear(128, num_classes)
            )
        else:
            raise ValueError('unknown model: {!r}'.format(model_name.value))

        # Lightning wrapper
        model = BaseModule(net, num_classes=num_classes, lr=lr)

        # only the training loader is shuffled
        train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)

        # Trainer arguments shared by both modes
        common = dict(
            max_epochs=num_epochs.value,
            accelerator='gpu',
            devices=[0],
        )

        if is_test:
            test_loader = DataLoader(test_set, batch_size=bs)
            trainer = Trainer(default_root_dir='checkpoints_test', **common)
            trainer.fit(model, train_loader)  # train
            # test, loading the 'last' checkpoint
            trainer.test(dataloaders=test_loader, ckpt_path='last')
        else:
            val_loader = DataLoader(val_set, batch_size=bs)
            trainer = Trainer(
                logger=WandbLogger(),
                log_every_n_steps=10,
                default_root_dir='checkpoints',
                **common
            )
            trainer.fit(model, train_loader, val_loader)  # train and validate
    except BaseException:
        # close the W&B run as failed instead of leaving it open
        if run is not None:
            run.finish(exit_code=1)
        raise
    else:
        if run is not None:
            run.finish()

# launch: one train-and-test pass in test mode; otherwise the W&B agent
# calls train() for the runs of the sweep
if test_mode.value:
    train()
else:
    wandb.agent(sweep_id, function=train)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name      | Type       | Params
-----------------------------------------
0 | model     | Sequential | 1.9 K 
1 | train_acc | Accuracy   | 0     
2 | val_acc   | Accuracy   | 0     
3 | test_acc  | Accuracy   | 0     
-----------------------------------------
1.9 K     Trainable params
0         Non-trainable params
1.9 K     Total params
0.008     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=30` reached.
  rank_zero_warn(
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.6872340440750122
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
