# Hyperparameter Tuning with Ray Tune and PyTorch

This notebook is a small demonstrator for how to perform hyperparameter tuning with Ray Tune and PyTorch.

In [1]:
import os
import numpy as np
import pandas as pd
import sklearn.model_selection
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import torchmetrics
from ray import tune
from ray.tune import CLIReporter

# Specifying training and testing data

In [2]:
class SETIDataset(Dataset):
    def __init__(self, dataframe, transform=None, target_transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Load the spectrogram
        spectrogram_path = self.dataframe.loc[idx, 'file_path']
        spectrogram = np.load(spectrogram_path)
        spectrogram = spectrogram.astype('float32')                     # convert from float16 to float32 for processing in neural network
        spectrogram = np.moveaxis(spectrogram, 0, -1)                   # Nedded because its saved as (CxHxW), but PyTorch expects axes as (HxWxC) and converts to (CxHxW)
        
        # Load the label of the spectrogram
        label = self.dataframe.loc[idx, 'target']
        
        # Apply transformations for processing with PyTorch
        if self.transform:
            spectrogram = self.transform(spectrogram)
        if self.target_transform:
            label = self.target_transform(label)
        return spectrogram, label

In [3]:
# Method for construction of the file paths for the training data
def get_train_file_path(spectrogram_id):
    return 'C:/Users/kaige/Desktop/kaggle_seti/seti-breakthrough-listen/train/' + spectrogram_id[0] +'/' + spectrogram_id + '.npy'

def load_data_loaders():
    # Read in the labels and ids of the spectrograms
    df_data = pd.read_csv('C:/Users/kaige/Desktop/kaggle_seti/seti-breakthrough-listen/train_labels.csv')
    df_data['file_path'] = df_data['id'].apply(get_train_file_path)

    # Split the data into training and testing data
    df_train, df_test = sklearn.model_selection.train_test_split(df_data, test_size=0.2, random_state=42)
    df_train = df_train.reset_index(drop=True)
    df_test = df_test.reset_index(drop=True)

    # Create pytorch datasets for training and testing data
    training_data = SETIDataset(
        dataframe = df_train,
        transform=ToTensor()
    )
    testing_data = SETIDataset(
        dataframe = df_test,
        transform=ToTensor()
    )

    # Create data loaders for training and testing data
    train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
    test_dataloader = DataLoader(testing_data, batch_size=64, shuffle=True)
    
    return train_dataloader, test_dataloader

# Building the CNN model

In [4]:
class NeuralNetwork(nn.Module):
    def __init__(self, config):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Conv2d(6, config['c1'], 3, 1)
        self.batch_norm1 = nn.BatchNorm2d(config['c1'])
        self.conv2 = nn.Conv2d(config['c1'], config['c2'], 3, 1)
        self.batch_norm2 = nn.BatchNorm2d(config['c2'])
        self.conv3 = nn.Conv2d(config['c2'], config['c3'], 3, 1)
        self.batch_norm3 = nn.BatchNorm2d(config['c3'])
        self.dropout1 = nn.Dropout2d(0.2)
        self.fc1 = nn.Linear(960*config['c3'], config['fc1'])
        self.dropout2 = nn.Dropout2d(0.2)
        self.fc2 = nn.Linear(config['fc1'], 2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
 
        x = self.conv2(x)
        x = self.batch_norm2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        
        x = self.conv3(x)
        x = self.batch_norm3(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)

        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        
        x = self.fc2(x)
        logits = x
        
        return logits

# Specifying the training and testing procedure

In [5]:
def train_and_test_seti(config):
    # Load data
    train_dataloader, test_dataloader = load_data_loaders()
    
    # Initiate model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = NeuralNetwork(config).to(device)
    
    # Choose loss funtion and optimizer
    # We need to weight the outputs of the loss, since the dataset is imbalanced
    weight_for_0 = (1/45471) * 50165/2
    weight_for_1 = (1/4694) * 50165/2
    weight = torch.tensor([weight_for_0, weight_for_1], device=device)
    loss_fn = nn.CrossEntropyLoss(weight=weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    
    # The neural network training procedure
    size = len(train_dataloader.dataset)
    for i in range(config['epochs']):
        for batch, (X, y) in enumerate(train_dataloader):
            X, y = X.to(device), y.to(device)

            # Compute prediction error
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # The neural network testing procedure
    size = len(test_dataloader.dataset)
    model.eval()
    test_loss = 0
    metric_acc = torchmetrics.Accuracy(num_classes=2, average='macro')        # Compute the average accuracy for both classes, since the data is imbalanced
    metric_acc = metric_acc.to(device)
    with torch.no_grad():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            pred_probs = pred.softmax(dim=-1)
            metric_acc.update(pred_probs, y)

    # Compute metrics and report them to ray tune
    test_loss /= size
    accuracy = metric_acc.compute().item()
    tune.report(loss=test_loss, accuracy=accuracy)

# Main function for hyperparameter optimization

In [6]:
def main(num_samples=10, max_num_epochs=10):
    # Specify the hyperparameter configuration space that will be optimized
    config = {'lr': tune.loguniform(1e-4, 1e-2),
              'c1': tune.choice([16, 32, 64]),
              'c2': tune.choice([16, 32, 64]),
              'c3': tune.choice([16, 32, 64]),
              'fc1': tune.choice([16, 32, 64]),
              'epochs': 2}

    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])
    
    # Start hyperparameter tuning with ray tune
    result = tune.run(
        train_and_test_seti,
        resources_per_trial={"cpu": 16, "gpu": 1},
        config=config,
        num_samples=num_samples,
        progress_reporter=reporter)

    # Get the best trial and print it out at the end
    best_trial = result.get_best_trial("accuracy", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))


# Run the hyperparameter optimization
if __name__ == "__main__":
    main(num_samples=6)

2021-06-18 10:07:36,116	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-06-18 10:07:49,059	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


== Status ==
Memory usage on this node: 13.0/80.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/16 CPUs, 0/1 GPUs, 0.0/40.75 GiB heap, 0.0/20.37 GiB objects
Result logdir: C:\Users\kaige\ray_results\train_and_test_seti_2021-06-18_10-07-48
Number of trials: 6/6 (6 PENDING)
+---------------------------------+----------+-------+------+------+------+-------+-------------+
| Trial name                      | status   | loc   |   c1 |   c2 |   c3 |   fc1 |          lr |
|---------------------------------+----------+-------+------+------+------+-------+-------------|
| train_and_test_seti_4566d_00000 | PENDING  |       |   32 |   32 |   16 |    64 | 0.000181503 |
| train_and_test_seti_4566d_00001 | PENDING  |       |   32 |   64 |   64 |    64 | 0.000188528 |
| train_and_test_seti_4566d_00002 | PENDING  |       |   32 |   64 |   16 |    64 | 0.000167403 |
| train_and_test_seti_4566d_00003 | PENDING  |       |   32 |   32 |   32 |    16 | 0.00166259  |
| train_and_test_seti_4566d

[2m[36m(pid=10616)[0m Windows fatal exception: access violation
[2m[36m(pid=10616)[0m 


Result for train_and_test_seti_4566d_00001:
  accuracy: 0.5273966789245605
  date: 2021-06-18_10-16-38
  done: false
  experiment_id: a74b36639533442d91f09c596f3ab26d
  hostname: PC-KAI-2018
  iterations_since_restore: 1
  loss: 0.011075107868083937
  node_ip: 192.168.0.73
  pid: 20088
  time_since_restore: 290.07534885406494
  time_this_iter_s: 290.07534885406494
  time_total_s: 290.07534885406494
  timestamp: 1624004198
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4566d_00001
  
== Status ==
Memory usage on this node: 14.9/80.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/40.75 GiB heap, 0.0/20.37 GiB objects
Result logdir: C:\Users\kaige\ray_results\train_and_test_seti_2021-06-18_10-07-48
Number of trials: 6/6 (4 PENDING, 1 RUNNING, 1 TERMINATED)
+---------------------------------+------------+--------------------+------+------+------+-------+-------------+-----------+------------+----------------------+
| Trial name    

[2m[36m(pid=20088)[0m Windows fatal exception: access violation
[2m[36m(pid=20088)[0m 


Result for train_and_test_seti_4566d_00002:
  accuracy: 0.6039847135543823
  date: 2021-06-18_10-21-05
  done: false
  experiment_id: b531120151bc438a94882cfdd4699550
  hostname: PC-KAI-2018
  iterations_since_restore: 1
  loss: 0.01037818771481312
  node_ip: 192.168.0.73
  pid: 5384
  time_since_restore: 265.03654313087463
  time_this_iter_s: 265.03654313087463
  time_total_s: 265.03654313087463
  timestamp: 1624004465
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4566d_00002
  
== Status ==
Memory usage on this node: 15.1/80.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/40.75 GiB heap, 0.0/20.37 GiB objects
Result logdir: C:\Users\kaige\ray_results\train_and_test_seti_2021-06-18_10-07-48
Number of trials: 6/6 (3 PENDING, 1 RUNNING, 2 TERMINATED)
+---------------------------------+------------+-------------------+------+------+------+-------+-------------+-----------+------------+----------------------+
| Trial name       

[2m[36m(pid=5384)[0m Windows fatal exception: access violation
[2m[36m(pid=5384)[0m 


Result for train_and_test_seti_4566d_00003:
  accuracy: 0.5
  date: 2021-06-18_10-24-51
  done: false
  experiment_id: 3933332f0483448097ff39e988313fc3
  hostname: PC-KAI-2018
  iterations_since_restore: 1
  loss: 0.010847522684171622
  node_ip: 192.168.0.73
  pid: 860
  time_since_restore: 223.08587646484375
  time_this_iter_s: 223.08587646484375
  time_total_s: 223.08587646484375
  timestamp: 1624004691
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4566d_00003
  
== Status ==
Memory usage on this node: 13.6/80.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/40.75 GiB heap, 0.0/20.37 GiB objects
Result logdir: C:\Users\kaige\ray_results\train_and_test_seti_2021-06-18_10-07-48
Number of trials: 6/6 (2 PENDING, 1 RUNNING, 3 TERMINATED)
+---------------------------------+------------+------------------+------+------+------+-------+-------------+-----------+------------+----------------------+
| Trial name                      |

[2m[36m(pid=860)[0m Windows fatal exception: access violation
[2m[36m(pid=860)[0m 


Result for train_and_test_seti_4566d_00004:
  accuracy: 0.598851203918457
  date: 2021-06-18_10-28-42
  done: false
  experiment_id: 6148db0579e44704ac91e01682669f69
  hostname: PC-KAI-2018
  iterations_since_restore: 1
  loss: 0.010387558030735208
  node_ip: 192.168.0.73
  pid: 8504
  time_since_restore: 228.9652464389801
  time_this_iter_s: 228.9652464389801
  time_total_s: 228.9652464389801
  timestamp: 1624004922
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4566d_00004
  
== Status ==
Memory usage on this node: 14.2/80.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/40.75 GiB heap, 0.0/20.37 GiB objects
Result logdir: C:\Users\kaige\ray_results\train_and_test_seti_2021-06-18_10-07-48
Number of trials: 6/6 (1 PENDING, 1 RUNNING, 4 TERMINATED)
+---------------------------------+------------+-------------------+------+------+------+-------+-------------+-----------+------------+----------------------+
| Trial name          

[2m[36m(pid=8504)[0m Windows fatal exception: access violation
[2m[36m(pid=8504)[0m 
[2m[36m(pid=20904)[0m Windows fatal exception: access violation
[2m[36m(pid=20904)[0m 
2021-06-18 10:32:35,222	INFO tune.py:549 -- Total run time: 1486.50 seconds (1486.29 seconds for the tuning loop).


Result for train_and_test_seti_4566d_00005:
  accuracy: 0.5999444723129272
  date: 2021-06-18_10-32-35
  done: false
  experiment_id: 84ce29ccfb8d4b59a78f7cc25caac32a
  hostname: PC-KAI-2018
  iterations_since_restore: 1
  loss: 0.010315297358796233
  node_ip: 192.168.0.73
  pid: 20904
  time_since_restore: 230.1755654811859
  time_this_iter_s: 230.1755654811859
  time_total_s: 230.1755654811859
  timestamp: 1624005155
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4566d_00005
  
== Status ==
Memory usage on this node: 14.0/80.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/40.75 GiB heap, 0.0/20.37 GiB objects
Result logdir: C:\Users\kaige\ray_results\train_and_test_seti_2021-06-18_10-07-48
Number of trials: 6/6 (1 RUNNING, 5 TERMINATED)
+---------------------------------+------------+--------------------+------+------+------+-------+-------------+-----------+------------+----------------------+
| Trial name                  