<a href="https://colab.research.google.com/github/suinkangme/COMP432-GroupI/blob/main/hyperparameter_search_with_Orion_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Download the two dataset archives from Google Drive. Per the recorded cell
# output they are saved as /content/train_data.npz and /content/test_data.npz,
# which are the file names train.py opens with np.load.
!gdown --fuzzy 'https://drive.google.com/file/d/1NFGnrygrAZmoDpSwoL66ZjMlDBo7I5NN/view?usp=sharing'
!gdown --fuzzy 'https://drive.google.com/file/d/1-1HrZIEGdHrsKzmtz7xikOFHhKOqvpH7/view?usp=drive_link'

Downloading...
From: https://drive.google.com/uc?id=1NFGnrygrAZmoDpSwoL66ZjMlDBo7I5NN
To: /content/train_data.npz
100% 869M/869M [00:11<00:00, 76.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-1HrZIEGdHrsKzmtz7xikOFHhKOqvpH7
To: /content/test_data.npz
100% 218M/218M [00:02<00:00, 97.1MB/s]


In [None]:
%%capture
!pip install git+https://github.com/epistimio/orion.git@develop
!pip install orion[profet]

In [None]:
%%file train.py

import argparse
import numpy as np
import sklearn
import sklearn.preprocessing
import sklearn.neural_network
from orion.client import report_objective # Orion
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision.models import resnet18
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import seaborn as sns

def _str2bool(value):
    """Parse a command-line boolean such as 'True', 'false', '1' or 'no'.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value (including 'False') would be truthy. This parser maps the
    usual spellings explicitly and rejects anything else.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def _to_channels_first(images):
    """Convert a (batch, height, width, channels) batch to float (batch, channels, height, width).

    The axes are transposed with ``permute``; a plain ``reshape`` to the target
    shape would keep the memory order and scramble the pixels across channels.
    ``float()`` is a no-op when the batch is already float32.
    """
    return images.permute(0, 3, 1, 2).contiguous().float()


def _error_rate(model, loader, device):
    """Return the classification error of ``model`` over ``loader``, in percent.

    The model is switched to evaluation mode so BatchNorm layers use their
    running statistics instead of per-batch statistics.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images = _to_channels_first(images.to(device))
            labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        raise ValueError('cannot compute an error rate on an empty data loader')
    return 100*(1 - (correct/total))


def train():
    """Train a ResNet-18 classifier and report its validation error to Orion.

    Hyperparameters are read from the command line (--batchsize, --epochs,
    --lr). The images/labels stored in ``train_data.npz`` are split into a
    training part and a validation part (37.5% of the samples); the validation
    error in percent is printed and passed to ``report_objective``. With
    ``--eval`` the error on ``test_data.npz`` is printed as well. ``--seed``
    optionally fixes the split and the torch random state so that separate
    runs are comparable; without it the behaviour is unseeded as before.
    """
    parser = argparse.ArgumentParser(
        description='Train a ResNet-18 classifier and report the validation error to Orion')
    parser.add_argument('--batchsize', type=int, default=64,
                        help='input batch size for training (default: 64)')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate (default: 0.01)')
    # A bare `--eval` is accepted as True; `--eval=True` / `--eval=False` are parsed explicitly.
    parser.add_argument('--eval', type=_str2bool, nargs='?', const=True, default=False,
                        help='If True it prints the test error (default: False)')
    parser.add_argument('--seed', type=int, default=None,
                        help='seed for the train/validation split and torch (default: unseeded)')
    # Accepts the `-f <file>` argument that is present when the script is run from a notebook kernel.
    parser.add_argument("-f", required=False)
    args = parser.parse_args()

    if args.seed is not None:
        torch.manual_seed(args.seed)

    with np.load("train_data.npz") as data:
        X_trn = data['images']
        y_trn = data['labels']

    X_trn, X_valid, y_trn, y_valid = train_test_split(
        X_trn, y_trn, test_size=0.375, random_state=args.seed)

    train_dataset = TensorDataset(torch.from_numpy(X_trn), torch.from_numpy(y_trn))
    valid_dataset = TensorDataset(torch.from_numpy(X_valid), torch.from_numpy(y_valid))

    train_loader = DataLoader(train_dataset, batch_size=args.batchsize, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=args.batchsize, shuffle=False)

    classes = ['MUS', 'NORM', 'STR']

    # ResNet-18 trained from scratch, with the final layer resized to the number of classes.
    model = resnet18(weights=None)
    model.fc = nn.Linear(model.fc.in_features, len(classes))

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Training
    for epoch in range(args.epochs):
        model.train()
        for images, labels in train_loader:
            images = _to_channels_first(images.to(device))
            # CrossEntropyLoss expects int64 class indices; no-op if the labels already are.
            labels = labels.to(device).long()
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Validation: this value is the objective Orion minimises.
    valid_error = _error_rate(model, valid_loader, device)
    # "\\%" keeps the printed text identical to earlier runs ("\%") without an invalid escape.
    print("Valid Error (\\%): " + str(valid_error))

    report_objective(valid_error)

    if args.eval:
        # The test archive is only read when it is actually evaluated.
        with np.load("test_data.npz") as data:
            X_tst = data['images']
            y_tst = data['labels']
        test_dataset = TensorDataset(torch.from_numpy(X_tst), torch.from_numpy(y_tst))
        test_loader = DataLoader(test_dataset, batch_size=args.batchsize, shuffle=False)
        test_error = _error_rate(model, test_loader, device)
        print("Test Error (\\%): " + str(test_error))

# Run the training routine only when the file is executed as a script
# (e.g. `python train.py ...`), not when it is imported.
if __name__ == "__main__":
    train()

Writing train.py


In [None]:
# Run the hyperparameter search: experiment `hp_adam`, at most 15 trials of
# train.py. Orion samples lr log-uniformly in [1e-4, 0.1], batchsize from
# {16, 32, 64} and an integer number of epochs from uniform(15, 30).
# Each trial's objective is the validation error reported by train.py.
!orion hunt -n hp_adam --exp-max-trials=15 python train.py --lr~'loguniform(1e-4, 0.1)' --batchsize~'choices([16, 32, 64])' --epochs~'uniform(15, 30, discrete=True)'

Valid Error (\%): 24.22222222222222
Valid Error (\%): 15.666666666666663
Valid Error (\%): 21.111111111111114
Valid Error (\%): 23.944444444444446
Valid Error (\%): 25.055555555555554
Valid Error (\%): 22.611111111111114
Valid Error (\%): 23.05555555555555
Valid Error (\%): 24.055555555555554
Valid Error (\%): 24.33333333333333
Valid Error (\%): 19.333333333333336
Valid Error (\%): 36.111111111111114
Valid Error (\%): 17.94444444444444
Valid Error (\%): 15.44444444444445
Valid Error (\%): 25.611111111111107
Valid Error (\%): 15.555555555555555
Search finished successfully

Stats
=====
completed: True
trials completed: 15
best trial:
  id: f204c03d381fcd7bf9f142d90823705e
  evaluation: 15.44444444444445
  params:
    /batchsize: 64
    /epochs: 19
    /lr: 0.001159
start time: 2023-11-13 19:40:48.884333
finish time: 2023-11-13 20:38:42.474289
elapsed_time: 0:57:53.115602


Hints
=====

Info
----

To get more information on the experiment, run the command

orion info --name hp_adam --ver

In [None]:
# Print the stored configuration, search space and statistics (including the
# best trial) of version 1 of the `hp_adam` experiment.
!orion info --name hp_adam --version 1

Identification
name: hp_adam
version: 1
user: root


Commandline
python train.py --lr~loguniform(1e-4, 0.1) --batchsize~choices([16, 32, 64]) --epochs~uniform(15, 30, discrete=True)


Config
max trials: 15
max broken: 3
working dir: 


Algorithm
random:
    seed: None


Space
=====
/batchsize: choices([16, 32, 64])
/epochs: uniform(15, 30, discrete=True)
/lr: loguniform(0.0001, 0.1)


Meta-data
user: root
datetime: 2023-11-13 19:40:48.884333
orion version: 0.2.6.post333+gd51e6ea8
VCS:



Parent experiment
root:
parent:
adapter:


Stats
=====
completed: True
trials completed: 15
best trial:
  id: f204c03d381fcd7bf9f142d90823705e
  evaluation: 15.44444444444445
  params:
    /batchsize: 64
    /epochs: 19
    /lr: 0.001159
start time: 2023-11-13 19:40:48.884333
finish time: 2023-11-13 20:38:42.474289
elapsed_time: 0:57:53.115602




In [None]:
# Retrain with the best trial's hyperparameters from the search above
# (lr=0.001159, epochs=19, batchsize=64) and also print the test error.
!python train.py --lr=0.001159 --epochs=19 --batchsize=64 --eval='True'

Valid Error (\%): 14.94444444444445
[{'name': 'objective', 'type': 'objective', 'value': 14.94444444444445}]
Test Error (\%): 17.500000000000004


The first search run was disconnected before it could be saved in a cell; its logged results are preserved here:

2023-11-13 17:33:09,346::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
2023-11-13 17:33:09,462::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
2023-11-13 17:33:14,216::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
2023-11-13 17:33:14,685::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 27.0
2023-11-13 17:37:14,750::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 31.666666666666664
2023-11-13 17:40:35,544::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 27.44444444444445
2023-11-13 17:44:31,909::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 13.555555555555554
2023-11-13 17:48:54,486::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 21.333333333333336
2023-11-13 17:52:46,323::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 25.388888888888893
2023-11-13 17:57:03,199::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 16.11111111111111
2023-11-13 18:01:45,248::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 20.611111111111114
2023-11-13 18:05:34,455::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 32.611111111111114
2023-11-13 18:08:37,094::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 19.444444444444443
2023-11-13 18:12:03,111::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 22.22222222222222
2023-11-13 18:15:26,246::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 15.888888888888886
2023-11-13 18:19:01,091::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 22.277777777777775
2023-11-13 18:21:49,862::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 16.77777777777778
2023-11-13 18:26:16,074::WARNING::root::Script /content/train.py is not in a git repository. Code modification won't be detected.
Valid Error (\%): 23.33333333333333
Search finished successfully

Stats
=====
completed: True
trials completed: 15
best trial:
  id: 13e9799d5615f66c6b84e7fe7a295047
  evaluation: 13.555555555555554
  params:
    /batchsize: 32
    /epochs: 26
    /lr: 0.0006515
start time: 2023-11-13 17:33:14.202656
finish time: 2023-11-13 18:29:18.666711
elapsed_time: 0:56:03.995837


Hints
=====

Info
----

To get more information on the experiment, run the command

orion info --name hp_adam --version 1


In [None]:
# Earlier result, obtained when the validation set was 0.125 of the training set: !python train.py --lr=0.001399 --epochs=19 --batchsize=64 --eval='True'

Valid Error (\%): 13.33333333333333
[{'name': 'objective', 'type': 'objective', 'value': 13.33333333333333}]
Test Error (\%): 15.249999999999996


In [None]:
# Earlier result, obtained when the validation set was 0.1 of the training set: !python train.py --lr=0.001588 --epochs=41 --batchsize=64 --eval='True'

Valid Error (\%): 13.749999999999996
[{'name': 'objective', 'type': 'objective', 'value': 13.749999999999996}]
Test Error (\%): 12.83333333333333
