<a href="https://colab.research.google.com/github/lorrespz/Image-Classification-Collection/blob/main/LSTM_for_image_classification_%5BPyTorch_%2B_FashionMNIST%5D%3B_Hyperparameter_tuning_with_Optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM for image classification [FashionMNIST]; Hyperparameter tuning with Optuna

In [1]:
! pip install optuna



In [2]:
import os
import optuna
from optuna.trial import TrialState

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [3]:
device =  torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
BATCHSIZE = 128
CLASSES = 10
DIR = os.getcwd()
EPOCHS = 20
N_TRAIN_EXAMPLES = BATCHSIZE * 100
N_VALID_EXAMPLES = BATCHSIZE * 40

In [4]:
device, N_TRAIN_EXAMPLES, N_VALID_EXAMPLES

(device(type='cuda', index=0), 12800, 5120)

In [5]:
def get_mnist():
    # Load FashionMNIST dataset.
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=True, download=True, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=True,
    )

    return train_loader, valid_loader

# Define LSTM model

In [6]:


class RNN(nn.Module):
  def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs, device=device):
    super(RNN,self).__init__()
    self.D = n_inputs
    self.M = n_hidden
    self.K = n_outputs
    self.L = n_rnnlayers

    self.rnn = nn.LSTM(input_size = self.D,
                       hidden_size = self.M,
                       num_layers = self.L,
                       batch_first = True)
    self.fc = nn.Linear(self.M, self.K)

  def forward(self, X):
    #initial hidden states
    h0 = torch.zeros(self.L, X.size(0), self.M).to(device)
    c0 = torch.zeros(self.L, X.size(0), self.M).to(device)

    #get LSTM unit output:
    out, _ = self.rnn(X, (h0,c0))

    #we only want h(T) at the final time step
    out = self.fc(out[:, -1, :])

    return out


# Hyperparameter tuning

In [7]:
def define_model(trial):
  #trial.suggest_in(low,high,step)
  hidden_size = trial.suggest_int('hidden_size', 64, 512, 64)
  num_layers = trial.suggest_int('num_layers', 1,4,1)
  model = RNN(28, hidden_size, num_layers, 10, device = device)
  return model

In [10]:

def objective(trial):
    # Generate the model.
    model = define_model(trial).to(device)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_mnist()

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            data, target = data.view(data.size(0), -1).to(device), target.to(device)
            # reshape the input
            data = data.view(-1, 28, 28)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                data, target = data.view(data.size(0), -1).to(device), target.to(device)
                # reshape the input
                data = data.view(-1, 28, 28)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [None]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)

[I 2024-03-26 10:11:53,925] A new study created in memory with name: no-name-cecfc637-bd03-439f-a618-f9cb83b8499a
  hidden_size = trial.suggest_int('hidden_size', 64, 512, 64)
  num_layers = trial.suggest_int('num_layers', 1,4,1)
[I 2024-03-26 10:12:47,708] Trial 0 finished with value: 0.0994140625 and parameters: {'hidden_size': 256, 'num_layers': 3, 'optimizer': 'Adam', 'lr': 3.644398701832356e-05}. Best is trial 0 with value: 0.0994140625.
[I 2024-03-26 10:13:46,221] Trial 1 finished with value: 0.101171875 and parameters: {'hidden_size': 448, 'num_layers': 2, 'optimizer': 'RMSprop', 'lr': 0.0007578415419437125}. Best is trial 1 with value: 0.101171875.
[I 2024-03-26 10:14:32,365] Trial 2 finished with value: 0.0990234375 and parameters: {'hidden_size': 128, 'num_layers': 3, 'optimizer': 'SGD', 'lr': 0.00019752572319019018}. Best is trial 1 with value: 0.101171875.
[I 2024-03-26 10:15:20,037] Trial 3 finished with value: 0.1021484375 and parameters: {'hidden_size': 512, 'num_layers'