<a href="https://colab.research.google.com/github/lorrespz/Image-Classification-Collection/blob/main/LSTM_for_image_classification_%5BPyTorch_%2B_FashionMNIST%5D%3B_Hyperparameter_tuning_with_Optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM for image classification [FashionMNIST]; Hyperparameter tuning with Optuna

In [1]:
# %pip installs into the environment of the running kernel (a `!` shell call may not).
%pip install -q optuna



In [2]:
import os
import optuna
from optuna.trial import TrialState

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [3]:
# Run on the first CUDA device when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

DIR = os.getcwd()  # root folder handed to the dataset constructors
CLASSES = 10
BATCHSIZE = 128
EPOCHS = 20

# Sample caps expressed as a whole number of batches.
N_TRAIN_EXAMPLES = 100 * BATCHSIZE
N_VALID_EXAMPLES = 40 * BATCHSIZE

In [4]:
# Echo the selected device and the two sample caps as the cell output.
(device, N_TRAIN_EXAMPLES, N_VALID_EXAMPLES)

(device(type='cuda', index=0), 12800, 5120)

In [5]:
def get_mnist():
    """Build the FashionMNIST training and validation data loaders.

    Despite the function name, the dataset is FashionMNIST. Files live under
    DIR and are downloaded when missing; images are converted with ToTensor.

    Returns:
        (train_loader, valid_loader): both yield batches of BATCHSIZE samples.
        The training loader is shuffled; the validation loader keeps a fixed
        order.
    """
    to_tensor = transforms.ToTensor()

    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=True, download=True, transform=to_tensor),
        batch_size=BATCHSIZE,
        shuffle=True,
    )
    # Fixed order on purpose: a caller that only reads the first N validation
    # batches then scores every epoch and every trial on the same samples,
    # instead of on a different random subset each time.
    valid_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=False, download=True, transform=to_tensor),
        batch_size=BATCHSIZE,
        shuffle=False,
    )

    return train_loader, valid_loader

# Define LSTM model

In [6]:


class RNN(nn.Module):
  """Stacked LSTM followed by a linear layer applied to the last time step.

  Args:
    n_inputs: number of features per time step (size of the last input dim).
    n_hidden: hidden-state size of every LSTM layer.
    n_rnnlayers: number of stacked LSTM layers.
    n_outputs: number of scores produced by the final linear layer.
    device: optional device to move the module to at construction time.
      With the default (None) the module is left where PyTorch created it and
      the caller is expected to move it with ``.to(...)``.
  """

  def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs, device=None):
    super().__init__()
    self.D = n_inputs
    self.M = n_hidden
    self.K = n_outputs
    self.L = n_rnnlayers

    self.rnn = nn.LSTM(input_size = self.D,
                       hidden_size = self.M,
                       num_layers = self.L,
                       batch_first = True)
    self.fc = nn.Linear(self.M, self.K)

    if device is not None:
      self.to(device)

  def forward(self, X):
    """Return the output scores for a batch-first input of shape (N, T, n_inputs)."""
    # No explicit (h0, c0): nn.LSTM starts from zero states when none are
    # given, created to match the input, so nothing here depends on a global
    # device variable.
    out, _ = self.rnn(X)

    # Only the hidden state at the final time step feeds the classifier.
    return self.fc(out[:, -1, :])


In [7]:

# Cross-entropy loss shared by all trials.
criterion = torch.nn.CrossEntropyLoss()

# Hyperparameter tuning

In [8]:
def define_model(trial):
  """Build an RNN whose size is sampled from the given Optuna trial.

  Search space:
    hidden_size: 64 to 512 in steps of 64.
    num_layers: 1 to 4.

  Returns:
    An RNN taking 28 features per time step and producing CLASSES scores.
  """
  # trial.suggest_int(name, low, high, step=...): `step` goes by keyword,
  # because passing it positionally is deprecated in Optuna.
  hidden_size = trial.suggest_int('hidden_size', 64, 512, step=64)
  num_layers = trial.suggest_int('num_layers', 1, 4, step=1)
  model = RNN(28, hidden_size, num_layers, CLASSES, device = device)
  return model

In [9]:

def objective(trial):
    """Optuna objective: train one sampled model and return its validation accuracy.

    The trial chooses the model size (via define_model), the optimizer class
    and the learning rate. Each epoch trains on at most N_TRAIN_EXAMPLES
    samples and validates on at most N_VALID_EXAMPLES samples, then reports
    the accuracy to the trial so the pruner can stop it early.

    Returns:
        Validation accuracy measured after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner asks to stop the trial.
    """
    # Generate the model.
    model = define_model(trial).to(device)

    # Generate the optimizer.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_mnist()

    accuracy = 0.0  # keeps the return value defined even when EPOCHS == 0
    for epoch in range(EPOCHS):
        # Training of the model.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            # One 28x28 image becomes a sequence of 28 steps with 28 features each.
            data = data.view(-1, 28, 28).to(device)
            target = target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model: calculate accuracy only.
        model.eval()
        correct = 0
        seen = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break

                data = data.view(-1, 28, 28).to(device)
                target = target.to(device)
                output = model(data)
                # Predicted class = index of the highest score.
                pred = output.argmax(dim=1)
                correct += (pred == target).sum().item()
                seen += target.size(0)

        # Divide by the number of samples actually evaluated: the cap is applied
        # per batch, so the evaluated count can differ from N_VALID_EXAMPLES
        # when the cap is not a multiple of BATCHSIZE.
        accuracy = correct / max(seen, 1)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [10]:
# Maximise the value returned by `objective`; the search stops after 100 trials
# or 1000 seconds, whichever limit is reached first.
study = optuna.create_study(direction="maximize")
study.optimize(
    objective,
    n_trials=100,
    timeout=1000,
)

[I 2024-03-26 10:32:24,784] A new study created in memory with name: no-name-ad9bc13b-b562-4254-a009-55c5e445283c
  hidden_size = trial.suggest_int('hidden_size', 64, 512, 64)
  num_layers = trial.suggest_int('num_layers', 1,4,1)
[I 2024-03-26 10:33:14,676] Trial 0 finished with value: 0.8587890625 and parameters: {'hidden_size': 64, 'num_layers': 2, 'optimizer': 'RMSprop', 'lr': 0.00194380655705587}. Best is trial 0 with value: 0.8587890625.
[I 2024-03-26 10:34:11,704] Trial 1 finished with value: 0.0990234375 and parameters: {'hidden_size': 256, 'num_layers': 4, 'optimizer': 'Adam', 'lr': 0.05832420275416582}. Best is trial 0 with value: 0.8587890625.
[I 2024-03-26 10:35:17,259] Trial 2 finished with value: 0.74765625 and parameters: {'hidden_size': 512, 'num_layers': 2, 'optimizer': 'SGD', 'lr': 0.0634988357581393}. Best is trial 0 with value: 0.8587890625.
[I 2024-03-26 10:36:45,578] Trial 3 finished with value: 0.098046875 and parameters: {'hidden_size': 448, 'num_layers': 3, 'opt

In [11]:

# Split the study's trials by final state (stored objects are returned without copying).
pruned_trials = study.get_trials(
    deepcopy=False,
    states=[TrialState.PRUNED],
)
complete_trials = study.get_trials(
    deepcopy=False,
    states=[TrialState.COMPLETE],
)

In [16]:
# First pruned trial; shows nothing instead of raising IndexError when no trial was pruned.
pruned_trials[0] if pruned_trials else None

FrozenTrial(number=8, state=TrialState.PRUNED, values=[0.0998046875], datetime_start=datetime.datetime(2024, 3, 26, 10, 41, 11, 78740), datetime_complete=datetime.datetime(2024, 3, 26, 10, 41, 13, 965448), params={'hidden_size': 320, 'num_layers': 3, 'optimizer': 'RMSprop', 'lr': 0.013027321954073048}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.0998046875}, distributions={'hidden_size': IntDistribution(high=512, log=False, low=64, step=64), 'num_layers': IntDistribution(high=4, log=False, low=1, step=1), 'optimizer': CategoricalDistribution(choices=('Adam', 'RMSprop', 'SGD')), 'lr': FloatDistribution(high=0.1, log=True, low=1e-05, step=None)}, trial_id=8, value=None)

In [17]:
# Last pruned trial. A fixed index such as 24 only exists when at least 25 trials
# were pruned, which a fresh, timeout-bounded run does not guarantee.
pruned_trials[-1] if pruned_trials else None

FrozenTrial(number=39, state=TrialState.PRUNED, values=[0.8708984375], datetime_start=datetime.datetime(2024, 3, 26, 10, 48, 21, 484066), datetime_complete=datetime.datetime(2024, 3, 26, 10, 49, 12, 336131), params={'hidden_size': 256, 'num_layers': 1, 'optimizer': 'Adam', 'lr': 0.0187436067907857}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.7271484375, 1: 0.7849609375, 2: 0.8099609375, 3: 0.8265625, 4: 0.850390625, 5: 0.8521484375, 6: 0.8482421875, 7: 0.8525390625, 8: 0.8525390625, 9: 0.85, 10: 0.851953125, 11: 0.8626953125, 12: 0.8658203125, 13: 0.859375, 14: 0.8609375, 15: 0.873828125, 16: 0.8716796875, 17: 0.86796875, 18: 0.8654296875, 19: 0.8708984375}, distributions={'hidden_size': IntDistribution(high=512, log=False, low=64, step=64), 'num_layers': IntDistribution(high=4, log=False, low=1, step=1), 'optimizer': CategoricalDistribution(choices=('Adam', 'RMSprop', 'SGD')), 'lr': FloatDistribution(high=0.1, log=True, low=1e-05, step=None)}, trial_id=39, value=None)

In [12]:

# Summary counts: all trials in the study, then the pruned and completed subsets.
print("Study statistics: ")
for label, trials in (
    ("  Number of finished trials: ", study.trials),
    ("  Number of pruned trials: ", pruned_trials),
    ("  Number of complete trials: ", complete_trials),
):
    print(label, len(trials))

Study statistics: 
  Number of finished trials:  40
  Number of pruned trials:  25
  Number of complete trials:  15


In [13]:

# Report the best trial of the study: its objective value and sampled parameters.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
param_line = "    {}: {}"
for key, value in trial.params.items():
    print(param_line.format(key, value))


Best trial:
  Value:  0.8912109375
  Params: 
    hidden_size: 128
    num_layers: 1
    optimizer: RMSprop
    lr: 0.004656129099041159
