# Install libraries (set up for Google Colab)

In [1]:
#!pip install torch==2.3.0 torchvision==0.18.0 tensorboard==2.16.2 optuna==3.6.0 scikit-learn==1.4.2 kaggle==1.6.12

# Import modules

In [2]:
import os
import zipfile
import torch
import torchvision
from torchvision import datasets, transforms
import random
from tqdm.auto import tqdm
from pathlib import Path
import shutil
import pickle
import numpy as np
from datetime import datetime
from typing import Tuple, Dict, Union, List
import optuna
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


# Download dataset

In [3]:
data_dir = Path('data/')

if data_dir.is_dir():
    print('Folder already exists')
else:
    # Downloading subset of food101
    print('Downloading subset of Food101 dataset')
    data_dir.mkdir(parents=True,exist_ok=True)
    !kaggle datasets download -d satish1v/food101subset -p data/
    zip_path = data_dir / 'food101subset.zip'
    with zipfile.ZipFile(zip_path) as zipref:
        zipref.extractall(data_dir)
    os.remove(zip_path)

    # Keeping only 3 food subfolders instead of 101
    sub_folders = os.listdir(data_dir)
    labels = ['clam_chowder', 'donuts', 'ice_cream']
    for sub_folder in [sub_folder for sub_folder in sub_folders if (data_dir/sub_folder).is_dir() and sub_folder in labels]:
        sub_folder_path = data_dir / sub_folder
        shutil.rmtree(sub_folder_path)

    # Save labels for later deployment
    class_to_idx = {i:label for i,label in enumerate(labels)}
    if not Path('class_to_idx.pkl').is_file():
        with open('class_to_idx.pkl', 'wb') as file:
            pickle.dump(class_to_idx, file)

Folder already exists


# Experimentation setup

In [4]:
# Candidate backbones compared during the search
model_names = [
    'shufflenet_v2_x0_5',
    'mobilenet_v3_small',
]

# Three learning rates, one decade apart (1e-1, 1e-2, 1e-3)
LR_rates = np.logspace(start=-1, stop=-3, num=3)

# Dropout probabilities tried in the classifier
dropout_rates = [0.3, 0.4, 0.5]

# Helper functions

In [5]:
def train_step(model:torch.nn.Module,
               dataloader:torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device:torch.device) -> Tuple[float,float,float,float,float]:
    """Run one training epoch and return the batch-averaged metrics.

    Args:
        model: Network to optimise; it is switched to train mode.
        dataloader: Iterable of ``(inputs, targets)`` batches supporting ``len()``.
        loss_fn: Callable computing the loss from ``(logits, targets)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(loss, accuracy, precision, recall, f1)``; each value is the mean of the
        per-batch values, rounded to 2 decimals. Precision, recall and F1 are
        macro-averaged within each batch.
    """
    # train mode
    model.train()

    # Set up metrics
    train_loss, train_acc, train_precision, train_recall, train_f1 = 0, 0, 0, 0, 0

    # Loop through batches
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        logits = model(X)

        # 2. Compute loss
        loss = loss_fn(logits, y)

        # 3. Zero gradients
        optimizer.zero_grad()

        # 4. Backward propagation
        loss.backward()

        # 5. Update weights and biases
        optimizer.step()

        # Get the most confident predictions
        predictions = torch.argmax(logits,dim=1).cpu()
        y = y.cpu()

        # Calculate metrics.
        # scikit-learn expects (y_true, y_pred); passing them the other way round
        # silently swaps precision and recall.
        train_loss += loss.item()
        train_acc += accuracy_score(y, predictions)
        train_precision += precision_score(y, predictions, average='macro')
        train_recall += recall_score(y, predictions, average='macro')
        train_f1 += f1_score(y, predictions, average='macro')

    # Averaging metrics
    train_loss = round(train_loss/len(dataloader),2)
    train_acc = round(train_acc/len(dataloader),2)
    train_precision = round(train_precision/len(dataloader),2)
    train_recall = round(train_recall/len(dataloader),2)
    train_f1 = round(train_f1/len(dataloader),2)

    return train_loss, train_acc, train_precision, train_recall, train_f1


def test_step(model:torch.nn.Module,
              dataloader:torch.utils.data.DataLoader,
              loss_fn:torch.nn.Module,
              device:torch.device) -> Tuple[float,float,float,float,float]:

    # Set up metrics
    test_loss, test_acc, test_precision, test_recall, test_f1 = 0, 0, 0, 0, 0

    # Turn off updating gradients
    model.eval()
    with torch.inference_mode():
        # Loop through batches
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            # 1. Forward pass
            logits = model(X)

            # 2. Compute loss
            loss = loss_fn(logits, y)

            # Get the most confident predictions
            predictions = torch.argmax(logits,dim=1).cpu()
            y = y.cpu()

            # Calculate metrics
            test_loss += loss.item()
            test_acc += accuracy_score(predictions, y)
            test_precision += precision_score(predictions, y, average='macro')
            test_recall += recall_score(predictions, y, average='macro')
            test_f1 += f1_score(predictions, y, average='macro')

    # Averaging metrics
    test_loss = round(test_loss/len(dataloader),2)
    test_acc = round(test_acc/len(dataloader),2)
    test_precision = round(test_precision/len(dataloader),2)
    test_recall = round(test_recall/len(dataloader),2)
    test_f1 = round(test_f1/len(dataloader),2)

    return test_loss, test_acc, test_precision, test_recall, test_f1


def create_writer(model_name:str, learning_rate:float, dropout_rate:float) -> SummaryWriter:
    """Build a SummaryWriter whose log directory encodes the run's hyperparameters.

    The directory is ``experiments/<model_name>/LR<learning_rate>/DR<percent>perc``,
    where ``<percent>`` is ``int(dropout_rate * 100)``.
    """
    dropout_percent = int(dropout_rate*100)
    run_dir = Path("experiments").joinpath(
        model_name,
        f'LR{learning_rate}',
        f'DR{dropout_percent}perc',
    )
    return SummaryWriter(log_dir=run_dir)


def remove_empty_logs(model_name:str, learning_rate:float, dropout_rate:float, log_dir:str):
    """Delete the event files lying directly in a run's log directory.

    The run directory is ``experiments/<model_name>/LR<learning_rate>/DR<percent>perc``
    (the same layout ``create_writer`` uses). Only plain files directly inside it are
    removed; sub-directories and their contents are left untouched.

    Args:
        model_name: Model name component of the run directory.
        learning_rate: Learning-rate component of the run directory.
        dropout_rate: Dropout rate; encoded as ``int(dropout_rate * 100)``.
        log_dir: Root log folder; it is created if it does not exist yet.
    """
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True,exist_ok=True)
    certain_log_dir = Path("experiments") / model_name / f'LR{learning_rate}' / f'DR{int(dropout_rate*100)}perc'

    # Nothing has been logged for this configuration yet
    if not certain_log_dir.is_dir():
        return

    # List and delete in the SAME directory. The old code listed `log_dir`, removed from
    # the run directory, and joined the path with the whole list instead of one file name.
    for entry in list(certain_log_dir.iterdir()):
        if entry.is_file():
            entry.unlink()


def standardize_model(model_name:str, out_features:int, device:torch.device):
    """Load a pretrained backbone and give it a ``classifier`` head with ``out_features`` outputs.

    Supported names are ``'mobilenet_v3_small'`` and ``'shufflenet_v2_x0_5'``. ShuffleNet's
    ``fc`` layer is moved into a ``classifier`` Sequential (Dropout + Linear) so both
    models expose the same attribute.

    Args:
        model_name: Which torchvision architecture to build.
        out_features: Output size of the new final Linear layer.
        device: Device the model is moved to.

    Returns:
        ``(model, weights)``: the adapted model and the torchvision weights enum it was
        loaded with.

    Raises:
        ValueError: If ``model_name`` is not one of the supported architectures.
    """
    if model_name=='mobilenet_v3_small':
        weights = torchvision.models.MobileNet_V3_Small_Weights.DEFAULT
        model = torchvision.models.mobilenet_v3_small(weights=weights)

    elif model_name=='shufflenet_v2_x0_5':
        weights = torchvision.models.ShuffleNet_V2_X0_5_Weights.DEFAULT
        # Load through the `weights` enum (the `pretrained=True` flag is deprecated)
        model = torchvision.models.shufflenet_v2_x0_5(weights=weights)

        # Making the last layer similar to the other model for purposes of experiment.
        # The dropout probability is a fixed placeholder (train() overrides it); it used
        # to be random.random(), which made the returned model non-deterministic.
        model.classifier = torch.nn.Sequential(
            torch.nn.Dropout(p=0.2),
            model.fc
        )
        del model.fc

        # Overwriting _forward_impl method for shufflenet_v2_x0_5 to change "fc" to "classifier"
        def custom_shufflenet_forward_impl(self, x:torch.Tensor) -> torch.Tensor:
            # See note [TorchScript super()]
            x = self.conv1(x)
            x = self.maxpool(x)
            x = self.stage2(x)
            x = self.stage3(x)
            x = self.stage4(x)
            x = self.conv5(x)
            x = x.mean([2, 3])  # globalpool
            x = self.classifier(x)
            return x

        # Patch the _forward_impl method, binding it to this instance and its real class
        model._forward_impl = custom_shufflenet_forward_impl.__get__(model, type(model))

    else:
        # Previously an unknown name fell through to an UnboundLocalError on `model`
        raise ValueError(
            f"Unsupported model_name {model_name!r}; "
            "expected 'mobilenet_v3_small' or 'shufflenet_v2_x0_5'"
        )

    # Replace the final layer so the head emits `out_features` logits
    model.classifier[-1] = torch.nn.Linear(in_features=model.classifier[-1].in_features,
                                            out_features=out_features, bias=True)
    model.to(device)
    return model, weights


def create_dataloaders(train_threshold, transformation, batch_size):
    """Split the image folder into train/test subsets and wrap each in a DataLoader.

    Images are read from the module-level ``data_dir`` with ``transformation`` applied.
    ``train_threshold`` is the fraction of samples assigned to the training subset; the
    remainder forms the test subset. Only the training loader shuffles.

    Returns:
        ``(train_dataloader, test_dataloader)``.
    """
    full_dataset = datasets.ImageFolder(root=data_dir, transform=transformation)

    # Sizes of the two random subsets
    n_total = len(full_dataset)
    n_train = int(train_threshold * n_total)
    n_test = int(n_total - n_train)
    train_subset, test_subset = random_split(full_dataset, [n_train, n_test])

    return (
        DataLoader(train_subset, batch_size=batch_size, shuffle=True),
        DataLoader(test_subset, batch_size=batch_size, shuffle=False),
    )


def train(model_name:str,
            batch_size:int,
            dropout_rate:float,
            epochs:int,
            out_features:int,
            train_threshold:float,
            learning_rate:float,
            device:torch.device,
            optimizer_class:torch.optim,
            loss_fn:torch.nn,
            save_model:bool,
            track_logs:bool,
            log_dir:str = None) -> float:
    """Train the classifier head of a pretrained model and return the last test accuracy.

    Args:
        model_name: Architecture name passed to ``standardize_model``.
        batch_size: Batch size for both dataloaders.
        dropout_rate: Probability applied to every Dropout layer in ``model.classifier``.
        epochs: Number of train/test passes; must be at least 1.
        out_features: Number of output classes.
        train_threshold: Fraction of the dataset used for training.
        learning_rate: Learning rate handed to ``optimizer_class``.
        device: Device used for the model and the batches.
        optimizer_class: Optimizer constructor called as ``optimizer_class(params, lr=...)``.
        loss_fn: Loss callable taking ``(logits, targets)``.
        save_model: If True, save the state dict under ``models/`` after training.
        track_logs: If True, write per-epoch metrics with a SummaryWriter.
        log_dir: Root log folder; required when ``track_logs`` is True.

    Returns:
        Test accuracy measured after the final epoch.

    Raises:
        ValueError: If ``track_logs`` is True without ``log_dir``, or ``epochs < 1``.
    """
    # Fail early instead of crashing later in Path(None) / on an unbound `test_acc`
    if track_logs and log_dir is None:
        raise ValueError("log_dir must be specified when track_logs is True")
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    # Create and change model's architecture to be similar to other models
    model, weights = standardize_model(model_name, out_features, device)

    # Change dropout rate
    for i, layer in enumerate(model.classifier):
        if isinstance(layer, torch.nn.Dropout):
            model.classifier[i] = torch.nn.Dropout(p=dropout_rate, inplace=False)

    # Freeze the feature-extraction layers. They are never given to the optimizer, so
    # computing their gradients is pure overhead; the classifier updates are unchanged.
    head_param_ids = {id(param) for param in model.classifier.parameters()}
    for param in model.parameters():
        if id(param) not in head_param_ids:
            param.requires_grad_(False)

    # Only the classifier parameters are optimised
    optimizer = optimizer_class(model.classifier.parameters(), lr=learning_rate)

    # Get model's default transform
    auto_transformation = weights.transforms()

    # Get iterable data batches
    train_dataloader, test_dataloader = create_dataloaders(train_threshold, auto_transformation, batch_size)

    # One writer for the whole run (previously a new writer, and new event files,
    # were created on every epoch)
    writer = create_writer(model_name, learning_rate, dropout_rate) if track_logs else None

    try:
        for epoch in tqdm(range(1,epochs+1)):
            # Train step
            train_loss, train_acc, train_precision, train_recall, train_f1 = train_step(model,train_dataloader,loss_fn,optimizer,device)

            # Test step
            test_loss, test_acc, test_precision, test_recall, test_f1 = test_step(model,test_dataloader,loss_fn,device)

            if writer is not None:
                # Add results to SummaryWriter: one chart per metric, train vs. test
                epoch_metrics = {
                    'CrossEntropyLoss': (train_loss, test_loss),
                    'Accuracy': (train_acc, test_acc),
                    'Precision': (train_precision, test_precision),
                    'Recall': (train_recall, test_recall),
                    'F1': (train_f1, test_f1),
                }
                for main_tag, (train_value, test_value) in epoch_metrics.items():
                    writer.add_scalars(main_tag=main_tag,
                                       tag_scalar_dict={'train':train_value,
                                                        'test':test_value},
                                       global_step=epoch)
                # Make the epoch visible to a running TensorBoard straight away
                writer.flush()
    finally:
        # Always release the event files, even if an epoch raises
        if writer is not None:
            writer.close()

    if track_logs:
        # Removing unnecessary files in log_dir
        remove_empty_logs(model_name, learning_rate, dropout_rate, log_dir)

    if save_model:
        # Create models/ folder if not exist
        Path('models/').mkdir(parents=True,exist_ok=True)
        model_path = Path('models/') / f'{model_name}_LR{learning_rate}_DR{int(dropout_rate*100)}perc.pt'

        # Save model
        torch.save(model.state_dict(), model_path)
        print(f'\n{"-"*80}\nSaving model to "{model_path}"\n{"-"*80}\n')

    return test_acc


def objective(trial,
            model_names:List[str],
            batch_size:int,
            dropout_rates:List[float],
            epochs:int,
            out_features:int,
            train_threshold:float,
            learning_rates:List[float],
            device:torch.device,
            optimizer_class:torch.optim,
            loss_fn:torch.nn,
            save_model:bool,
            track_logs:bool,
            log_dir:str = None,
            stop_threshold:float = 0.95):
    """Optuna objective: sample a configuration, train it and return its test accuracy.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        model_names: Candidate model names.
        batch_size: Batch size forwarded to ``train``.
        dropout_rates: Candidate dropout rates.
        epochs: Number of epochs forwarded to ``train``.
        out_features: Number of output classes forwarded to ``train``.
        train_threshold: Training fraction forwarded to ``train``.
        learning_rates: Candidate learning rates.
        device: Device forwarded to ``train``.
        optimizer_class: Optimizer constructor forwarded to ``train``.
        loss_fn: Loss callable forwarded to ``train``.
        save_model: Forwarded to ``train``.
        track_logs: Forwarded to ``train``.
        log_dir: Root log folder; required when ``track_logs`` is True.
        stop_threshold: Test accuracy at or above which the study is asked to stop.
            Defaults to 0.95, the value that used to be hard-coded.

    Returns:
        The test accuracy returned by ``train`` for the sampled configuration.

    Raises:
        ValueError: If ``track_logs`` is True and ``log_dir`` is None.
    """

    # Ensure the required parameter is provided when a specific condition is met
    if track_logs and log_dir is None:
        raise ValueError("log_dir must be specified when track_logs is True")

    # Suggest hyperparameters
    model_name = trial.suggest_categorical('model_name', model_names)
    learning_rate = trial.suggest_categorical('learning_rate', learning_rates)
    dropout_rate = trial.suggest_categorical('dropout_rate', dropout_rates)

    # Train the model
    test_acc = train(model_name,batch_size,
            dropout_rate,epochs,out_features,
            train_threshold,learning_rate,device,
            optimizer_class,loss_fn,save_model,track_logs,log_dir)

    # Stop the whole study once a configuration is good enough
    if test_acc >= stop_threshold:
        trial.study.stop()

    return test_acc

# Settings

In [6]:
# Set the seed for reproducibility (Python, NumPy and PyTorch RNGs)
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Set parameters
train_threshold = 0.8
# One class per sub-directory of data_dir; stray files must not be counted as classes
out_features = sum(1 for entry in data_dir.iterdir() if entry.is_dir())
epochs = 10
batch_size = 3500
max_optuna_trials = 50
loss_fn = torch.nn.CrossEntropyLoss()
optimizer_class = torch.optim.Adam
track_logs = True
save_model = False

log_dir = 'experiments'

# Use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

# Experimenting by iterating through "experimentation setup"

In [7]:
%%time
# Get the current date as study_name for optuna
current_date = datetime.now().strftime("%Y-%m-%d")

# Use Optuna to find the best parameters
study = optuna.create_study(direction='maximize', study_name=current_date)

# Train, track metrics
study.optimize(lambda trial: objective(trial,model_names,batch_size,
                                        dropout_rates,epochs,out_features,
                                        train_threshold,LR_rates,device,
                                        optimizer_class,loss_fn,save_model,track_logs,log_dir), n_trials=max_optuna_trials)

[I 2024-05-24 11:41:59,909] A new study created in memory with name: 2024-05-24
100%|██████████| 10/10 [01:12<00:00,  7.28s/it]
[I 2024-05-24 11:43:12,736] Trial 0 finished with value: 0.73 and parameters: {'model_name': 'shufflenet_v2_x0_5', 'learning_rate': 0.001, 'dropout_rate': 0.4}. Best is trial 0 with value: 0.73.
100%|██████████| 10/10 [01:39<00:00,  9.99s/it]
[I 2024-05-24 11:44:52,671] Trial 1 finished with value: 0.82 and parameters: {'model_name': 'mobilenet_v3_small', 'learning_rate': 0.001, 'dropout_rate': 0.3}. Best is trial 1 with value: 0.82.
100%|██████████| 10/10 [01:40<00:00, 10.01s/it]
[I 2024-05-24 11:46:32,843] Trial 2 finished with value: 0.78 and parameters: {'model_name': 'mobilenet_v3_small', 'learning_rate': 0.01, 'dropout_rate': 0.4}. Best is trial 1 with value: 0.82.
100%|██████████| 10/10 [01:41<00:00, 10.19s/it]
[I 2024-05-24 11:48:14,829] Trial 3 finished with value: 0.72 and parameters: {'model_name': 'mobilenet_v3_small', 'learning_rate': 0.1, 'dropou

CPU times: total: 7h 9min 3s
Wall time: 1h 9min 28s


# Save the model with the best config

In [8]:
# Observe the best trial
best_params = study.best_params
print(f'The best parameters:\n  {best_params}\nWith the test accuracy_score {study.best_trial.value}\n')

# Re-train the model with best params and save it.
# The flags are passed directly instead of rebinding the global `save_model` /
# `track_logs`, so re-running the search cell afterwards behaves as configured.
# train() builds its own model, so no separate standardize_model() call is needed.
train(best_params['model_name'],batch_size,
                best_params['dropout_rate'],epochs,out_features,
                train_threshold,best_params['learning_rate'],device,
                optimizer_class,loss_fn,
                save_model=True,track_logs=False)

The best parameters:
  {'model_name': 'shufflenet_v2_x0_5', 'learning_rate': 0.01, 'dropout_rate': 0.3}
With the test accuracy_score 0.91



100%|██████████| 10/10 [01:14<00:00,  7.48s/it]


--------------------------------------------------------------------------------
Saving model to "models\shufflenet_v2_x0_5_LR0.01_DR30perc.pt"
--------------------------------------------------------------------------------






0.85

# Checking metrics in tensorboard

In [9]:
# Load the TensorBoard notebook extension and open it on the `experiments` log folder
%load_ext tensorboard
%tensorboard --logdir experiments

Reusing TensorBoard on port 6006 (pid 9156), started 11:29:38 ago. (Use '!kill 9156' to kill it.)