# Login to Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/MyDrive

/content/drive/MyDrive


# Fine-tune a model on the ESC-50 dataset

### Code

In [None]:
!pip install timm



In [None]:
import IPython.display as display

import glob
from collections import Counter

import math
import pandas as pd

import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import torch
import torchaudio
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from pathlib import Path
from PIL import Image
import soundfile as sf
from torch.utils.data import Dataset
from torchvision import models, transforms
import timm

import tensorflow as tf
from torch.utils.tensorboard import SummaryWriter
import datetime

from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

import torch.optim.lr_scheduler as lr_scheduler

def evaluate(model, test_loader, device="cpu"):
    """Compute the classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified examples, or ``0.0`` when the
        loader yields no examples.
    """
    model.eval()
    num_correct = 0
    num_examples = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Softmax is monotonic, so the argmax of the raw scores is the
            # same class as the argmax of the probabilities.
            predicted = model(inputs).argmax(dim=1)
            matches = (predicted == targets).view(-1)
            num_correct += matches.sum().item()
            num_examples += matches.shape[0]

    # An empty loader used to raise ZeroDivisionError.
    if num_examples == 0:
        return 0.0
    return num_correct / num_examples

class FrequencyMask(object):
    """Mask a random band of rows (dim 1) of a (C, H, W) tensor image.

      Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     FrequencyMask(max_width=10, use_mean=False),
        >>> ])

    """

    def __init__(self, max_width, use_mean=True):
        """
        Args:
            max_width (int): Exclusive upper bound of the band height; the
                height is drawn from ``[1, max_width)``.
            use_mean (bool): Fill the band with the tensor mean when True,
                with zeros otherwise.
        """
        self.max_width = max_width
        self.use_mean = use_mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of
            size (C, H, W) where the frequency
            mask is to be applied.

        Returns:
            Tensor: The same tensor, modified in place, with the band filled.
        """
        # The band is written along dim 1, so its start must be drawn from
        # that axis. Drawing it from the width (dim 2) placed most bands
        # outside the image whenever the image is wider than it is tall.
        start = random.randrange(0, tensor.shape[1])
        end = start + random.randrange(1, self.max_width)
        fill = tensor.mean() if self.use_mean else 0
        tensor[:, start:end, :] = fill
        return tensor

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(max_width={self.max_width}, use_mean={self.use_mean})"
        )


class TimeMask(object):
    """Mask a random band of columns (dim 2) of a (C, H, W) tensor image.

      Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     TimeMask(max_width=10, use_mean=False),
        >>> ])

    """

    def __init__(self, max_width, use_mean=True):
        """
        Args:
            max_width (int): Exclusive upper bound of the band width; the
                width is drawn from ``[0, max_width)``, so a call may leave
                the tensor unchanged.
            use_mean (bool): Fill the band with the tensor mean when True,
                with zeros otherwise.
        """
        self.max_width = max_width
        self.use_mean = use_mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of
            size (C, H, W) where the time mask
            is to be applied.

        Returns:
            Tensor: The same tensor, modified in place, with the band filled.
        """
        # The band is written along dim 2, so its start must be drawn from
        # that axis. Drawing it from the height (dim 1) confined the mask to
        # the first H columns of the image.
        start = random.randrange(0, tensor.shape[2])
        end = start + random.randrange(0, self.max_width)
        fill = tensor.mean() if self.use_mean else 0
        tensor[:, :, start:end] = fill
        return tensor

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(max_width={self.max_width}, use_mean={self.use_mean})"
        )


class PrecomputedESC50(Dataset):
    """Dataset of precomputed PNG images whose label is encoded in the file name.

    The label is the integer between the last ``-`` of the file name and the
    ``.wav.png`` suffix (a name ending in ``-7.wav.png`` yields label 7).

    Args:
        path: Directory that is searched (non-recursively) for ``*.png``.
        max_freqmask_width: ``max_width`` handed to ``FrequencyMask``.
        max_timemask_width: ``max_width`` handed to ``TimeMask``.
        use_mean: Forwarded to both masks as their fill mode.
        dpi: Unused; kept so existing callers keep working.
        augment: When True (the default, matching the previous behavior) each
            mask is applied with probability 0.5. Pass False for evaluation
            splits to get only tensor conversion and normalization.
    """

    def __init__(self, path, max_freqmask_width, max_timemask_width, use_mean=True, dpi=50, augment=True):
        # Sorted so the index -> file mapping does not depend on the order in
        # which the filesystem happens to list the directory.
        files = sorted(Path(path).glob('*.png'))
        self.items = [(f, int(f.name.split("-")[-1].replace(".wav.png", ""))) for f in files]
        self.length = len(self.items)
        self.max_freqmask_width = max_freqmask_width
        self.max_timemask_width = max_timemask_width
        self.use_mean = use_mean
        self.augment = augment

        steps = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        if augment:
            steps.append(transforms.RandomApply([FrequencyMask(self.max_freqmask_width, self.use_mean)], p=0.5))
            steps.append(transforms.RandomApply([TimeMask(self.max_timemask_width, self.use_mean)], p=0.5))
        self.img_transforms = transforms.Compose(steps)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the item at ``index``."""
        filename, label = self.items[index]
        # The context manager closes the underlying file as soon as the pixels
        # are decoded; convert() returns an image that no longer needs it.
        with Image.open(filename) as img:
            rgb = img.convert('RGB')
        return (self.img_transforms(rgb), label)

    def __len__(self):
        return self.length

# Build a confusion-matrix heatmap figure (the caller may log it to TensorBoard)
def plot_confusion_matrix(model, test_loader, device="cpu"):
    """Run ``model`` over ``test_loader`` and draw the confusion matrix.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        The matplotlib figure containing the heatmap.
    """
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            output = model(inputs.to(device))
            # argmax of the raw scores equals argmax of their softmax.
            all_predictions.extend(output.argmax(dim=1).cpu().tolist())
            all_labels.extend(targets.cpu().tolist())

    # Generate confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)

    # With no explicit ``labels``, confusion_matrix orders rows and columns by
    # the sorted labels occurring in either list. Label the ticks with those
    # ids: positional ticks are wrong as soon as a class is absent.
    class_ids = sorted(set(all_labels) | set(all_predictions))
    tick_labels = class_ids if class_ids else True

    # Create a heatmap of the confusion matrix
    figure = plt.figure(figsize=(20, 16))
    sn.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')

    return figure

# Plot a grid of inputs titled with actual vs. predicted labels (the caller may log it to TensorBoard)
def log_predictions_vs_actuals(model, data_loader, device="cpu", num_batches=5):
    """Draw up to 16 samples per batch with their actual and predicted labels.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        data_loader: Iterable yielding ``(inputs, targets)`` batches; inputs
            are permuted from (N, C, H, W) to (N, H, W, C) for display.
        device: Device the batches are moved to before the forward pass.
        num_batches: Maximum number of batches to process.

    Returns:
        The figure of the last processed batch, or ``None`` when no batch was
        processed (empty loader or ``num_batches <= 0``).
    """
    model.eval()

    fig = None
    with torch.no_grad():
        for batch_counter, (inputs, targets) in enumerate(data_loader):
            if batch_counter >= num_batches:
                break

            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            probabilities, predicted_labels = torch.max(F.softmax(output, dim=1), dim=1)

            # Convert PyTorch tensors to NumPy arrays
            # NOTE(review): images are shown as-is; if the loader normalizes
            # them, imshow will clip out-of-range values - confirm.
            inputs_np = inputs.permute(0, 2, 3, 1).cpu().numpy()

            # Only the last figure is returned, so release the superseded one.
            if fig is not None:
                plt.close(fig)
            fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(12, 12))

            num_samples = inputs_np.shape[0]
            for i, ax in enumerate(axes.flat):
                ax.axis("off")
                # A batch smaller than the 4x4 grid leaves the extra cells
                # blank instead of raising IndexError.
                if i >= num_samples:
                    continue

                ax.imshow(inputs_np[i])

                actual_label = targets[i].item()
                predicted_label = predicted_labels[i].item()
                probability = probabilities[i].item()

                # Color the title based on correctness
                title_color = 'green' if actual_label == predicted_label else 'red'

                ax.set_title(f"Actual: {actual_label}\nPredicted: {predicted_label}\nProb: {probability:.2f}", color=title_color)

            fig.tight_layout()
    return fig

class EarlyStopping:
    """Track validation accuracy and signal when it has stopped improving.

    The stop flag is raised once more than ``patience`` consecutive calls to
    ``step`` fail to beat the best accuracy seen so far; once raised it stays
    raised.
    """

    def __init__(self, patience, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_valid_accuracy = 0.0
        self.early_stop = False

    def step(self, valid_accuracy):
        """Record one validation result and return the current stop flag."""
        improved = valid_accuracy > self.best_valid_accuracy
        if improved:
            # New best: remember it and restart the stagnation count.
            self.best_valid_accuracy = valid_accuracy
            self.counter = 0
            return self.early_stop

        self.counter += 1
        if self.counter <= self.patience:
            return self.early_stop

        self.early_stop = True
        if self.verbose:
            print("Early stopping activated.")
        return self.early_stop

class LearningRateScheduler(lr_scheduler._LRScheduler):
    """Reduce the learning rate when validation accuracy stops improving.

    Thin wrapper around ``ReduceLROnPlateau``. The monitored quantity is an
    accuracy (higher is better), so the wrapped scheduler runs in ``'max'``
    mode; in the default ``'min'`` mode every improvement counts as a bad
    epoch and the rate is cut while the model is still getting better.

    Args:
        optimizer: Optimizer whose parameter groups are rescaled.
        patience: Number of non-improving steps tolerated before a reduction.
        factor: Multiplier applied to each group's learning rate.
        verbose: Print a line for every parameter group whose rate changed.
            Printing is done here rather than by ``ReduceLROnPlateau``, whose
            own ``verbose`` argument is deprecated in recent PyTorch releases.
    """

    def __init__(self, optimizer, patience, factor=0.1, verbose=False):
        # Imported locally so construction does not depend on the module-level
        # name ``lr_scheduler``, which this notebook later rebinds to an
        # instance of this class.
        from torch.optim.lr_scheduler import ReduceLROnPlateau

        # The base-class initializer is deliberately not invoked (as before);
        # all scheduling is delegated to the wrapped ReduceLROnPlateau.
        self.optimizer = optimizer
        self.patience = patience
        self.factor = factor
        self.verbose = verbose
        self.lr_scheduler = ReduceLROnPlateau(
            optimizer, mode='max', patience=self.patience, factor=self.factor
        )

    def step(self, valid_accuracy):
        """Feed one validation accuracy; return the first group's learning rate."""
        previous_lrs = [group['lr'] for group in self.optimizer.param_groups]
        self.lr_scheduler.step(valid_accuracy)
        if self.verbose:
            for index, (old_lr, group) in enumerate(zip(previous_lrs, self.optimizer.param_groups)):
                if group['lr'] != old_lr:
                    print(f"Reducing learning rate of group {index} to {group['lr']:.4e}.")
        return self.optimizer.param_groups[0]['lr']

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Directories holding the precomputed PNGs of each split.
PATH_ESC50_TRAIN="./train1/"
PATH_ESC50_VALID="./valid1/"
PATH_ESC50_TEST="./test/"

# Batch size shared by all three loaders.
bs=16

# One dataset per split, all built with the same mask widths.
esc50pre_train, esc50pre_valid, esc50pre_test = (
    PrecomputedESC50(split_path, max_freqmask_width=10, max_timemask_width=10)
    for split_path in (PATH_ESC50_TRAIN, PATH_ESC50_VALID, PATH_ESC50_TEST)
)

# One shuffling loader per dataset.
esc50_train_loader, esc50_val_loader, esc50_test_loader = (
    torch.utils.data.DataLoader(split, bs, shuffle=True)
    for split in (esc50pre_train, esc50pre_valid, esc50pre_test)
)

### EfficientNetV2

In [None]:
!pip install timm

Collecting timm
  Downloading timm-0.9.10-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.19.0 safetensors-0.4.0 timm-0.9.10


In [None]:
import IPython.display as display

import glob
from collections import Counter

import math
import pandas as pd

import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import torch
import torchaudio
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from pathlib import Path
from PIL import Image
import soundfile as sf
from torch.utils.data import Dataset
from torchvision import models, transforms
import timm

import tensorflow as tf
from torch.utils.tensorboard import SummaryWriter
import datetime

from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

import torch.optim.lr_scheduler as lr_scheduler

def evaluate(model, test_loader, device="cpu"):
    """Compute the classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified examples, or ``0.0`` when the
        loader yields no examples.
    """
    model.eval()
    num_correct = 0
    num_examples = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Softmax is monotonic, so the argmax of the raw scores is the
            # same class as the argmax of the probabilities.
            predicted = model(inputs).argmax(dim=1)
            matches = (predicted == targets).view(-1)
            num_correct += matches.sum().item()
            num_examples += matches.shape[0]

    # An empty loader used to raise ZeroDivisionError.
    if num_examples == 0:
        return 0.0
    return num_correct / num_examples

class FrequencyMask(object):
    """Mask a random band of rows (dim 1) of a (C, H, W) tensor image.

      Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     FrequencyMask(max_width=10, use_mean=False),
        >>> ])

    """

    def __init__(self, max_width, use_mean=True):
        """
        Args:
            max_width (int): Exclusive upper bound of the band height; the
                height is drawn from ``[1, max_width)``.
            use_mean (bool): Fill the band with the tensor mean when True,
                with zeros otherwise.
        """
        self.max_width = max_width
        self.use_mean = use_mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of
            size (C, H, W) where the frequency
            mask is to be applied.

        Returns:
            Tensor: The same tensor, modified in place, with the band filled.
        """
        # The band is written along dim 1, so its start must be drawn from
        # that axis. Drawing it from the width (dim 2) placed most bands
        # outside the image whenever the image is wider than it is tall.
        start = random.randrange(0, tensor.shape[1])
        end = start + random.randrange(1, self.max_width)
        fill = tensor.mean() if self.use_mean else 0
        tensor[:, start:end, :] = fill
        return tensor

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(max_width={self.max_width}, use_mean={self.use_mean})"
        )


class TimeMask(object):
    """Mask a random band of columns (dim 2) of a (C, H, W) tensor image.

      Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     TimeMask(max_width=10, use_mean=False),
        >>> ])

    """

    def __init__(self, max_width, use_mean=True):
        """
        Args:
            max_width (int): Exclusive upper bound of the band width; the
                width is drawn from ``[0, max_width)``, so a call may leave
                the tensor unchanged.
            use_mean (bool): Fill the band with the tensor mean when True,
                with zeros otherwise.
        """
        self.max_width = max_width
        self.use_mean = use_mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of
            size (C, H, W) where the time mask
            is to be applied.

        Returns:
            Tensor: The same tensor, modified in place, with the band filled.
        """
        # The band is written along dim 2, so its start must be drawn from
        # that axis. Drawing it from the height (dim 1) confined the mask to
        # the first H columns of the image.
        start = random.randrange(0, tensor.shape[2])
        end = start + random.randrange(0, self.max_width)
        fill = tensor.mean() if self.use_mean else 0
        tensor[:, :, start:end] = fill
        return tensor

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(max_width={self.max_width}, use_mean={self.use_mean})"
        )


class PrecomputedESC50(Dataset):
    """Dataset of precomputed PNG images whose label is encoded in the file name.

    The label is the integer between the last ``-`` of the file name and the
    ``.wav.png`` suffix (a name ending in ``-7.wav.png`` yields label 7).

    Args:
        path: Directory that is searched (non-recursively) for ``*.png``.
        max_freqmask_width: ``max_width`` handed to ``FrequencyMask``.
        max_timemask_width: ``max_width`` handed to ``TimeMask``.
        use_mean: Forwarded to both masks as their fill mode.
        dpi: Unused; kept so existing callers keep working.
        augment: When True (the default, matching the previous behavior) each
            mask is applied with probability 0.5. Pass False for evaluation
            splits to get only tensor conversion and normalization.
    """

    def __init__(self, path, max_freqmask_width, max_timemask_width, use_mean=True, dpi=50, augment=True):
        # Sorted so the index -> file mapping does not depend on the order in
        # which the filesystem happens to list the directory.
        files = sorted(Path(path).glob('*.png'))
        self.items = [(f, int(f.name.split("-")[-1].replace(".wav.png", ""))) for f in files]
        self.length = len(self.items)
        self.max_freqmask_width = max_freqmask_width
        self.max_timemask_width = max_timemask_width
        self.use_mean = use_mean
        self.augment = augment

        steps = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        if augment:
            steps.append(transforms.RandomApply([FrequencyMask(self.max_freqmask_width, self.use_mean)], p=0.5))
            steps.append(transforms.RandomApply([TimeMask(self.max_timemask_width, self.use_mean)], p=0.5))
        self.img_transforms = transforms.Compose(steps)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the item at ``index``."""
        filename, label = self.items[index]
        # The context manager closes the underlying file as soon as the pixels
        # are decoded; convert() returns an image that no longer needs it.
        with Image.open(filename) as img:
            rgb = img.convert('RGB')
        return (self.img_transforms(rgb), label)

    def __len__(self):
        return self.length

# Build a confusion-matrix heatmap figure (the caller may log it to TensorBoard)
def plot_confusion_matrix(model, test_loader, device="cpu"):
    """Run ``model`` over ``test_loader`` and draw the confusion matrix.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        The matplotlib figure containing the heatmap.
    """
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            output = model(inputs.to(device))
            # argmax of the raw scores equals argmax of their softmax.
            all_predictions.extend(output.argmax(dim=1).cpu().tolist())
            all_labels.extend(targets.cpu().tolist())

    # Generate confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)

    # With no explicit ``labels``, confusion_matrix orders rows and columns by
    # the sorted labels occurring in either list. Label the ticks with those
    # ids: positional ticks are wrong as soon as a class is absent.
    class_ids = sorted(set(all_labels) | set(all_predictions))
    tick_labels = class_ids if class_ids else True

    # Create a heatmap of the confusion matrix
    figure = plt.figure(figsize=(20, 16))
    sn.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')

    return figure

# Plot a grid of inputs titled with actual vs. predicted labels (the caller may log it to TensorBoard)
def log_predictions_vs_actuals(model, data_loader, device="cpu", num_batches=5):
    """Draw up to 16 samples per batch with their actual and predicted labels.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        data_loader: Iterable yielding ``(inputs, targets)`` batches; inputs
            are permuted from (N, C, H, W) to (N, H, W, C) for display.
        device: Device the batches are moved to before the forward pass.
        num_batches: Maximum number of batches to process.

    Returns:
        The figure of the last processed batch, or ``None`` when no batch was
        processed (empty loader or ``num_batches <= 0``).
    """
    model.eval()

    fig = None
    with torch.no_grad():
        for batch_counter, (inputs, targets) in enumerate(data_loader):
            if batch_counter >= num_batches:
                break

            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            probabilities, predicted_labels = torch.max(F.softmax(output, dim=1), dim=1)

            # Convert PyTorch tensors to NumPy arrays
            # NOTE(review): images are shown as-is; if the loader normalizes
            # them, imshow will clip out-of-range values - confirm.
            inputs_np = inputs.permute(0, 2, 3, 1).cpu().numpy()

            # Only the last figure is returned, so release the superseded one.
            if fig is not None:
                plt.close(fig)
            fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(12, 12))

            num_samples = inputs_np.shape[0]
            for i, ax in enumerate(axes.flat):
                ax.axis("off")
                # A batch smaller than the 4x4 grid leaves the extra cells
                # blank instead of raising IndexError.
                if i >= num_samples:
                    continue

                ax.imshow(inputs_np[i])

                actual_label = targets[i].item()
                predicted_label = predicted_labels[i].item()
                probability = probabilities[i].item()

                # Color the title based on correctness
                title_color = 'green' if actual_label == predicted_label else 'red'

                ax.set_title(f"Actual: {actual_label}\nPredicted: {predicted_label}\nProb: {probability:.2f}", color=title_color)

            fig.tight_layout()
    return fig

class EarlyStopping:
    """Track validation accuracy and signal when it has stopped improving.

    The stop flag is raised once more than ``patience`` consecutive calls to
    ``step`` fail to beat the best accuracy seen so far; once raised it stays
    raised.
    """

    def __init__(self, patience, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_valid_accuracy = 0.0
        self.early_stop = False

    def step(self, valid_accuracy):
        """Record one validation result and return the current stop flag."""
        improved = valid_accuracy > self.best_valid_accuracy
        if improved:
            # New best: remember it and restart the stagnation count.
            self.best_valid_accuracy = valid_accuracy
            self.counter = 0
            return self.early_stop

        self.counter += 1
        if self.counter <= self.patience:
            return self.early_stop

        self.early_stop = True
        if self.verbose:
            print("Early stopping activated.")
        return self.early_stop

class LearningRateScheduler(lr_scheduler._LRScheduler):
    """Reduce the learning rate when validation accuracy stops improving.

    Thin wrapper around ``ReduceLROnPlateau``. The monitored quantity is an
    accuracy (higher is better), so the wrapped scheduler runs in ``'max'``
    mode; in the default ``'min'`` mode every improvement counts as a bad
    epoch and the rate is cut while the model is still getting better.

    Args:
        optimizer: Optimizer whose parameter groups are rescaled.
        patience: Number of non-improving steps tolerated before a reduction.
        factor: Multiplier applied to each group's learning rate.
        verbose: Print a line for every parameter group whose rate changed.
            Printing is done here rather than by ``ReduceLROnPlateau``, whose
            own ``verbose`` argument is deprecated in recent PyTorch releases.
    """

    def __init__(self, optimizer, patience, factor=0.1, verbose=False):
        # Imported locally so construction does not depend on the module-level
        # name ``lr_scheduler``, which this notebook later rebinds to an
        # instance of this class.
        from torch.optim.lr_scheduler import ReduceLROnPlateau

        # The base-class initializer is deliberately not invoked (as before);
        # all scheduling is delegated to the wrapped ReduceLROnPlateau.
        self.optimizer = optimizer
        self.patience = patience
        self.factor = factor
        self.verbose = verbose
        self.lr_scheduler = ReduceLROnPlateau(
            optimizer, mode='max', patience=self.patience, factor=self.factor
        )

    def step(self, valid_accuracy):
        """Feed one validation accuracy; return the first group's learning rate."""
        previous_lrs = [group['lr'] for group in self.optimizer.param_groups]
        self.lr_scheduler.step(valid_accuracy)
        if self.verbose:
            for index, (old_lr, group) in enumerate(zip(previous_lrs, self.optimizer.param_groups)):
                if group['lr'] != old_lr:
                    print(f"Reducing learning rate of group {index} to {group['lr']:.4e}.")
        return self.optimizer.param_groups[0]['lr']


# Root directory under which each run gets its own TensorBoard log folder
log_dir = 'tensorboard_logs'

# timm model identifier and the file name used for its best checkpoint
load_model_name = "tf_efficientnetv2_b3"
model_name = f"best_model_{load_model_name}.pth"

# A timestamp prefix keeps separate runs of the same model apart
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
unique_folder_name = current_datetime + "_" + load_model_name
unique_log_dir = os.path.join(log_dir, unique_folder_name)

# Custom-scalars layout: one multiline chart per metric, each combining the
# train and validation series
layout = {
    "Train and validation at same time": {
        metric: ["Multiline", [f"{metric}/Train", f"{metric}/Validation"]]
        for metric in ("Loss", "Accuracy")
    },
}

# TensorBoard writer for this run
writer = SummaryWriter(log_dir=unique_log_dir)
writer.add_custom_scalars(layout)

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Train ``model``, validating and logging to TensorBoard after every epoch.

    Besides its arguments the function uses these module-level objects:
    ``writer`` (scalars, graph and confusion matrix are written to it and it
    is closed at the end), ``lr_scheduler`` (its ``step(valid_accuracy)`` must
    return the current learning rate), ``early_stopping``, ``unique_log_dir``
    and ``model_name`` (checkpoint location) and ``plot_confusion_matrix``.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        optimizer: Optimizer updating ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss.
        train_loader: Loader yielding ``(inputs, targets)`` training batches.
        val_loader: Loader yielding ``(inputs, targets)`` validation batches.
        epochs: Maximum number of epochs.
        device: Device the batches are moved to.

    Returns:
        None. The state dict with the best validation accuracy is saved next
        to the TensorBoard logs.
        NOTE(review): ``model`` keeps the weights of the last epoch, not of
        the best checkpoint - confirm this is what callers want.
    """
    best_valid_accuracy = 0.0
    best_model_state = None

    # Save the model next to the log file
    model_path = os.path.join(unique_log_dir, model_name)

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()

        # Initialize variables for train accuracy calculation
        num_correct_train = 0
        num_examples_train = 0

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size so the epoch value is a
            # per-example average even when the last batch is smaller.
            training_loss += loss.item() * inputs.size(0)

            # Calculate the number of correct predictions in the current batch
            matches = (output.argmax(dim=1) == targets).view(-1)
            num_correct_train += matches.sum().item()
            num_examples_train += matches.shape[0]

        training_loss /= len(train_loader.dataset)
        train_accuracy = num_correct_train / num_examples_train

        model.eval()
        num_correct = 0
        num_examples = 0

        # No gradients are needed for validation; without no_grad every batch
        # builds an autograd graph and wastes memory.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                matches = (output.argmax(dim=1) == targets).view(-1)
                num_correct += matches.sum().item()
                num_examples += matches.shape[0]

        valid_loss /= len(val_loader.dataset)
        valid_accuracy = num_correct / num_examples

        # Advance the scheduler with this epoch's accuracy; it returns the
        # learning rate now in effect
        current_lr = lr_scheduler.step(valid_accuracy)

        print('Epoch: {}, Learning Rate: {}, Training Loss: {:.2f}, Training Accuracy: {:.2f}, Validation Loss: {:.2f}, Validation Accuracy: {:.2f}'.format(epoch, current_lr, training_loss, train_accuracy, valid_loss, valid_accuracy))

        # Log the epoch metrics to TensorBoard
        writer.add_scalar('Learning Rate', current_lr, epoch)
        writer.add_scalar('Loss/Train', training_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Loss/Validation', valid_loss, epoch)
        writer.add_scalar('Accuracy/Validation', valid_accuracy, epoch)

        early_stop = early_stopping.step(valid_accuracy)
        if early_stop:
            break  # Stop training if early stopping is activated

        # Save the best model based on validation accuracy
        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
            best_model_state = model.state_dict()
            # Save the best model state to a file
            torch.save(best_model_state, model_path)

    # Only claim a checkpoint exists when one was actually written.
    if best_model_state is not None:
        print(f"\n Model has been saved to {model_path}")

    # Inspect the model (traced with the last validation batch)
    writer.add_graph(model, inputs)
    writer.add_figure('Confusion Matrix', plot_confusion_matrix(model, val_loader, device))
    # writer.add_figure(f"Predictions vs. Actuals", log_predictions_vs_actuals(model, val_loader, device=device, num_batches=1))

    # Close the TensorBoard SummaryWriter
    writer.close()



# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Directories holding the precomputed PNGs of each split.
PATH_ESC50_TRAIN="./train1/"
PATH_ESC50_VALID="./valid1/"
PATH_ESC50_TEST="./test/"

# Batch size shared by all three loaders.
bs=16

# One dataset per split, all built with the same mask widths.
esc50pre_train, esc50pre_valid, esc50pre_test = (
    PrecomputedESC50(split_path, max_freqmask_width=10, max_timemask_width=10)
    for split_path in (PATH_ESC50_TRAIN, PATH_ESC50_VALID, PATH_ESC50_TEST)
)

# One shuffling loader per dataset.
esc50_train_loader, esc50_val_loader, esc50_test_loader = (
    torch.utils.data.DataLoader(split, bs, shuffle=True)
    for split in (esc50pre_train, esc50pre_valid, esc50pre_test)
)

# Pretrained timm backbone; its classifier is replaced by a new head with 50
# outputs (Linear -> ReLU -> Dropout -> Linear).
model= timm.create_model(load_model_name, pretrained=True)
model.classifier = nn.Sequential(
    nn.Linear(model.classifier.in_features, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500, 50),
)

lr = 1e-2
model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Per-module parameter groups. Groups without an explicit 'lr' fall back to
# the optimizer-wide default given at the end.
# NOTE(review): the new classifier head gets the smallest rate (1e-8) while
# pretrained layers get 1e-4 or more - confirm this is intended.
optimizer = optim.Adam(
    [
        {'params': model.conv_stem.parameters()},
        {'params': model.bn1.parameters()},
        *(
            {'params': module.parameters(), 'lr': 1e-4}
            for module in (model.blocks, model.conv_head, model.bn2, model.global_pool)
        ),
        {'params': model.classifier.parameters(), 'lr': 1e-8},
    ],
    lr=lr,
)

# Early stopping waits twice as long as the learning-rate scheduler.
patience = 5
early_stopping_patience=2*patience
early_stopping = EarlyStopping(patience=early_stopping_patience, verbose=True)
# NOTE: this rebinds the name of the imported ``lr_scheduler`` module to the
# scheduler instance; train() reads the instance through this global name.
lr_scheduler = LearningRateScheduler(optimizer, patience=patience, factor=0.1, verbose=True)

train(model, optimizer, loss_fn, esc50_train_loader, esc50_val_loader, epochs=50, device=device)

test_accuracy = evaluate(model, esc50_test_loader, device=device)
print(f"\n Test Accuracy: {test_accuracy * 100:.2f}%")

model.safetensors:   0%|          | 0.00/57.9M [00:00<?, ?B/s]

Epoch: 1, Learning Rate: 0.01, Training Loss: 3.74, Training Accuracy: 0.08, Validation Loss: 3.52, Validation Accuracy: 0.23
Epoch: 2, Learning Rate: 0.01, Training Loss: 3.12, Training Accuracy: 0.32, Validation Loss: 2.92, Validation Accuracy: 0.46
Epoch: 3, Learning Rate: 0.01, Training Loss: 2.48, Training Accuracy: 0.53, Validation Loss: 2.29, Validation Accuracy: 0.59
Epoch: 4, Learning Rate: 0.01, Training Loss: 1.92, Training Accuracy: 0.67, Validation Loss: 1.83, Validation Accuracy: 0.66
Epoch: 5, Learning Rate: 0.01, Training Loss: 1.42, Training Accuracy: 0.80, Validation Loss: 1.47, Validation Accuracy: 0.72
Epoch: 6, Learning Rate: 0.01, Training Loss: 1.07, Training Accuracy: 0.86, Validation Loss: 1.27, Validation Accuracy: 0.74
Epoch 00007: reducing learning rate of group 0 to 1.0000e-03.
Epoch 00007: reducing learning rate of group 1 to 1.0000e-03.
Epoch 00007: reducing learning rate of group 2 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 3 to 1.0000e-

In [None]:
%load_ext tensorboard

In [None]:
!ls tensorboard_logs

2023-11-10_120231_tf_efficientnetv2_b3	2023-11-10_123852_tf_efficientnetv2_b3
2023-11-10_123147_tf_efficientnetv2_b3	2023-11-11_051023_tf_efficientnetv2_b3


In [None]:
%tensorboard --logdir tensorboard_logs

### ResNet 50

In [None]:
!pip install timm

Collecting timm
  Downloading timm-0.9.10-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.19.0 safetensors-0.4.0 timm-0.9.10


In [None]:
import IPython.display as display

import glob
from collections import Counter

import math
import pandas as pd

import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import torch
import torchaudio
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from pathlib import Path
from PIL import Image
import soundfile as sf
from torch.utils.data import Dataset
from torchvision import models, transforms
import timm

import tensorflow as tf
from torch.utils.tensorboard import SummaryWriter
import datetime

from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

import torch.optim.lr_scheduler as lr_scheduler

def evaluate(model, test_loader, device="cpu"):
    """Compute the classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified examples, or ``0.0`` when the
        loader yields no examples.
    """
    model.eval()
    num_correct = 0
    num_examples = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Softmax is monotonic, so the argmax of the raw scores is the
            # same class as the argmax of the probabilities.
            predicted = model(inputs).argmax(dim=1)
            matches = (predicted == targets).view(-1)
            num_correct += matches.sum().item()
            num_examples += matches.shape[0]

    # An empty loader used to raise ZeroDivisionError.
    if num_examples == 0:
        return 0.0
    return num_correct / num_examples

class FrequencyMask(object):
    """Mask a random band of rows (dim 1) of a (C, H, W) tensor image.

      Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     FrequencyMask(max_width=10, use_mean=False),
        >>> ])

    """

    def __init__(self, max_width, use_mean=True):
        """
        Args:
            max_width (int): Exclusive upper bound of the band height; the
                height is drawn from ``[1, max_width)``.
            use_mean (bool): Fill the band with the tensor mean when True,
                with zeros otherwise.
        """
        self.max_width = max_width
        self.use_mean = use_mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of
            size (C, H, W) where the frequency
            mask is to be applied.

        Returns:
            Tensor: The same tensor, modified in place, with the band filled.
        """
        # The band is written along dim 1, so its start must be drawn from
        # that axis. Drawing it from the width (dim 2) placed most bands
        # outside the image whenever the image is wider than it is tall.
        start = random.randrange(0, tensor.shape[1])
        end = start + random.randrange(1, self.max_width)
        fill = tensor.mean() if self.use_mean else 0
        tensor[:, start:end, :] = fill
        return tensor

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(max_width={self.max_width}, use_mean={self.use_mean})"
        )


class TimeMask(object):
    """Mask a random band of columns (dim 2) of a (C, H, W) tensor image.

      Example:
        >>> transforms.Compose([
        >>>     transforms.ToTensor(),
        >>>     TimeMask(max_width=10, use_mean=False),
        >>> ])

    """

    def __init__(self, max_width, use_mean=True):
        """
        Args:
            max_width (int): Exclusive upper bound of the band width; the
                width is drawn from ``[0, max_width)``, so a call may leave
                the tensor unchanged.
            use_mean (bool): Fill the band with the tensor mean when True,
                with zeros otherwise.
        """
        self.max_width = max_width
        self.use_mean = use_mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of
            size (C, H, W) where the time mask
            is to be applied.

        Returns:
            Tensor: The same tensor, modified in place, with the band filled.
        """
        # The band is written along dim 2, so its start must be drawn from
        # that axis. Drawing it from the height (dim 1) confined the mask to
        # the first H columns of the image.
        start = random.randrange(0, tensor.shape[2])
        end = start + random.randrange(0, self.max_width)
        fill = tensor.mean() if self.use_mean else 0
        tensor[:, :, start:end] = fill
        return tensor

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(max_width={self.max_width}, use_mean={self.use_mean})"
        )


class PrecomputedESC50(Dataset):
    """Dataset of precomputed PNG images whose label is encoded in the file name.

    The label is the integer between the last ``-`` of the file name and the
    ``.wav.png`` suffix (a name ending in ``-7.wav.png`` yields label 7).

    Args:
        path: Directory that is searched (non-recursively) for ``*.png``.
        max_freqmask_width: ``max_width`` handed to ``FrequencyMask``.
        max_timemask_width: ``max_width`` handed to ``TimeMask``.
        use_mean: Forwarded to both masks as their fill mode.
        dpi: Unused; kept so existing callers keep working.
        augment: When True (the default, matching the previous behavior) each
            mask is applied with probability 0.5. Pass False for evaluation
            splits to get only tensor conversion and normalization.
    """

    def __init__(self, path, max_freqmask_width, max_timemask_width, use_mean=True, dpi=50, augment=True):
        # Sorted so the index -> file mapping does not depend on the order in
        # which the filesystem happens to list the directory.
        files = sorted(Path(path).glob('*.png'))
        self.items = [(f, int(f.name.split("-")[-1].replace(".wav.png", ""))) for f in files]
        self.length = len(self.items)
        self.max_freqmask_width = max_freqmask_width
        self.max_timemask_width = max_timemask_width
        self.use_mean = use_mean
        self.augment = augment

        steps = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        if augment:
            steps.append(transforms.RandomApply([FrequencyMask(self.max_freqmask_width, self.use_mean)], p=0.5))
            steps.append(transforms.RandomApply([TimeMask(self.max_timemask_width, self.use_mean)], p=0.5))
        self.img_transforms = transforms.Compose(steps)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the item at ``index``."""
        filename, label = self.items[index]
        # The context manager closes the underlying file as soon as the pixels
        # are decoded; convert() returns an image that no longer needs it.
        with Image.open(filename) as img:
            rgb = img.convert('RGB')
        return (self.img_transforms(rgb), label)

    def __len__(self):
        return self.length

# Build a confusion-matrix heatmap figure (the caller may log it to TensorBoard)
def plot_confusion_matrix(model, test_loader, device="cpu"):
    """Run ``model`` over ``test_loader`` and draw the confusion matrix.

    Args:
        model: Module mapping a batch of inputs to per-class scores with the
            class axis at dim 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        The matplotlib figure containing the heatmap.
    """
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            output = model(inputs.to(device))
            # argmax of the raw scores equals argmax of their softmax.
            all_predictions.extend(output.argmax(dim=1).cpu().tolist())
            all_labels.extend(targets.cpu().tolist())

    # Generate confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)

    # With no explicit ``labels``, confusion_matrix orders rows and columns by
    # the sorted labels occurring in either list. Label the ticks with those
    # ids: positional ticks are wrong as soon as a class is absent.
    class_ids = sorted(set(all_labels) | set(all_predictions))
    tick_labels = class_ids if class_ids else True

    # Create a heatmap of the confusion matrix
    figure = plt.figure(figsize=(20, 16))
    sn.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')

    return figure

# Define a function to log predictions vs. actuals as images to TensorBoard
def log_predictions_vs_actuals(model, data_loader, device="cpu", num_batches=5):
    """Plot up to 16 samples per batch with their actual and predicted labels.

    One 4x4 figure is created for each of the first ``num_batches`` batches;
    samples beyond the 16th of a batch are not shown.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_loader: Iterable of ``(inputs, targets)`` batches.
            # assumes inputs are (N, C, H, W) image tensors - TODO confirm
        device: Device the batches are moved to.
        num_batches: Maximum number of batches to plot.

    Returns:
        The figure of the last plotted batch, or None if no batch was plotted.
    """
    model.eval()

    fig = None
    with torch.no_grad():
        # range() is the first zip argument, so iteration stops without
        # pulling an extra batch from the loader.
        for _, (inputs, targets) in zip(range(num_batches), data_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            probabilities, predicted_labels = torch.max(F.softmax(output, dim=1), dim=1)

            # Convert PyTorch tensors to NumPy arrays (channels last for imshow)
            inputs_np = inputs.permute(0, 2, 3, 1).cpu().numpy()
            batch_size = inputs_np.shape[0]

            # Create a figure for each batch
            fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(12, 12))

            for i, ax in enumerate(axes.flat):
                if i >= batch_size:
                    # A short (e.g. final) batch leaves the remaining cells blank.
                    ax.axis("off")
                    continue

                ax.imshow(inputs_np[i])
                ax.axis("off")

                actual_label = targets[i].item()
                predicted_label = predicted_labels[i].item()
                probability = probabilities[i].item()

                # Color the title based on correctness
                title_color = 'green' if actual_label == predicted_label else 'red'

                ax.set_title(f"Actual: {actual_label}\nPredicted: {predicted_label}\nProb: {probability:.2f}", color=title_color)

            plt.tight_layout()
    return fig

class EarlyStopping:
    """Signal when validation accuracy has stopped improving.

    ``step`` is called once per epoch. The stop flag is raised once the
    accuracy has failed to exceed the best value seen so far for more than
    ``patience`` consecutive calls, and it stays raised afterwards.
    """

    def __init__(self, patience, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_valid_accuracy = 0.0
        self.early_stop = False

    def step(self, valid_accuracy):
        """Record one epoch's accuracy and return the current stop flag."""
        improved = valid_accuracy > self.best_valid_accuracy
        if improved:
            # New best: remember it and restart the patience window.
            self.best_valid_accuracy = valid_accuracy
            self.counter = 0
            return self.early_stop

        self.counter += 1
        if self.counter > self.patience:
            self.early_stop = True
            if self.verbose:
                print("Early stopping activated.")
        return self.early_stop

class LearningRateScheduler(torch.optim.lr_scheduler._LRScheduler):
    """Thin wrapper around ``ReduceLROnPlateau`` driven by validation accuracy.

    The learning rate is multiplied by ``factor`` once the accuracy has not
    improved for more than ``patience`` consecutive ``step`` calls.

    The base-class ``__init__`` is not called; this class only delegates to
    the wrapped ``ReduceLROnPlateau`` instance.

    Args:
        optimizer: Optimizer whose learning rates are adjusted.
        patience: Passed to ``ReduceLROnPlateau`` as ``patience``.
        factor: Passed to ``ReduceLROnPlateau`` as ``factor``.
        verbose: If truthy, forwarded to ``ReduceLROnPlateau`` as ``verbose``.
    """

    def __init__(self, optimizer, patience, factor=0.1, verbose=False):
        self.optimizer = optimizer
        self.patience = patience
        self.factor = factor
        self.verbose = verbose

        # step() receives an accuracy, where higher is better. The default
        # mode "min" would count every improvement as a bad epoch and cut the
        # learning rate while the model is still getting better.
        plateau_kwargs = {"mode": "max", "patience": self.patience, "factor": self.factor}
        if self.verbose:
            # `verbose` is deprecated in recent PyTorch releases, so it is
            # only forwarded when explicitly requested.
            plateau_kwargs["verbose"] = self.verbose

        # Use the fully qualified module path: the notebook later rebinds the
        # global name `lr_scheduler` to an instance of this class.
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_kwargs)

    def step(self, valid_accuracy):
        """Feed one validation accuracy and return the first group's learning rate."""
        self.lr_scheduler.step(valid_accuracy)
        return self.optimizer.param_groups[0]['lr']


# Root directory under which every run gets its own TensorBoard log folder
log_dir = 'tensorboard_logs'

# The architecture name labels both the run folder and the checkpoint file
load_model_name = "ResNet50"
model_name = f"best_model_{load_model_name}.pth"

# Run folder name: timestamp (second resolution) followed by the model name
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
unique_folder_name = "_".join((current_datetime, load_model_name))
unique_log_dir = os.path.join(log_dir, unique_folder_name)

# Custom-scalars layout: one multi-line chart per metric that overlays the
# training and validation curves
layout = {
    "Train and validation at same time": {
        metric: ["Multiline", [f"{metric}/Train", f"{metric}/Validation"]]
        for metric in ("Loss", "Accuracy")
    },
}

# Create the TensorBoard SummaryWriter and register the layout
writer = SummaryWriter(log_dir=unique_log_dir)
writer.add_custom_scalars(layout)

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Fine-tune ``model``, log to TensorBoard and checkpoint the best epoch.

    Besides its arguments this function relies on module-level objects that
    must exist before it is called: ``writer``, ``lr_scheduler`` and
    ``early_stopping`` (both stepped with the validation accuracy), and
    ``unique_log_dir`` / ``model_name`` (checkpoint location).

    Args:
        model: Network to train; it is not moved to ``device`` here.
        optimizer: Optimizer updating the model parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss.
        train_loader: Loader of ``(inputs, targets)`` training batches; must
            expose ``.dataset`` with a length.
        val_loader: Loader of ``(inputs, targets)`` validation batches; must
            expose ``.dataset`` with a length.
        epochs: Maximum number of epochs.
        device: Device the batches are moved to.

    Side effects:
        Writes scalars, the model graph and a confusion-matrix figure to
        ``writer`` and closes it. Each time the validation accuracy improves
        the weights are saved to ``<unique_log_dir>/<model_name>``; after
        training the saved best weights are loaded back into ``model``.
    """
    best_valid_accuracy = 0.0
    best_model_state = None
    inputs = None  # last batch seen; reused as example input for add_graph

    # Save the model next to the log file
    model_path = os.path.join(unique_log_dir, model_name)

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()

        # Initialize variables for train accuracy calculation
        num_correct_train = 0
        num_examples_train = 0

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch value is a per-sample mean.
            training_loss += loss.item() * inputs.size(0)

            # softmax is monotonic, so arg-max of the raw outputs is the prediction
            correct = output.argmax(dim=1).eq(targets)
            num_correct_train += correct.sum().item()
            num_examples_train += correct.shape[0]

        training_loss /= len(train_loader.dataset)
        train_accuracy = num_correct_train / num_examples_train

        model.eval()
        num_correct = 0
        num_examples = 0

        # Validation needs no gradients; skipping them avoids building
        # autograd graphs for every batch.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                correct = output.argmax(dim=1).eq(targets)
                num_correct += correct.sum().item()
                num_examples += correct.shape[0]

        valid_loss /= len(val_loader.dataset)
        valid_accuracy = num_correct / num_examples

        # Step the scheduler; it returns the first parameter group's learning rate
        current_lr = lr_scheduler.step(valid_accuracy)

        print('Epoch: {}, Learning Rate: {}, Training Loss: {:.2f}, Training Accuracy: {:.2f}, Validation Loss: {:.2f}, Validation Accuracy: {:.2f}'.format(epoch, current_lr, training_loss, train_accuracy, valid_loss, valid_accuracy))

        # Log the epoch metrics to TensorBoard
        writer.add_scalar('Learning Rate', current_lr, epoch)
        writer.add_scalar('Loss/Train', training_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Loss/Validation', valid_loss, epoch)
        writer.add_scalar('Accuracy/Validation', valid_accuracy, epoch)

        # Save the best model based on validation accuracy. This happens
        # before the early-stopping check so an improving epoch is never lost
        # to the `break` below.
        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
            best_model_state = model.state_dict()
            # Save the best model state to a file
            torch.save(best_model_state, model_path)

        early_stop = early_stopping.step(valid_accuracy)
        if early_stop:
            break  # Stop training if early stopping is activated

    if best_model_state is not None:
        print(f"\n Model has been saved to {model_path}")
        # state_dict() references the live tensors, so the best weights have
        # to be restored from the file, not from best_model_state.
        model.load_state_dict(torch.load(model_path, map_location=device))
    else:
        print("\n No checkpoint was saved: validation accuracy never improved.")

    # Inspect the model
    if inputs is not None:
        writer.add_graph(model, inputs)
    writer.add_figure('Confusion Matrix', plot_confusion_matrix(model, val_loader, device))

    # Close the TensorBoard SummaryWriter
    writer.close()



# Prefer the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


PATH_ESC50_TRAIN="./train1/"
PATH_ESC50_VALID="./valid1/"
PATH_ESC50_TEST="./test/"

bs=16
esc50pre_train = PrecomputedESC50(PATH_ESC50_TRAIN, max_freqmask_width=10, max_timemask_width=10 )
esc50pre_valid = PrecomputedESC50(PATH_ESC50_VALID,max_freqmask_width=10, max_timemask_width=10 )
esc50pre_test = PrecomputedESC50(PATH_ESC50_TEST,max_freqmask_width=10, max_timemask_width=10 )

esc50_train_loader = torch.utils.data.DataLoader(esc50pre_train, bs, shuffle=True)
# Shuffling only matters for training; evaluation metrics do not depend on
# sample order, so the evaluation loaders iterate in a fixed order.
esc50_val_loader = torch.utils.data.DataLoader(esc50pre_valid, bs, shuffle=False)
esc50_test_loader = torch.utils.data.DataLoader(esc50pre_test, bs, shuffle=False)

model = models.resnet50(pretrained=True)

# Replace the last fully connected layer with a 50-class head
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500, 50)
)

lr = 1e-2
model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Per-module learning rates; `lr` is only the default for groups without one.
# NOTE(review): the new head trains at 1e-8, far below the 1e-4 used for the
# pretrained layers - confirm this is intended and not a typo.
# NOTE(review): only the listed sub-modules are optimised; any other model
# parameters keep their pretrained values - confirm this is intended.
optimizer = optim.Adam([
                        {'params': model.conv1.parameters(), 'lr': 1e-4},
                        {'params': model.layer1.parameters(), 'lr': 1e-4},
                        {'params': model.layer2.parameters(), 'lr': 1e-4},
                        {'params': model.layer3.parameters(), 'lr': 1e-4},
                        {'params': model.layer4.parameters(), 'lr': 1e-4},
                        {'params': model.fc.parameters(), 'lr': 1e-8}
                        ], lr=lr)

# Use these classes during training
patience = 5
early_stopping_patience=2*patience
early_stopping = EarlyStopping(patience=early_stopping_patience, verbose=True)
# NOTE: this rebinds the global name `lr_scheduler` (until now the imported
# torch.optim.lr_scheduler module) to the scheduler instance that train() reads.
lr_scheduler = LearningRateScheduler(optimizer, patience=patience, factor=0.1, verbose=True)

train(model, optimizer, loss_fn, esc50_train_loader, esc50_val_loader, epochs=50, device=device)

# Evaluate the best checkpoint written by train() when there is one,
# not whatever weights the final epoch happened to leave behind.
best_checkpoint = os.path.join(unique_log_dir, model_name)
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))

test_accuracy = evaluate(model, esc50_test_loader, device=device)
print(f"\n Test Accuracy: {test_accuracy * 100:.2f}%")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 165MB/s]


Epoch: 1, Learning Rate: 0.0001, Training Loss: 3.60, Training Accuracy: 0.17, Validation Loss: 3.06, Validation Accuracy: 0.35
Epoch: 2, Learning Rate: 0.0001, Training Loss: 2.97, Training Accuracy: 0.42, Validation Loss: 2.52, Validation Accuracy: 0.49
Epoch: 3, Learning Rate: 0.0001, Training Loss: 2.41, Training Accuracy: 0.59, Validation Loss: 2.05, Validation Accuracy: 0.64
Epoch: 4, Learning Rate: 0.0001, Training Loss: 1.97, Training Accuracy: 0.73, Validation Loss: 1.70, Validation Accuracy: 0.67
Epoch: 5, Learning Rate: 0.0001, Training Loss: 1.55, Training Accuracy: 0.82, Validation Loss: 1.68, Validation Accuracy: 0.68
Epoch: 6, Learning Rate: 0.0001, Training Loss: 1.23, Training Accuracy: 0.90, Validation Loss: 1.33, Validation Accuracy: 0.77
Epoch 00007: reducing learning rate of group 0 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 1 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 2 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 3

In [None]:
%load_ext tensorboard

In [None]:
!ls tensorboard_logs

2023-11-10_120231_tf_efficientnetv2_b3	2023-11-10_123852_tf_efficientnetv2_b3
2023-11-10_123147_tf_efficientnetv2_b3	2023-11-11_051023_tf_efficientnetv2_b3


In [None]:
%tensorboard --logdir tensorboard_logs

### ResNet152

In [None]:
# Root directory under which every run gets its own TensorBoard log folder
log_dir = 'tensorboard_logs'

# The architecture name labels both the run folder and the checkpoint file
load_model_name = "ResNet152"
model_name = f"best_model_{load_model_name}.pth"

# Run folder name: timestamp (second resolution) followed by the model name
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
unique_folder_name = "_".join((current_datetime, load_model_name))
unique_log_dir = os.path.join(log_dir, unique_folder_name)

# Custom-scalars layout: one multi-line chart per metric that overlays the
# training and validation curves
layout = {
    "Train and validation at same time": {
        metric: ["Multiline", [f"{metric}/Train", f"{metric}/Validation"]]
        for metric in ("Loss", "Accuracy")
    },
}

# Create the TensorBoard SummaryWriter and register the layout
writer = SummaryWriter(log_dir=unique_log_dir)
writer.add_custom_scalars(layout)

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Fine-tune ``model``, log to TensorBoard and checkpoint the best epoch.

    Besides its arguments this function relies on module-level objects that
    must exist before it is called: ``writer``, ``lr_scheduler`` and
    ``early_stopping`` (both stepped with the validation accuracy), and
    ``unique_log_dir`` / ``model_name`` (checkpoint location).

    Args:
        model: Network to train; it is not moved to ``device`` here.
        optimizer: Optimizer updating the model parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss.
        train_loader: Loader of ``(inputs, targets)`` training batches; must
            expose ``.dataset`` with a length.
        val_loader: Loader of ``(inputs, targets)`` validation batches; must
            expose ``.dataset`` with a length.
        epochs: Maximum number of epochs.
        device: Device the batches are moved to.

    Side effects:
        Writes scalars, the model graph and a confusion-matrix figure to
        ``writer`` and closes it. Each time the validation accuracy improves
        the weights are saved to ``<unique_log_dir>/<model_name>``; after
        training the saved best weights are loaded back into ``model``.
    """
    best_valid_accuracy = 0.0
    best_model_state = None
    inputs = None  # last batch seen; reused as example input for add_graph

    # Save the model next to the log file
    model_path = os.path.join(unique_log_dir, model_name)

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()

        # Initialize variables for train accuracy calculation
        num_correct_train = 0
        num_examples_train = 0

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch value is a per-sample mean.
            training_loss += loss.item() * inputs.size(0)

            # softmax is monotonic, so arg-max of the raw outputs is the prediction
            correct = output.argmax(dim=1).eq(targets)
            num_correct_train += correct.sum().item()
            num_examples_train += correct.shape[0]

        training_loss /= len(train_loader.dataset)
        train_accuracy = num_correct_train / num_examples_train

        model.eval()
        num_correct = 0
        num_examples = 0

        # Validation needs no gradients; skipping them avoids building
        # autograd graphs for every batch.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                correct = output.argmax(dim=1).eq(targets)
                num_correct += correct.sum().item()
                num_examples += correct.shape[0]

        valid_loss /= len(val_loader.dataset)
        valid_accuracy = num_correct / num_examples

        # Step the scheduler; it returns the first parameter group's learning rate
        current_lr = lr_scheduler.step(valid_accuracy)

        print('Epoch: {}, Learning Rate: {}, Training Loss: {:.2f}, Training Accuracy: {:.2f}, Validation Loss: {:.2f}, Validation Accuracy: {:.2f}'.format(epoch, current_lr, training_loss, train_accuracy, valid_loss, valid_accuracy))

        # Log the epoch metrics to TensorBoard
        writer.add_scalar('Learning Rate', current_lr, epoch)
        writer.add_scalar('Loss/Train', training_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Loss/Validation', valid_loss, epoch)
        writer.add_scalar('Accuracy/Validation', valid_accuracy, epoch)

        # Save the best model based on validation accuracy. This happens
        # before the early-stopping check so an improving epoch is never lost
        # to the `break` below.
        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
            best_model_state = model.state_dict()
            # Save the best model state to a file
            torch.save(best_model_state, model_path)

        early_stop = early_stopping.step(valid_accuracy)
        if early_stop:
            break  # Stop training if early stopping is activated

    if best_model_state is not None:
        print(f"\n Model has been saved to {model_path}")
        # state_dict() references the live tensors, so the best weights have
        # to be restored from the file, not from best_model_state.
        model.load_state_dict(torch.load(model_path, map_location=device))
    else:
        print("\n No checkpoint was saved: validation accuracy never improved.")

    # Inspect the model
    if inputs is not None:
        writer.add_graph(model, inputs)
    writer.add_figure('Confusion Matrix', plot_confusion_matrix(model, val_loader, device))

    # Close the TensorBoard SummaryWriter
    writer.close()

model = models.resnet152(pretrained=True)

# Replace the last fully connected layer with a 50-class head
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500, 50)
)

lr = 1e-2
model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Per-module learning rates; `lr` is only the default for groups without one.
# NOTE(review): the new head trains at 1e-8, far below the 1e-4 used for the
# pretrained layers - confirm this is intended and not a typo.
optimizer = optim.Adam([
                        {'params': model.conv1.parameters(), 'lr': 1e-4},
                        {'params': model.layer1.parameters(), 'lr': 1e-4},
                        {'params': model.layer2.parameters(), 'lr': 1e-4},
                        {'params': model.layer3.parameters(), 'lr': 1e-4},
                        {'params': model.layer4.parameters(), 'lr': 1e-4},
                        {'params': model.fc.parameters(), 'lr': 1e-8}
                        ], lr=lr)

# Use these classes during training
patience = 5
early_stopping_patience=8
early_stopping = EarlyStopping(patience=early_stopping_patience, verbose=True)

# An earlier training cell rebinds the global `lr_scheduler` to a scheduler
# instance, while LearningRateScheduler.__init__ looks up
# `lr_scheduler.ReduceLROnPlateau` through that same global. Restore the
# module alias first so the constructor works regardless of cell order.
import torch.optim.lr_scheduler as lr_scheduler
lr_scheduler = LearningRateScheduler(optimizer, patience=patience, factor=0.1, verbose=True)

train(model, optimizer, loss_fn, esc50_train_loader, esc50_val_loader, epochs=50, device=device)

# Evaluate the best checkpoint written by train() when there is one,
# not whatever weights the final epoch happened to leave behind.
best_checkpoint = os.path.join(unique_log_dir, model_name)
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))

test_accuracy = evaluate(model, esc50_test_loader, device=device)
print(f"\n Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch: 1, Learning Rate: 0.0001, Training Loss: 3.58, Training Accuracy: 0.20, Validation Loss: 3.08, Validation Accuracy: 0.43
Epoch: 2, Learning Rate: 0.0001, Training Loss: 2.84, Training Accuracy: 0.43, Validation Loss: 2.33, Validation Accuracy: 0.54
Epoch: 3, Learning Rate: 0.0001, Training Loss: 2.28, Training Accuracy: 0.60, Validation Loss: 2.03, Validation Accuracy: 0.58
Epoch: 4, Learning Rate: 0.0001, Training Loss: 1.80, Training Accuracy: 0.70, Validation Loss: 1.50, Validation Accuracy: 0.71
Epoch: 5, Learning Rate: 0.0001, Training Loss: 1.35, Training Accuracy: 0.82, Validation Loss: 1.51, Validation Accuracy: 0.68
Epoch: 6, Learning Rate: 0.0001, Training Loss: 1.14, Training Accuracy: 0.85, Validation Loss: 1.19, Validation Accuracy: 0.75
Epoch 00007: reducing learning rate of group 0 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 1 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 2 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 3

### Wide ResNet-101-2

Move `model_path` out of the `train` function so that the saved best model can be reloaded and evaluated on the test data.

In [None]:
# Root directory under which every run gets its own TensorBoard log folder
log_dir = 'tensorboard_logs'

# The architecture name labels both the run folder and the checkpoint file
load_model_name = "wide_resnet101_2"
model_name = f"best_model_{load_model_name}.pth"

# Run folder name: timestamp (second resolution) followed by the model name
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
unique_folder_name = "_".join((current_datetime, load_model_name))
unique_log_dir = os.path.join(log_dir, unique_folder_name)
# The checkpoint lives inside the run's log folder
model_path = os.path.join(unique_log_dir, model_name)

# Custom-scalars layout: one multi-line chart per metric that overlays the
# training and validation curves
layout = {
    "Train and validation at same time": {
        metric: ["Multiline", [f"{metric}/Train", f"{metric}/Validation"]]
        for metric in ("Loss", "Accuracy")
    },
}

# Create the TensorBoard SummaryWriter and register the layout
writer = SummaryWriter(log_dir=unique_log_dir)
writer.add_custom_scalars(layout)

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Fine-tune ``model``, log to TensorBoard and checkpoint the best epoch.

    Besides its arguments this function relies on module-level objects that
    must exist before it is called: ``writer``, ``lr_scheduler`` and
    ``early_stopping`` (both stepped with the validation accuracy), and
    ``model_path`` (checkpoint file).

    Args:
        model: Network to train; it is not moved to ``device`` here.
        optimizer: Optimizer updating the model parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss.
        train_loader: Loader of ``(inputs, targets)`` training batches; must
            expose ``.dataset`` with a length.
        val_loader: Loader of ``(inputs, targets)`` validation batches; must
            expose ``.dataset`` with a length.
        epochs: Maximum number of epochs.
        device: Device the batches are moved to.

    Side effects:
        Writes scalars, the model graph and a confusion-matrix figure to
        ``writer`` and closes it. Each time the validation accuracy improves
        the weights are saved to ``model_path``; after training the saved best
        weights are loaded back into ``model``.
    """
    best_valid_accuracy = 0.0
    best_model_state = None
    inputs = None  # last batch seen; reused as example input for add_graph

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()

        # Initialize variables for train accuracy calculation
        num_correct_train = 0
        num_examples_train = 0

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch value is a per-sample mean.
            training_loss += loss.item() * inputs.size(0)

            # softmax is monotonic, so arg-max of the raw outputs is the prediction
            correct = output.argmax(dim=1).eq(targets)
            num_correct_train += correct.sum().item()
            num_examples_train += correct.shape[0]

        training_loss /= len(train_loader.dataset)
        train_accuracy = num_correct_train / num_examples_train

        model.eval()
        num_correct = 0
        num_examples = 0

        # Validation needs no gradients; skipping them avoids building
        # autograd graphs for every batch.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                correct = output.argmax(dim=1).eq(targets)
                num_correct += correct.sum().item()
                num_examples += correct.shape[0]

        valid_loss /= len(val_loader.dataset)
        valid_accuracy = num_correct / num_examples

        # Step the scheduler; it returns the first parameter group's learning rate
        current_lr = lr_scheduler.step(valid_accuracy)

        print('Epoch: {}, Learning Rate: {}, Training Loss: {:.2f}, Training Accuracy: {:.2f}, Validation Loss: {:.2f}, Validation Accuracy: {:.2f}'.format(epoch, current_lr, training_loss, train_accuracy, valid_loss, valid_accuracy))

        # Log the epoch metrics to TensorBoard
        writer.add_scalar('Learning Rate', current_lr, epoch)
        writer.add_scalar('Loss/Train', training_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Loss/Validation', valid_loss, epoch)
        writer.add_scalar('Accuracy/Validation', valid_accuracy, epoch)

        # Save the best model based on validation accuracy. This happens
        # before the early-stopping check so an improving epoch is never lost
        # to the `break` below.
        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
            best_model_state = model.state_dict()
            # Save the best model state to a file
            torch.save(best_model_state, model_path)

        early_stop = early_stopping.step(valid_accuracy)
        if early_stop:
            break  # Stop training if early stopping is activated

    if best_model_state is not None:
        print(f"\n Model has been saved to {model_path}")
        # state_dict() references the live tensors, so the best weights have
        # to be restored from the file, not from best_model_state.
        model.load_state_dict(torch.load(model_path, map_location=device))
    else:
        print("\n No checkpoint was saved: validation accuracy never improved.")

    # Inspect the model
    if inputs is not None:
        writer.add_graph(model, inputs)
    writer.add_figure('Confusion Matrix', plot_confusion_matrix(model, val_loader, device))

    # Close the TensorBoard SummaryWriter
    writer.close()

model = models.wide_resnet101_2(pretrained=True)

# Replace the last fully connected layer with a 50-class head
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500, 50)
)

lr = 1e-2
model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Per-module learning rates; `lr` is only the default for groups without one.
# NOTE(review): the new head trains at 1e-8, far below the 1e-4 used for the
# pretrained layers - confirm this is intended and not a typo.
optimizer = optim.Adam([
                        {'params': model.conv1.parameters(), 'lr': 1e-4},
                        {'params': model.layer1.parameters(), 'lr': 1e-4},
                        {'params': model.layer2.parameters(), 'lr': 1e-4},
                        {'params': model.layer3.parameters(), 'lr': 1e-4},
                        {'params': model.layer4.parameters(), 'lr': 1e-4},
                        {'params': model.fc.parameters(), 'lr': 1e-8}
                        ], lr=lr)

# Use these classes during training
patience = 5
early_stopping_patience=10
early_stopping = EarlyStopping(patience=early_stopping_patience, verbose=True)

# An earlier training cell rebinds the global `lr_scheduler` to a scheduler
# instance, while LearningRateScheduler.__init__ looks up
# `lr_scheduler.ReduceLROnPlateau` through that same global. Restore the
# module alias first so the constructor works regardless of cell order.
import torch.optim.lr_scheduler as lr_scheduler
lr_scheduler = LearningRateScheduler(optimizer, patience=patience, factor=0.1, verbose=True)

train(model, optimizer, loss_fn, esc50_train_loader, esc50_val_loader, epochs=50, device=device)

# Evaluate the best checkpoint written by train(). map_location keeps the
# load working when the checkpoint was written on a different device; the
# existence check covers a run in which no epoch produced a checkpoint.
if os.path.exists(model_path):
    model.load_state_dict(torch.load(model_path, map_location=device))
test_accuracy = evaluate(model, esc50_test_loader, device=device)
print(f"\n Test Accuracy: {test_accuracy * 100:.2f}%")

Downloading: "https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth" to /root/.cache/torch/hub/checkpoints/wide_resnet101_2-32ee1156.pth
100%|██████████| 243M/243M [00:08<00:00, 31.8MB/s]


Epoch: 1, Learning Rate: 0.0001, Training Loss: 3.62, Training Accuracy: 0.20, Validation Loss: 3.54, Validation Accuracy: 0.30
Epoch: 2, Learning Rate: 0.0001, Training Loss: 2.98, Training Accuracy: 0.50, Validation Loss: 2.54, Validation Accuracy: 0.50
Epoch: 3, Learning Rate: 0.0001, Training Loss: 2.50, Training Accuracy: 0.64, Validation Loss: 1.97, Validation Accuracy: 0.50
Epoch: 4, Learning Rate: 0.0001, Training Loss: 2.04, Training Accuracy: 0.79, Validation Loss: 2.28, Validation Accuracy: 0.50
Epoch: 5, Learning Rate: 0.0001, Training Loss: 1.64, Training Accuracy: 0.85, Validation Loss: 2.09, Validation Accuracy: 0.60
Epoch: 6, Learning Rate: 0.0001, Training Loss: 1.42, Training Accuracy: 0.90, Validation Loss: 1.97, Validation Accuracy: 0.70
Epoch 00007: reducing learning rate of group 0 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 1 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 2 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 3

### ResNeXt-101

In [None]:
# Root directory under which every run gets its own TensorBoard log folder
log_dir = 'tensorboard_logs'

# The architecture name labels both the run folder and the checkpoint file
load_model_name = "resnext101_32x8d"
model_name = f"best_model_{load_model_name}.pth"

# Run folder name: timestamp (second resolution) followed by the model name
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
unique_folder_name = "_".join((current_datetime, load_model_name))
unique_log_dir = os.path.join(log_dir, unique_folder_name)
# The checkpoint lives inside the run's log folder
model_path = os.path.join(unique_log_dir, model_name)

# Custom-scalars layout: one multi-line chart per metric that overlays the
# training and validation curves
layout = {
    "Train and validation at same time": {
        metric: ["Multiline", [f"{metric}/Train", f"{metric}/Validation"]]
        for metric in ("Loss", "Accuracy")
    },
}

# Create the TensorBoard SummaryWriter and register the layout
writer = SummaryWriter(log_dir=unique_log_dir)
writer.add_custom_scalars(layout)

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Fine-tune ``model``, log to TensorBoard and checkpoint the best epoch.

    Besides its arguments this function relies on module-level objects that
    must exist before it is called: ``writer``, ``lr_scheduler`` and
    ``early_stopping`` (both stepped with the validation accuracy), and
    ``model_path`` (checkpoint file).

    Args:
        model: Network to train; it is not moved to ``device`` here.
        optimizer: Optimizer updating the model parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss.
        train_loader: Loader of ``(inputs, targets)`` training batches; must
            expose ``.dataset`` with a length.
        val_loader: Loader of ``(inputs, targets)`` validation batches; must
            expose ``.dataset`` with a length.
        epochs: Maximum number of epochs.
        device: Device the batches are moved to.

    Side effects:
        Writes scalars, the model graph and a confusion-matrix figure to
        ``writer`` and closes it. Each time the validation accuracy improves
        the weights are saved to ``model_path``; after training the saved best
        weights are loaded back into ``model``.
    """
    best_valid_accuracy = 0.0
    best_model_state = None
    inputs = None  # last batch seen; reused as example input for add_graph

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()

        # Initialize variables for train accuracy calculation
        num_correct_train = 0
        num_examples_train = 0

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch value is a per-sample mean.
            training_loss += loss.item() * inputs.size(0)

            # softmax is monotonic, so arg-max of the raw outputs is the prediction
            correct = output.argmax(dim=1).eq(targets)
            num_correct_train += correct.sum().item()
            num_examples_train += correct.shape[0]

        training_loss /= len(train_loader.dataset)
        train_accuracy = num_correct_train / num_examples_train

        model.eval()
        num_correct = 0
        num_examples = 0

        # Validation needs no gradients; skipping them avoids building
        # autograd graphs for every batch.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                correct = output.argmax(dim=1).eq(targets)
                num_correct += correct.sum().item()
                num_examples += correct.shape[0]

        valid_loss /= len(val_loader.dataset)
        valid_accuracy = num_correct / num_examples

        # Step the scheduler; it returns the first parameter group's learning rate
        current_lr = lr_scheduler.step(valid_accuracy)

        print('Epoch: {}, Learning Rate: {}, Training Loss: {:.2f}, Training Accuracy: {:.2f}, Validation Loss: {:.2f}, Validation Accuracy: {:.2f}'.format(epoch, current_lr, training_loss, train_accuracy, valid_loss, valid_accuracy))

        # Log the epoch metrics to TensorBoard
        writer.add_scalar('Learning Rate', current_lr, epoch)
        writer.add_scalar('Loss/Train', training_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Loss/Validation', valid_loss, epoch)
        writer.add_scalar('Accuracy/Validation', valid_accuracy, epoch)

        # Save the best model based on validation accuracy. This happens
        # before the early-stopping check so an improving epoch is never lost
        # to the `break` below.
        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
            best_model_state = model.state_dict()
            # Save the best model state to a file
            torch.save(best_model_state, model_path)

        early_stop = early_stopping.step(valid_accuracy)
        if early_stop:
            break  # Stop training if early stopping is activated

    if best_model_state is not None:
        print(f"\n Model has been saved to {model_path}")
        # state_dict() references the live tensors, so the best weights have
        # to be restored from the file, not from best_model_state.
        model.load_state_dict(torch.load(model_path, map_location=device))
    else:
        print("\n No checkpoint was saved: validation accuracy never improved.")

    # Inspect the model
    if inputs is not None:
        writer.add_graph(model, inputs)
    writer.add_figure('Confusion Matrix', plot_confusion_matrix(model, val_loader, device))

    # Close the TensorBoard SummaryWriter
    writer.close()

model = models.resnext101_32x8d(pretrained=True)

# Replace the last fully connected layer with a 50-class head
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500, 50)
)

lr = 1e-2
model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Per-module learning rates; `lr` is only the default for groups without one.
# NOTE(review): the new head trains at 1e-8, far below the 1e-4 used for the
# pretrained layers - confirm this is intended and not a typo.
optimizer = optim.Adam([
                        {'params': model.conv1.parameters(), 'lr': 1e-4},
                        {'params': model.layer1.parameters(), 'lr': 1e-4},
                        {'params': model.layer2.parameters(), 'lr': 1e-4},
                        {'params': model.layer3.parameters(), 'lr': 1e-4},
                        {'params': model.layer4.parameters(), 'lr': 1e-4},
                        {'params': model.fc.parameters(), 'lr': 1e-8}
                        ], lr=lr)

# Use these classes during training
patience = 5
early_stopping_patience=10
early_stopping = EarlyStopping(patience=early_stopping_patience, verbose=True)

# An earlier training cell rebinds the global `lr_scheduler` to a scheduler
# instance, while LearningRateScheduler.__init__ looks up
# `lr_scheduler.ReduceLROnPlateau` through that same global. Restore the
# module alias first so the constructor works regardless of cell order.
import torch.optim.lr_scheduler as lr_scheduler
lr_scheduler = LearningRateScheduler(optimizer, patience=patience, factor=0.1, verbose=True)

train(model, optimizer, loss_fn, esc50_train_loader, esc50_val_loader, epochs=50, device=device)

# Evaluate the best checkpoint written by train(). map_location keeps the
# load working when the checkpoint was written on a different device; the
# existence check covers a run in which no epoch produced a checkpoint.
if os.path.exists(model_path):
    model.load_state_dict(torch.load(model_path, map_location=device))
test_accuracy = evaluate(model, esc50_test_loader, device=device)
print(f"\n Test Accuracy: {test_accuracy * 100:.2f}%")

Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /root/.cache/torch/hub/checkpoints/resnext101_32x8d-8ba56ff5.pth
100%|██████████| 340M/340M [00:04<00:00, 72.7MB/s]


Epoch: 1, Learning Rate: 0.0001, Training Loss: 3.37, Training Accuracy: 0.26, Validation Loss: 2.56, Validation Accuracy: 0.52
Epoch: 2, Learning Rate: 0.0001, Training Loss: 2.37, Training Accuracy: 0.61, Validation Loss: 1.88, Validation Accuracy: 0.61
Epoch: 3, Learning Rate: 0.0001, Training Loss: 1.71, Training Accuracy: 0.76, Validation Loss: 1.39, Validation Accuracy: 0.73
Epoch: 4, Learning Rate: 0.0001, Training Loss: 1.20, Training Accuracy: 0.87, Validation Loss: 1.34, Validation Accuracy: 0.72
Epoch: 5, Learning Rate: 0.0001, Training Loss: 0.95, Training Accuracy: 0.91, Validation Loss: 1.23, Validation Accuracy: 0.76
Epoch: 6, Learning Rate: 0.0001, Training Loss: 0.73, Training Accuracy: 0.93, Validation Loss: 1.19, Validation Accuracy: 0.73
Epoch 00007: reducing learning rate of group 0 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 1 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 2 to 1.0000e-05.
Epoch 00007: reducing learning rate of group 3

### MobileNet V3 Large

In [None]:
# Root directory under which every run gets its own TensorBoard log folder
log_dir = 'tensorboard_logs'

# The run label names both the run folder and the checkpoint file
load_model_name = "mobilenet_v3_large_without_lr_scheduler"
model_name = f"best_model_{load_model_name}.pth"

# Run folder name: timestamp (second resolution) followed by the run label
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
unique_folder_name = "_".join((current_datetime, load_model_name))
unique_log_dir = os.path.join(log_dir, unique_folder_name)
# The checkpoint lives inside the run's log folder
model_path = os.path.join(unique_log_dir, model_name)

# Custom-scalars layout: one multi-line chart per metric that overlays the
# training and validation curves
layout = {
    "Train and validation at same time": {
        metric: ["Multiline", [f"{metric}/Train", f"{metric}/Validation"]]
        for metric in ("Loss", "Accuracy")
    },
}

# Create the TensorBoard SummaryWriter and register the layout
writer = SummaryWriter(log_dir=unique_log_dir)
writer.add_custom_scalars(layout)

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Every epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``val_loader``, prints the metrics, logs them to
    TensorBoard, saves the model when validation accuracy improves, and then
    consults early stopping.

    In addition to its arguments, this function reads the following
    notebook-level globals: ``writer`` (TensorBoard writer, closed at the end),
    ``model_path`` (checkpoint destination), ``lr_scheduler`` and
    ``early_stopping`` (objects with a ``step(metric)`` method) and
    ``plot_confusion_matrix``.

    Args:
        model: Network to train; called on each input batch.
        optimizer: Optimizer driving the parameter updates.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss
            tensor. The running loss is weighted by batch size and divided by
            the dataset length, which assumes a per-batch mean reduction.
        train_loader: Iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute with a length.
        val_loader: Same contract as ``train_loader``, used for validation.
        epochs: Maximum number of epochs to run.
        device: Device the batches are moved to before the forward pass.

    Returns:
        None. Results are reported through stdout, TensorBoard and the
        checkpoint file at ``model_path``.
    """
    best_valid_accuracy = 0.0

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()

        # Running counters for the training accuracy of this epoch
        num_correct_train = 0
        num_examples_train = 0

        for batch in train_loader:
            optimizer.zero_grad()
            inputs, targets = batch
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so the epoch average below
            # stays correct when the last batch is smaller
            training_loss += loss.item() * inputs.size(0)

            # Softmax is monotonic, so the arg-max over the raw outputs
            # selects the same class as the arg-max over the probabilities
            correct = torch.eq(output.argmax(dim=1), targets).view(-1)
            num_correct_train += torch.sum(correct).item()
            num_examples_train += correct.shape[0]

        training_loss /= len(train_loader.dataset)
        train_accuracy = num_correct_train / num_examples_train

        model.eval()
        num_correct = 0
        num_examples = 0

        # Validation needs no gradients; skipping the autograd graph saves
        # memory and matches how evaluate() runs the model
        with torch.no_grad():
            for batch in val_loader:
                inputs, targets = batch
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)
                correct = torch.eq(output.argmax(dim=1), targets).view(-1)

                num_correct += torch.sum(correct).item()
                num_examples += correct.shape[0]

        valid_loss /= len(val_loader.dataset)
        valid_accuracy = num_correct / num_examples

        # Step the global scheduler on the validation accuracy. Its return
        # value is what gets printed and logged as the learning rate
        # (presumably the current LR; the scheduler class is defined elsewhere)
        current_lr = lr_scheduler.step(valid_accuracy)

        print('Epoch: {}, Learning Rate: {}, Training Loss: {:.2f}, Training Accuracy: {:.2f}, Validation Loss: {:.2f}, Validation Accuracy: {:.2f}'.format(epoch, current_lr, training_loss, train_accuracy, valid_loss, valid_accuracy))

        # Log this epoch's metrics to TensorBoard
        writer.add_scalar('Learning Rate', current_lr, epoch)
        writer.add_scalar('Loss/Train', training_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Loss/Validation', valid_loss, epoch)
        writer.add_scalar('Accuracy/Validation', valid_accuracy, epoch)

        # Checkpoint before consulting early stopping, so an epoch that sets
        # a new best is saved even if it is also the epoch that stops training
        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
            torch.save(model.state_dict(), model_path)

        early_stop = early_stopping.step(valid_accuracy)
        if early_stop:
            break  # Stop training if early stopping is activated

    print(f"\n Model has been saved to {model_path}")

    # Inspect the model: the graph is traced with the last validation batch
    writer.add_graph(model, inputs)
    writer.add_figure('Confusion Matrix', plot_confusion_matrix(model, val_loader, device))
    # writer.add_figure(f"Predictions vs. Actuals", log_predictions_vs_actuals(model, val_loader, device=device, num_batches=1))

    # Close the TensorBoard SummaryWriter
    writer.close()

# Start from the pretrained torchvision MobileNetV3-Large
model = models.mobilenet_v3_large(pretrained=True)

# Replace two layers of the classifier: the layer at index -4 keeps its input
# width but now emits 500 features, and the final layer maps those 500
# features to 50 outputs (presumably one per ESC-50 class)
num_features = model.classifier[-4].in_features
model.classifier[-4] = nn.Linear(num_features, 500)
model.classifier[-1] = nn.Linear(500, 50)

lr = 1e-2
model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Both parameter groups set their own 'lr', so the optimizer-wide default
# `lr` is not applied to either of them.
# NOTE(review): the classifier LR (1e-8) is four orders of magnitude below the
# backbone LR (1e-4); the freshly initialised layers will barely move.
# Confirm this is intentional and not a typo for a larger value.
optimizer = optim.Adam([
    {'params': model.features.parameters(), 'lr': 1e-4},
    {'params': model.classifier.parameters(), 'lr': 1e-8}
], lr=lr)

# Early stopping and LR scheduling helpers; train() reads both as globals.
# NOTE(review): the scheduler patience (555) exceeds the 50 epochs requested
# below, so the scheduler presumably never reduces the LR in this run, in
# line with the "without_lr_scheduler" run name. Confirm.
patience = 555
early_stopping_patience=10
early_stopping = EarlyStopping(patience=early_stopping_patience, verbose=True)
lr_scheduler = LearningRateScheduler(optimizer, patience=patience, factor=0.1, verbose=True)

# Reuse the criterion created above instead of building a second instance
train(model, optimizer, loss_fn, esc50_train_loader, esc50_val_loader, epochs=50, device=device)

# Reload the best checkpoint written by train() and score it on the test set;
# map_location keeps the load working if the saved tensors' device differs
model.load_state_dict(torch.load(model_path, map_location=device))
test_accuracy = evaluate(model, esc50_test_loader, device=device)
print(f"\n Test Accuracy: {test_accuracy * 100:.2f}%")




Epoch: 1, Learning Rate: 0.0001, Training Loss: 3.83, Training Accuracy: 0.12, Validation Loss: 3.83, Validation Accuracy: 0.10
Epoch: 2, Learning Rate: 0.0001, Training Loss: 3.58, Training Accuracy: 0.30, Validation Loss: 3.53, Validation Accuracy: 0.30
Epoch: 3, Learning Rate: 0.0001, Training Loss: 3.34, Training Accuracy: 0.40, Validation Loss: 3.16, Validation Accuracy: 0.43
Epoch: 4, Learning Rate: 0.0001, Training Loss: 3.11, Training Accuracy: 0.45, Validation Loss: 2.95, Validation Accuracy: 0.46
Epoch: 5, Learning Rate: 0.0001, Training Loss: 2.90, Training Accuracy: 0.49, Validation Loss: 2.77, Validation Accuracy: 0.51
Epoch: 6, Learning Rate: 0.0001, Training Loss: 2.69, Training Accuracy: 0.54, Validation Loss: 2.53, Validation Accuracy: 0.50
Epoch: 7, Learning Rate: 0.0001, Training Loss: 2.51, Training Accuracy: 0.58, Validation Loss: 2.40, Validation Accuracy: 0.55
Epoch: 8, Learning Rate: 0.0001, Training Loss: 2.36, Training Accuracy: 0.61, Validation Loss: 2.33, Va