## Import the necessary modules

In [1]:
from pathlib import Path
import pickle
import pandas as pd
import numpy as np
from torchvision import transforms
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from torch import nn, optim
import torch
from sklearn.metrics import accuracy_score
from fastprogress.fastprogress import master_bar, progress_bar
from PIL import Image

# Configuration setup

In [2]:
# Hyper-parameters and run-wide settings; everything a reader might tune lives here.
batch_size = 64
lr = 1e-3
tmax = 10  # passed as T_max to the cosine learning-rate scheduler
eta_min = 1e-5  # passed as eta_min to the cosine learning-rate scheduler
num_epochs = 10
debug = False  # when True, only a small subset of the data is used
seed = 42
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed the RNGs the notebook draws from (NumPy for crops/masks, torch for
# weight init, random_split and shuffling) so Restart & Run All is repeatable.
np.random.seed(seed)
torch.manual_seed(seed)

# Dataset class and transforms

In [3]:
# Per-split image pipelines applied to the PIL spectrogram crop.
trainsforms_dict = {
    # Training: random horizontal flip as augmentation, then tensor conversion.
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
    ]),
    # Evaluation: tensor conversion only. A random flip here would make
    # predictions for the same input differ from one run to the next.
    'test': transforms.Compose([
        transforms.ToTensor(),
    ])
}


class FreeShoundTrainDataset(Dataset):
    """Mel-spectrogram crops with optional label rows.

    Every item is a random square crop (side = size of the first axis) taken
    along the second axis of one spectrogram, optionally masked with one
    horizontal and one vertical zero band, reduced to its first channel,
    converted to a PIL image and passed through ``transform``.

    Args:
        mels: Indexable collection of 3-D arrays (first axis = crop side,
            second axis = variable length, third axis = channels).
        labels: Per-sample label rows (anything ``np.asarray`` accepts, e.g.
            a DataFrame), or ``None`` for unlabeled data. The array form is
            computed once at construction time.
        transform: Callable applied to the PIL image; its result must
            support ``div_``.
        time_mask: Fraction of the crop width zeroed by the vertical band.
        freq_mask: Fraction of the crop height zeroed by the horizontal band.
        spec_aug: Whether the two masking bands are applied.
    """

    def __init__(self, mels, labels, transform, time_mask=0.1, freq_mask=0.1, spec_aug=True):
        self.mels = mels  # (num_samples, 128, var, 3)
        self.labels = labels
        # Convert once here instead of re-converting the whole table per item.
        self._label_array = None if labels is None else np.asarray(labels)
        self.tfms = transform
        self.time_mask = time_mask
        self.freq_mask = freq_mask
        self.spec_aug = spec_aug

    def __len__(self):
        return len(self.mels)

    def __getitem__(self, idx):
        mel = self.mels[idx]  # (128, 451, 3)
        base_dim, time_dim, _ = mel.shape

        # Valid crop offsets are 0..time_dim-base_dim inclusive; clips that
        # are not longer than base_dim always start at 0.
        last_start = max(time_dim - base_dim, 0)
        crop = np.random.randint(0, last_start + 1)

        # np.array copies the crop. Slicing alone returns a view, and the
        # masks below would then overwrite the stored spectrogram for good.
        image = np.array(mel[:, crop: crop + base_dim, ...])  # (128, <=128, 3)

        if image.shape[1] < base_dim:
            # Clip shorter than the crop side: zero-pad on the right so every
            # item has the same square shape and batches can be collated.
            padded = np.zeros((base_dim, base_dim) + image.shape[2:], dtype=image.dtype)
            padded[:, :image.shape[1], ...] = image
            image = padded

        if self.spec_aug:
            freq_mask_begin = int(np.random.uniform(
                0, 1 - self.freq_mask) * base_dim)
            image[freq_mask_begin:freq_mask_begin +
                  int(self.freq_mask * base_dim), ...] = 0
            time_mask_begin = int(np.random.uniform(
                0, 1 - self.time_mask) * base_dim)
            image[:, time_mask_begin:time_mask_begin +
                  int(self.time_mask * base_dim), ...] = 0

        # Only the first channel is kept; mode 'L' presumes 8-bit data.
        image = Image.fromarray(image[..., 0], mode='L')  # (128, 128)
        # NOTE(review): if `transform` ends with torchvision's ToTensor the
        # values are already in [0, 1], so this divides by 255 a second time.
        # Left unchanged because saved weights depend on this input scale.
        image = self.tfms(image).div_(255)  # (1, 128, 128)

        if self._label_array is None:
            return image
        label = torch.from_numpy(np.asarray(self._label_array[idx])).float()
        return image, label

# Handle multi-label targets

In [4]:
def transform_labels(df):
    """Expand the comma-separated ``labels`` column into 0/1 indicator columns.

    Args:
        df: DataFrame with a string column ``labels`` whose entries are
            comma-separated label names.

    Returns:
        A new DataFrame without ``labels`` and with one integer column per
        distinct label (sorted by name), holding 1 where the row carries that
        label and 0 otherwise. ``df`` itself is left untouched, so the
        function can safely be called again on the same frame.
    """
    # Work on a derived Series: assigning the split lists back to df['labels']
    # would change the caller's frame and break a second call.
    label_lists = df['labels'].str.split(',')
    unique_labels = sorted(set().union(*label_lists))

    encoded = df.drop(columns='labels')
    for label in unique_labels:
        encoded[label] = [int(label in labels) for labels in label_lists]

    return encoded

# Models 

In [5]:
import torch.nn as nn
import torchvision.models as models

class Model(nn.Module):
    """ResNet-18 wrapper taking single-channel images and emitting class logits.

    Args:
        num_classes: Size of the output layer.
        pretrained: Forwarded to ``models.resnet18`` to request pretrained weights.
    """

    def __init__(self, num_classes, pretrained=False):
        super().__init__()

        backbone = models.resnet18(pretrained=pretrained)

        # Swap the stem for a freshly initialised one-channel convolution
        # (same geometry as the stock ResNet stem, but 1 input channel).
        backbone.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )

        # Remember the width feeding the classifier, then replace the head.
        self.in_features = backbone.fc.in_features
        backbone.fc = nn.Linear(self.in_features, num_classes)

        self.model = backbone

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its raw outputs."""
        logits = self.model(x)
        return logits

# Training Function

In [6]:
from fastprogress.fastprogress import master_bar, progress_bar
import torch


def _elementwise_accuracy(logits, targets):
    """Fraction of label slots where the 0.5-thresholded sigmoid matches ``targets``."""
    predictions = (torch.sigmoid(logits.detach()) > 0.5).float()
    return (predictions == targets).float().mean().item()


def train_model(
    model, train_dl, valid_dl, loss_fn, num_epochs, optimizer, scheduler, device
):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Per epoch: one optimisation pass over ``train_dl`` in train mode, one
    ``scheduler.step()``, then one gradient-free pass over ``valid_dl`` in
    eval mode. A summary line with the mean batch loss and accuracy of both
    passes is written to the progress bar.

    The reported accuracy is element-wise over every (sample, label) slot, so
    on sparse multi-label targets it is dominated by the zeros.

    Args:
        model: Network mapping an input batch to logits.
        train_dl: Sized iterable of ``(inputs, targets)`` training batches.
        valid_dl: Sized iterable of ``(inputs, targets)`` validation batches.
        loss_fn: Callable ``(logits, targets) -> scalar loss tensor``.
        num_epochs: Number of epochs to run.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        device: Device the batches are moved to.

    Returns:
        None. The model is left in eval mode after the final validation pass.
    """
    # Avoid a ZeroDivisionError in the summary when a loader is empty.
    n_train = max(len(train_dl), 1)
    n_valid = max(len(valid_dl), 1)

    mb = master_bar(range(num_epochs))
    for epoch in mb:
        running_loss = 0
        running_acc = 0
        val_run_loss = 0
        val_run_acc = 0

        model.train()
        for xb, yb in progress_bar(train_dl, parent=mb):
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            loss = loss_fn(logits, yb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_acc += _elementwise_accuracy(logits, yb)
        scheduler.step()

        # Switch to eval mode so BatchNorm/Dropout use inference behaviour and
        # validation batches do not update the running statistics.
        model.eval()
        with torch.no_grad():
            for xb, yb in progress_bar(valid_dl, parent=mb):
                xb, yb = xb.to(device), yb.to(device)
                logits = model(xb)
                val_run_loss += loss_fn(logits, yb).item()
                val_run_acc += _elementwise_accuracy(logits, yb)

        mb.write(
            f"Epoch {epoch} | "
            f"Train Loss: {running_loss/n_train:.3f} | "
            f"Valid Loss: {val_run_loss/n_valid:.3f} | "
            f"Train Acc: {running_acc/n_train:.2f} | "
            f"Valid Acc: {val_run_acc/n_valid:.2f} | "
        )


In [7]:
# load input data
path = Path('/kaggle/input/data-preprocessing/data/')
train_df = pd.read_csv('/kaggle/input/freesound-audio-tagging-2019/train_curated.csv')
train_df = transform_labels(train_df)

# load preprocessed audios and corresponding labels
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open(path / "mels_train.pkl", "rb") as mels_file:
    processed_train = pickle.load(mels_file)  # (4970, 128, var, 3)
y_train = train_df.iloc[:, 1:]  # every column after the first (the label indicators)
assert len(processed_train) == len(y_train), "spectrograms and label rows are misaligned"

# create custom dataset and dataloader
ds = FreeShoundTrainDataset(processed_train, y_train, trainsforms_dict["train"])
if debug:
    # Clamp so the smoke-test subset also works on fewer than 100 samples.
    ds = Subset(ds, range(min(100, len(ds))))
train_size = int(len(ds) * 0.8)
valid_size = len(ds) - train_size
# NOTE(review): both splits wrap the same dataset object, so validation items
# also get the random flip and masking configured for training.
train_ds, valid_ds = random_split(ds, [train_size, valid_size])
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)

# load model; the output width follows the label table instead of a literal
num_classes = y_train.shape[1]
model = Model(num_classes=num_classes, pretrained=True)
model.to(device)

loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr, amsgrad=False)
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=tmax, eta_min=eta_min
)

settings = {
    "model": model,
    "train_dl": train_dl,
    "valid_dl": valid_dl,
    "loss_fn": loss_fn,
    "num_epochs": num_epochs,
    "optimizer": optimizer,
    "scheduler": scheduler,
    "device": device,
}
train_model(**settings)

torch.save(model.state_dict(), "model.pt")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 154MB/s]


In [8]:
import torch
import gc

def clear_all_gpu_memory():
    """Drop global torch objects, then release cached CUDA memory.

    Removes from this module's global namespace every public name (not
    starting with ``_``) bound to a tensor, an ``nn.Module``, an optimizer,
    an object exposing an optimizer through an ``optimizer`` attribute (such
    as an LR scheduler), or a dict/list/tuple/set that directly contains one
    of those. Modules, classes, plain configuration values and IPython's
    ``In``/``Out`` history are kept, so later cells can still use them.

    Afterwards the garbage collector runs and, when CUDA is available, the
    caching allocator is emptied. Objects that are still referenced from
    somewhere else stay alive and keep their memory.
    """
    torch_state_types = (torch.Tensor, torch.nn.Module, torch.optim.Optimizer)
    module_type = type(gc)
    ipython_names = {"In", "Out"}

    def _holds_torch_state(obj):
        # True for tensors/modules/optimizers and for objects wrapping an optimizer.
        if isinstance(obj, torch_state_types):
            return True
        return isinstance(getattr(obj, "optimizer", None), torch.optim.Optimizer)

    def _should_release(obj):
        # Never drop imports or class definitions; look one level into containers.
        if isinstance(obj, (type, module_type)):
            return False
        if _holds_torch_state(obj):
            return True
        if isinstance(obj, dict):
            return any(_holds_torch_state(value) for value in obj.values())
        if isinstance(obj, (list, tuple, set)):
            return any(_holds_torch_state(item) for item in obj)
        return False

    # dir() with no arguments inside a function lists the function's locals,
    # not the globals, so the namespace has to be fetched explicitly. Iterate
    # over a snapshot of the names because entries are deleted along the way.
    namespace = globals()
    for name in list(namespace):
        if name.startswith('_') or name in ipython_names:
            continue
        if _should_release(namespace[name]):
            del namespace[name]

    # Collect garbage
    gc.collect()

    # Empty CUDA cache if using PyTorch
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print("All GPU memory cleared.")

In [9]:
# Run garbage collection and release PyTorch's cached CUDA memory after training.
clear_all_gpu_memory()

All GPU memory cleared.
