Imports:

In [1]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
import numpy as np
import torchvision
from torchvision import datasets, models, transforms as T
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import plotly_express as px
from sklearn.metrics import classification_report
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.13.0+cu117
Torchvision Version:  0.14.0+cu117


Hyperparameters:

In [2]:
img_height = 224
img_width = img_height
num_classes = 10

batch_size = 32
n_epochs = 20
train_base = True
optimizer_name = "Adam"
SGD_momentum = 0.9
learning_rate = 0.001
learn_rate_step = 5
learn_rate_gamma = 0.7
use_cuda = True
torch.cuda.amp.autocast(enabled=True)

<torch.cuda.amp.autocast_mode.autocast at 0x21ac32a9af0>

Image Transform Functions (For Later):

In [3]:
class MyAddGaussNoise(object):
    def __init__(self, input_size, mean=0.0, std=None, add_noise_probability=1.0):
        assert isinstance(input_size, (int, tuple))
        assert isinstance(mean, (int, float))
        assert isinstance(std, (int, float)) or std is None
        assert isinstance(add_noise_probability, (float))


        if isinstance(input_size, int):
            self.input_size = (input_size, input_size)
        else:
            assert len(input_size) == 2
            self.input_size = input_size

        self.mean = mean

        if std is not None:
            assert std > 0.0
            self.std = std
        else:
            self.std = std

        assert add_noise_probability > 0.0 and add_noise_probability <= 1.0
        self.add_noise_prob = add_noise_probability


    def __call__(self, spectrogram):
        if np.random.random() > self.add_noise_prob:
            return spectrogram

        # set some std value 
        min_pixel_value = np.min(spectrogram)
        if self.std is None:
            std_factor = 0.03     # factor number 
        std = np.abs(min_pixel_value*std_factor)

        # generate a white noise spectrogram
        gauss_mask = np.random.normal(self.mean, 
                                    std, 
                                    size=self.input_size).astype('float32')
        
        # add white noise to the sound spectrogram
        noisy_visual = spectrogram + gauss_mask

        return noisy_visual

class MyRightShift(object):
    def __init__(self, input_size, width_shift_range, shift_probability=1.0):
        assert isinstance(input_size, (int, tuple))
        assert isinstance(width_shift_range, (int, float))
        assert isinstance(shift_probability, (float))

        if isinstance(input_size, int):
            self.input_size = (input_size, input_size)
        else:
            assert len(input_size) == 2
            self.input_size = input_size

        if isinstance(width_shift_range, int):
            assert width_shift_range > 0
            assert width_shift_range <= self.input_size[1]
            self.width_shift_range = width_shift_range
        else:
            assert width_shift_range > 0.0
            assert width_shift_range <= 1.0
            self.width_shift_range = int(width_shift_range * self.input_size[1])
                        
        assert shift_probability > 0.0 and shift_probability <= 1.0
        self.shift_prob = shift_probability

    def __call__(self, image):
        if np.random.random() > self.shift_prob:
            return image

        # create a new array filled with the min value
        shifted_image= np.full(self.input_size, np.min(image), dtype='float32')

        # randomly choose a start postion
        rand_position = np.random.randint(1, self.width_shift_range)

        # shift the image
        shifted_image[:,rand_position:] = copy.deepcopy(image[:,:-rand_position])

        return shifted_image

#applying required transformations on the dataset
img_transforms = {
    'train':
    T.Compose([
        MyAddGaussNoise(input_size = img_height,add_noise_probability=0.5),
        MyRightShift(input_size = img_height, width_shift_range=0.9, shift_probability=0.5),
        T.ToTensor(),
        T.Normalize([0.5],[0.5])
        ]),

    'valid':
    T.Compose([
        T.ToTensor(),
        T.Normalize([0.5], [0.5])
        ]),

    'test':
    T.Compose([
        T.ToTensor(),
        T.Normalize([0.5], [0.5])
        ]),
     }

Custom Dataset Object (UrbanSound8kDataset):

In [4]:
class UrbanSound8kDataset(Dataset):
    def __init__(self, featuresdf, transform=None):
        assert isinstance(featuresdf, pd.DataFrame)
        assert len(featuresdf.columns) == 3
        
        self.transform = transform

        self.featuresdf = featuresdf

    def __len__(self):
        return len(self.featuresdf)

    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()

        cochleagram, label, fold = self.featuresdf.iloc[index]

        if self.transform is not None:
           cochleagram = self.transform(cochleagram)

        if not torch.is_tensor(cochleagram):
            cochleagram = torch.as_tensor(cochleagram.astype('float'))

        label = torch.as_tensor(np.array(label)).type(torch.LongTensor)

        cochleagram = cochleagram.expand(3,-1,-1).float()

        return cochleagram, label



Initialize Model Architecture Function:

In [5]:
def init_model():
    model_transfer = models.vgg16(weights='VGG16_Weights.DEFAULT')

    for param in model_transfer.parameters():
        param.requires_grad = train_base
    in_features = model_transfer.classifier[0].in_features

    #this should remain constant
    model_transfer.fc = nn.Sequential(  
        nn.Linear(in_features, num_classes),
        )

    # selecting loss function
    criterion_transfer = nn.CrossEntropyLoss()

    #using Adam classifier
    if optimizer_name == 'Adam':
        optimizer_transfer = optim.Adam(model_transfer.parameters(), lr=learning_rate)

    if optimizer_name == 'SGD':
        optimizer_transfer = optim.SGD(model_transfer.parameters(), lr=learning_rate, momentum=SGD_momentum)

    scheduler_transfer = torch.optim.lr_scheduler.StepLR(optimizer_transfer, step_size=learn_rate_step, gamma=learn_rate_gamma)

    if use_cuda:
        model_transfer = model_transfer.cuda()

    return model_transfer, optimizer_transfer, criterion_transfer, scheduler_transfer

Training Loop Function:

In [6]:
def train_model(n_epochs, loaders, model, optimizer, criterion, scheduler, fold_k, data_type, model_name, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    torch.autograd.set_detect_anomaly(True)
    valid_loss_min = np.Inf 
    epochs = []
    trainingloss = []
    validationloss = []
    valaccuracy = []
    learningrates = []

    for epoch in range(1, n_epochs+1):
        # initialize the variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        correct = 0.0
        total = 0.0

        preds = []
        targets = []
        
        ###################
        # training the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):     
            if use_cuda:
                data, target = data.cuda(), target.cuda()   
            
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
           
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
    
        ######################    
        # validating the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            pred = output.data.max(1,keepdim=True)[1]
            preds.append(pred)
            targets.append(target)
            # compare predictions
            correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
            total += data.size(0)
        
        train_loss = train_loss/len(train_ds)
        valid_loss = valid_loss/len(val_ds)
        valid_acc = correct / total
        current_lr = optimizer.param_groups[0]['lr']
        scheduler.step()

        trainingloss.append(train_loss)
        validationloss.append(valid_loss)
        valaccuracy.append(valid_acc)
        epochs.append(epoch)
        learningrates.append(current_lr)

        # printing training/validation statistics 
        print('Epoch: {} \nTraining Loss: {:.6f} \nValidation Loss: {:.6f} \nValidation Accuracy: {:.6f} \nCorrect: {} / {} \nCurrent LR: {}'.format(
            epoch, 
            train_loss,
            valid_loss,
            valid_acc,
            correct,
            total,
            current_lr
            ))
        
        ## saving the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)
            
            valid_loss_min = valid_loss

    #GATHER TRAINING RESULTS IN DATAFRAME
    validationloss = [i.cpu().tolist() for i in validationloss]
    trainingloss = [i.cpu().tolist() for i in trainingloss]
    zipped_data = list(zip(epochs, learningrates, trainingloss, validationloss, valaccuracy))
    train_report = pd.DataFrame(zipped_data,columns=['Epoch','Learning Rate','Training Loss','Validation Loss','Validation Accuracy'])
    train_report['Test Fold'] = fold_k
    train_report['Model'] = model_name
    train_report['Data Representation'] = data_type
    train_report = train_report[['Model','Data Representation', 'Test Fold', 'Epoch', 'Learning Rate', 'Training Loss', 'Validation Loss', 'Validation Accuracy']]
    
    # return trained model
    return model, train_report

Training Script (Load Data From Disk, Train on All 10 Folds, Evaluate on Test Data, Generate Training and Test Metric Reports):

In [7]:
# Representation Name
data_type = 'Cochleagram'

# Model Name
model_name = 'VGG16'

# Perform 10 fold validation
for fold_k in range(1,num_classes+1):
    featuresdf = pd.read_pickle('cgram_224_comp3.pkl')
    model_transfer, optimizer_transfer, criterion_transfer, scheduler_transfer = init_model()
    train_df = featuresdf[featuresdf['fold'] != fold_k]
    val_df = featuresdf[featuresdf['fold'] == fold_k]
    test_fold = fold_k

    train_ds = UrbanSound8kDataset(train_df, transform=img_transforms['train'])
    val_ds = UrbanSound8kDataset(val_df, transform=img_transforms['valid'])

    #Creating loaders for the dataset
    loaders_transfer={
        'train':torch.utils.data.DataLoader(train_ds,batch_size,shuffle=True),
        'valid':torch.utils.data.DataLoader(val_ds,batch_size,shuffle=False)
    }

    del featuresdf, train_df, val_df

    if fold_k == 1:
        #TRAIN THE MODEL AND SAVE RESULTS TO TRAIN_REPORT
        train_report = train_model(n_epochs, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, scheduler_transfer, fold_k, data_type, model_name, model_name + '_' + data_type + '_fold' + str(fold_k) + '.pt')[1]
    else:
        #TRAIN THE MODEL AND ADD DATA TO TRAIN_REPORT
        train_report_temp = train_model(n_epochs, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, scheduler_transfer, fold_k, data_type, model_name, model_name + '_' + data_type + '_fold' + str(fold_k) + '.pt')[1]
        train_report = pd.concat([train_report, train_report_temp])
    
    del model_transfer, optimizer_transfer, criterion_transfer, scheduler_transfer
    
    #RELOAD FINAL CHECKPOINTED MODEL IN FOR VALIDATION RESULTS
    model_transfer = init_model()[0]
    model_transfer.load_state_dict(torch.load(model_name + '_' + data_type + '_fold' + str(fold_k) + '.pt'))
    model_transfer.eval()

    #PERFORM FINAL INFERENCE ON VALIDATION SET
    preds = []
    targets = []

    class_names = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music']

    for batch_idx, (data, target) in enumerate(loaders_transfer['valid']):
        data, target = data.cuda(), target.cuda()
        output = model_transfer(data)
        prediction = torch.argmax(output, dim=1)
        preds.append(prediction.cpu().numpy())
        targets.append(target.cpu().numpy())

    targets = np.concatenate(targets)
    preds = np.concatenate(preds)

    metrics_report_dict = classification_report(targets, preds, target_names=class_names, output_dict=True)
    fold_acc_dict = {'Test Fold' : fold_k , 'Accuracy' : metrics_report_dict['accuracy']}
    del metrics_report_dict['accuracy']

    if fold_k == 1:
        metrics_report = pd.DataFrame(metrics_report_dict).rename_axis('metric').reset_index()
        fold_accuracies = pd.DataFrame(fold_acc_dict,index=[0])
        metrics_report.insert(0,'Test Fold',fold_k)
        metrics_report.insert(0,'Data Representation', data_type)
        metrics_report.insert(0,'Model', model_name)
    else:
        metrics_report_temp = pd.DataFrame(metrics_report_dict).rename_axis('metric').reset_index()
        fold_accuracies_temp = pd.DataFrame(fold_acc_dict,index=[0])
        metrics_report_temp.insert(0,'Test Fold',fold_k)
        metrics_report_temp.insert(0,'Data Representation', data_type)
        metrics_report_temp.insert(0,'Model', model_name)
        metrics_report = pd.concat([metrics_report, metrics_report_temp])
        fold_accuracies = pd.concat([fold_accuracies, fold_accuracies_temp])
    
    del data, target, output, prediction, targets, preds

train_report.to_csv('results/TrainReportbyEpoch_' + model_name + '_' + data_type + '.csv', index=False)
metrics_report.to_csv('results/TestMetricsbyClass_' + model_name + '_' + data_type + '.csv', index=False)
fold_accuracies.to_csv('results/FoldAccuracies_' + model_name + '_' + data_type + '.csv', index=False)

Epoch: 1 
Training Loss: 0.000454 
Validation Loss: 0.002211 
Validation Accuracy: 0.325688 
Correct: 284.0 / 872.0 
Current LR: 0.001
Epoch: 2 
Training Loss: 0.000219 
Validation Loss: 0.001958 
Validation Accuracy: 0.379587 
Correct: 331.0 / 872.0 
Current LR: 0.001
Epoch: 3 
Training Loss: 0.000196 
Validation Loss: 0.002384 
Validation Accuracy: 0.348624 
Correct: 304.0 / 872.0 
Current LR: 0.001
Epoch: 4 
Training Loss: 0.000177 
Validation Loss: 0.001853 
Validation Accuracy: 0.441514 
Correct: 385.0 / 872.0 
Current LR: 0.001
Epoch: 5 
Training Loss: 0.000183 
Validation Loss: 0.001979 
Validation Accuracy: 0.434633 
Correct: 379.0 / 872.0 
Current LR: 0.001
Epoch: 6 
Training Loss: 0.000177 
Validation Loss: 0.001879 
Validation Accuracy: 0.541284 
Correct: 472.0 / 872.0 
Current LR: 0.0007
Epoch: 7 
Training Loss: 0.000141 
Validation Loss: 0.001790 
Validation Accuracy: 0.494266 
Correct: 431.0 / 872.0 
Current LR: 0.0007
Epoch: 8 
Training Loss: 0.000126 
Validation Loss: 0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 1 
Training Loss: 0.000388 
Validation Loss: 0.002304 
Validation Accuracy: 0.280680 
Correct: 231.0 / 823.0 
Current LR: 0.001
Epoch: 2 
Training Loss: 0.000214 
Validation Loss: 0.002349 
Validation Accuracy: 0.371810 
Correct: 306.0 / 823.0 
Current LR: 0.001
Epoch: 3 
Training Loss: 0.000183 
Validation Loss: 0.001814 
Validation Accuracy: 0.452005 
Correct: 372.0 / 823.0 
Current LR: 0.001
Epoch: 4 
Training Loss: 0.000160 
Validation Loss: 0.001758 
Validation Accuracy: 0.460510 
Correct: 379.0 / 823.0 
Current LR: 0.001
Epoch: 5 
Training Loss: 0.000153 
Validation Loss: 0.001794 
Validation Accuracy: 0.459295 
Correct: 378.0 / 823.0 
Current LR: 0.001
Epoch: 6 
Training Loss: 0.000118 
Validation Loss: 0.001620 
Validation Accuracy: 0.551640 
Correct: 454.0 / 823.0 
Current LR: 0.0007
Epoch: 7 
Training Loss: 0.000103 
Validation Loss: 0.001728 
Validation Accuracy: 0.513973 
Correct: 423.0 / 823.0 
Current LR: 0.0007
Epoch: 8 
Training Loss: 0.000097 
Validation Loss: 0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 1 
Training Loss: 0.000405 
Validation Loss: 0.001865 
Validation Accuracy: 0.397059 
Correct: 324.0 / 816.0 
Current LR: 0.001
Epoch: 2 
Training Loss: 0.000204 
Validation Loss: 0.001820 
Validation Accuracy: 0.491422 
Correct: 401.0 / 816.0 
Current LR: 0.001
Epoch: 3 
Training Loss: 0.000179 
Validation Loss: 0.001804 
Validation Accuracy: 0.482843 
Correct: 394.0 / 816.0 
Current LR: 0.001
Epoch: 4 
Training Loss: 0.000159 
Validation Loss: 0.001620 
Validation Accuracy: 0.557598 
Correct: 455.0 / 816.0 
Current LR: 0.001
Epoch: 5 
Training Loss: 0.000138 
Validation Loss: 0.002079 
Validation Accuracy: 0.574755 
Correct: 469.0 / 816.0 
Current LR: 0.001
Epoch: 6 
Training Loss: 0.000110 
Validation Loss: 0.002091 
Validation Accuracy: 0.623775 
Correct: 509.0 / 816.0 
Current LR: 0.0007
Epoch: 7 
Training Loss: 0.000098 
Validation Loss: 0.001398 
Validation Accuracy: 0.649510 
Correct: 530.0 / 816.0 
Current LR: 0.0007
Epoch: 8 
Training Loss: 0.000090 
Validation Loss: 0