In [1]:
import pandas as pd
import torch
import copy
import time
from pathlib import Path
from torch import nn
from torch.utils.data import random_split
from torch.utils.tensorboard import SummaryWriter
from sound_classification_dataset import SoundDS
from sound_classification_model import AudioClassifier

In [None]:
# ----------------------------
# Prepare training data from Metadata file
# ----------------------------

# Root folder of the dataset. Kept as a plain string because it is also
# handed to SoundDS when the dataset object is built.
data_path = 'UrbanSound8k'

# Read metadata file
metadata_file = data_path + '/UrbanSound8K.csv'
df = pd.read_csv(metadata_file)

# Construct each clip's path relative to data_path by concatenating fold and
# file name: '/fold<fold>/<slice_file_name>'
df['relative_path'] = '/fold' + df['fold'].astype(str) + '/' + df['slice_file_name'].astype(str)

# All metadata columns are kept next to relative_path.
# Preview the first rows; only the last expression of a cell is displayed,
# so this is the single place the frame is shown.
df.head()

In [68]:
df.shape

(8732, 9)

In [69]:
# Dataset built from the metadata frame and the dataset root folder
myds = SoundDS(df, data_path)
# TensorBoard writer created with default arguments; the training loop logs to it
writer = SummaryWriter()

In [70]:
# Random split of 80:20 between training and validation.
# The split is drawn from an explicitly seeded generator so that re-running
# this cell (or restarting the kernel) reproduces the same partition. An
# unseeded re-split after training would move items the model was trained on
# into the validation set and inflate the reported accuracy.
split_seed = 42
num_items = len(myds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(
    myds,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create training and validation data loaders
# (only the training loader shuffles; validation order is fixed)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=128, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=128, shuffle=False)

In [71]:
len(train_dl)

55

In [27]:
# Pick the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the classifier, wrap it in DataParallel and move it to that device
model = nn.DataParallel(AudioClassifier()).to(device)

# Show which device the parameters ended up on
next(model.parameters()).device

device(type='cuda', index=0)

In [28]:
# ----------------------------
# Training Loop
# ----------------------------
def training(model, train_dl, num_epochs):
    """Train ``model`` on ``train_dl`` for ``num_epochs`` epochs.

    Uses cross-entropy loss, Adam (lr=0.001) and a linearly annealed
    OneCycleLR schedule that is stepped once per batch. After every epoch the
    mean batch loss and the training accuracy are printed and logged to the
    notebook-level TensorBoard ``writer`` under "Loss/train" and "Acc/train".

    Args:
        model: the network to optimise. Batches are moved to the device of
            its first parameter, so the model must already be on the
            intended device.
        train_dl: iterable of batches supporting ``len()``; element 0 of each
            batch holds the inputs and element 1 the integer class labels.
        num_epochs: number of full passes over ``train_dl``.

    Returns:
        None. The model is updated in place and left in training mode.
    """
    # Dropout / batch-norm style layers must be in training mode even if the
    # caller evaluated the model beforehand.
    model.train()

    # Follow the model's own device instead of depending on a notebook global.
    device = next(model.parameters()).device

    # Loss Function, Optimizer and Scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    num_batches = len(train_dl)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                    steps_per_epoch=num_batches,
                                                    epochs=num_epochs,
                                                    anneal_strategy='linear')

    # Repeat for each epoch
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_prediction = 0
        total_prediction = 0

        # Repeat for each batch in the training set
        for data in train_dl:
            # Get the input features and target labels on the model's device
            inputs, labels = data[0].to(device), data[1].to(device)

            # Normalize the inputs with the statistics of the current batch
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s

            # Zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The one-cycle schedule advances per batch, not per epoch
            scheduler.step()

            # Keep stats for Loss and Accuracy
            running_loss += loss.item()

            # Get the predicted class with the highest score
            _, prediction = torch.max(outputs, 1)
            # Count of predictions that matched the target label
            correct_prediction += (prediction == labels).sum().item()
            total_prediction += prediction.shape[0]

        # Print and log stats at the end of the epoch
        avg_loss = running_loss / num_batches
        avg_acc = correct_prediction / total_prediction
        writer.add_scalar("Loss/train", avg_loss, epoch)
        writer.add_scalar("Acc/train", avg_acc, epoch)
        print(f'Epoch: {epoch}, Loss: {avg_loss:.2f}, Accuracy: {avg_acc:.2f}')

    # Push buffered events to disk so TensorBoard shows the complete run
    writer.flush()
    print('Finished Training')

In [29]:
# Number of full passes over the training data
num_epochs = 100
training(model=model, train_dl=train_dl, num_epochs=num_epochs)

Epoch: 0, Loss: 2.22, Accuracy: 0.19
Epoch: 1, Loss: 2.10, Accuracy: 0.27
Epoch: 2, Loss: 2.01, Accuracy: 0.30
Epoch: 3, Loss: 1.93, Accuracy: 0.33
Epoch: 4, Loss: 1.86, Accuracy: 0.35
Epoch: 5, Loss: 1.78, Accuracy: 0.39
Epoch: 6, Loss: 1.70, Accuracy: 0.43
Epoch: 7, Loss: 1.61, Accuracy: 0.46
Epoch: 8, Loss: 1.54, Accuracy: 0.48
Epoch: 9, Loss: 1.47, Accuracy: 0.50
Epoch: 10, Loss: 1.40, Accuracy: 0.53
Epoch: 11, Loss: 1.36, Accuracy: 0.54
Epoch: 12, Loss: 1.30, Accuracy: 0.56
Epoch: 13, Loss: 1.25, Accuracy: 0.58
Epoch: 14, Loss: 1.21, Accuracy: 0.59
Epoch: 15, Loss: 1.17, Accuracy: 0.60
Epoch: 16, Loss: 1.12, Accuracy: 0.62
Epoch: 17, Loss: 1.08, Accuracy: 0.64
Epoch: 18, Loss: 1.05, Accuracy: 0.65
Epoch: 19, Loss: 1.01, Accuracy: 0.66
Epoch: 20, Loss: 0.98, Accuracy: 0.67
Epoch: 21, Loss: 0.95, Accuracy: 0.68
Epoch: 22, Loss: 0.90, Accuracy: 0.70
Epoch: 23, Loss: 0.88, Accuracy: 0.71
Epoch: 24, Loss: 0.86, Accuracy: 0.71
Epoch: 25, Loss: 0.83, Accuracy: 0.73
Epoch: 26, Loss: 0.81,

In [30]:
# Persist the trained weights (state dict only) to model.pt
torch.save(obj=model.state_dict(), f='model.pt')

In [72]:
# ----------------------------
# Inference
# ----------------------------
def inference(model, test_dl):
    """Evaluate ``model`` on ``test_dl`` and print the classification accuracy.

    The model is put in evaluation mode for the duration of the call and its
    previous training/eval mode is restored afterwards. Batches are moved to
    the device of the model's first parameter (CPU if it has no parameters).

    Args:
        model: the network to evaluate.
        test_dl: iterable of batches; element 0 of each batch holds the
            inputs and element 1 the integer class labels.

    Returns:
        None. Prints ``Accuracy: <acc>, Total items: <n>``; the accuracy is
        reported as ``nan`` when ``test_dl`` yields no items.
    """
    correct_prediction = 0
    total_prediction = 0

    # Follow the model's own device instead of depending on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Evaluate with dropout / batch-norm style layers in eval mode, then hand
    # the model back in whatever mode the caller had it in.
    was_training = model.training
    model.eval()
    try:
        # Disable gradient tracking
        with torch.no_grad():
            for data in test_dl:
                # Get the input features and target labels on the model's device
                inputs, labels = data[0].to(device), data[1].to(device)

                # Normalize the inputs with the statistics of the current batch
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / inputs_s

                # Get predictions
                outputs = model(inputs)

                # Get the predicted class with the highest score
                _, prediction = torch.max(outputs, 1)
                # Count of predictions that matched the target label
                correct_prediction += (prediction == labels).sum().item()
                total_prediction += prediction.shape[0]
    finally:
        model.train(was_training)

    # An empty loader has no defined accuracy; report nan instead of raising
    # ZeroDivisionError.
    acc = correct_prediction / total_prediction if total_prediction else float("nan")
    print(f'Accuracy: {acc:.2f}, Total items: {total_prediction}')

In [60]:
# Run inference on the validation set with the trained weights saved in model.pt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_inf = nn.DataParallel(AudioClassifier())
model_inf = model_inf.to(device)
# map_location remaps tensors saved during a GPU run onto the current device,
# so the checkpoint also loads on a CPU-only machine
model_inf.load_state_dict(torch.load('model.pt', map_location=device))
model_inf.eval()
inference(model_inf, val_dl)

Accuracy: 0.90, Total items: 1746
