In [51]:
import pandas as pd
import torch
import copy
import time
from pathlib import Path
from torch import nn
from torch.utils.data import random_split
from torch.utils.tensorboard import SummaryWriter
from sound_classification_dataset import SoundDS
from sound_classification_model import AudioClassifier
from sklearn.model_selection import KFold

In [52]:
# ----------------------------
# Prepare training data from Metadata file
# ----------------------------

# Root directory of the UrbanSound8K download.
# NOTE(review): absolute, machine-specific path; adjust it for your environment.
data_path = '/home/sunt/Downloads/UrbanSound8k'

# Read metadata file
metadata_file = data_path + '/UrbanSound8K.csv'
df = pd.read_csv(metadata_file)

# Construct file path by concatenating fold and file name,
# e.g. '/fold5/100032-3-0-0.wav' (relative to data_path).
df['relative_path'] = '/fold' + df['fold'].astype(str) + '/' + df['slice_file_name'].astype(str)

# Take relevant columns
# df = df[['relative_path', 'classID']]

# Only the last expression of a cell is displayed, so preview the frame once, here.
df.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class,relative_path
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark,/fold5/100032-3-0-0.wav
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing,/fold5/100263-2-0-117.wav
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing,/fold5/100263-2-0-121.wav
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing,/fold5/100263-2-0-126.wav
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing,/fold5/100263-2-0-137.wav


In [53]:
# Spot-check the constructed path of the row labelled 0.
df.at[0, 'relative_path']

'/fold5/100032-3-0-0.wav'

In [54]:
# Per-fold accuracy in percent (fold index -> accuracy), filled in by train_model.
results = dict()

# 10 shuffled splits with a fixed seed so the fold assignment is repeatable.
kFold = KFold(shuffle=True, n_splits=10, random_state=0)

# TensorBoard writer (default log directory).
writer = SummaryWriter()

# Dataset built from the metadata frame and the dataset root directory.
dataset = SoundDS(df, data_path)

In [55]:
# # Random split of 90:10 between (training + validation) and test
# num_items = len(myds)
# num_train_val = round(num_items * 0.9)
# num_test = num_items - num_train
# train_val_ds, test_ds = random_split(myds, [num_train_val, num_test])

# # Create (training + validation) and test data loaders
# train_val_dl = torch.utils.data.DataLoader(train_val_ds, batch_size=16, shuffle=True)
# test_dl = torch.utils.data.DataLoader(test_ds, batch_size=16, shuffle=False)

In [56]:
# # Create the model and put it on the GPU if available
# model = AudioClassifier()
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

# Check that it is on Cuda
# NOTE(review): in the visible notebook `model` is only assigned at top level by the
# commented-out lines above, so on a fresh kernel this expression raises NameError
# unless those lines are restored.
next(model.parameters()).device

device(type='cuda', index=0)

In [57]:
def reset_weights(m):
    """Re-initialise every direct child of ``m`` that supports it.

    Each object yielded by ``m.children()`` that exposes a ``reset_parameters``
    attribute has that method called; children without it are left untouched.
    Used before training a fold so that no weights carry over between folds.

    Args:
        m: Object exposing ``children()`` (e.g. a ``torch.nn.Module``).
    """
    # Lazy filter: each child is checked right before it is reset, in order.
    resettable = (child for child in m.children()
                  if hasattr(child, 'reset_parameters'))
    for child in resettable:
        child.reset_parameters()

In [60]:
# ----------------------------
# Training Loop
# ----------------------------
def train_model(num_epochs):
    """Train and evaluate a fresh AudioClassifier on every cross-validation fold.

    For each split produced by the module-level ``kFold`` over ``dataset``:
    a new model is created, trained for ``num_epochs`` epochs on the training
    indices, saved to ``./model-fold-<fold>.pth`` and evaluated on the held-out
    indices. The accuracy (in percent) is stored in the module-level ``results``
    dict under the fold index. Finally a per-fold summary, the average accuracy
    and the elapsed wall-clock time are printed.

    Args:
        num_epochs: Number of passes over the training indices of each fold.
    """
    since = time.time()

    # The device does not change between folds, so pick it once.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    for fold, (train_ids, val_ids) in enumerate(kFold.split(dataset)):

        # Print
        print(f'FOLD {fold}')
        print('--------------------------------')

        # Sample elements randomly from a given list of ids, no replacement.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        val_subsampler = torch.utils.data.SubsetRandomSampler(val_ids)

        # Define data loaders for training and testing data in this fold
        train_dl = torch.utils.data.DataLoader(
            dataset, batch_size=16, sampler=train_subsampler)
        val_dl = torch.utils.data.DataLoader(
            dataset, batch_size=16, sampler=val_subsampler)

        # Create the model and put it on the GPU if available
        model = AudioClassifier()
        model = model.to(device)
        # Reset model so no weights leak from one fold into the next
        model.apply(reset_weights)

        # Loss Function, Optimizer and Scheduler
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                        steps_per_epoch=int(len(train_dl)),
                                                        epochs=num_epochs,
                                                        anneal_strategy='linear')

        # Make sure the model is in training mode for this fold
        model.train()

        # Repeat for each epoch
        for epoch in range(num_epochs):

            # Repeat for each batch in the training set
            for data in train_dl:
                # Get the input features and target labels, and put them on the GPU
                inputs, labels = data[0].to(device), data[1].to(device)

                # Normalize the inputs (per-batch mean / std)
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / inputs_s

                # Zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # OneCycleLR is a per-batch schedule: without this call the
                # learning rate never follows the configured schedule.
                scheduler.step()

        # Process is complete.
        print('Training process has finished. Saving trained model.')

        # Saving the model
        save_path = f'./model-fold-{fold}.pth'
        torch.save(model.state_dict(), save_path)

        # Print about testing
        print('Starting testing')

        # Evaluation for this fold. Switch to eval mode so the model is not
        # scored (or further updated) in its training-mode behaviour.
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            # Iterate over the test data and generate predictions
            for data in val_dl:
                # Get the input features and target labels, and put them on the GPU
                inputs, labels = data[0].to(device), data[1].to(device)

                # Normalize the inputs
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / inputs_s

                # Generate outputs
                outputs = model(inputs)

                # Set total and correct
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty validation split instead of dividing by zero.
        accuracy = 100.0 * correct / total if total else 0.0

        # Print accuracy
        print('Accuracy for fold %d: %d %%' % (fold, accuracy))
        print('--------------------------------')
        results[fold] = accuracy

    # Print fold results
    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {kFold.get_n_splits()} FOLDS')
    print('--------------------------------')
    for key, value in results.items():
        print(f'Fold {key}: {value} %')
    # Use the builtin sum rather than shadowing it with a local accumulator.
    average = sum(results.values()) / len(results)
    print(f'Average: {average} %')

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

In [61]:
# Run the full cross-validation: 80 epochs on each fold produced by kFold.
train_model(num_epochs=80)
# writer.flush()
# writer.close()

FOLD 0
--------------------------------


KeyboardInterrupt: 

In [27]:
# Visualizing the model predictions
# Generic function to display predictions for a few images

def visualize_model(model, num_images=6):
    """Show predicted class names for the first ``num_images`` validation samples.

    The model is switched to eval mode while predicting, and its previous
    training/eval mode is restored on exit, including when an error is raised.
    Samples are drawn into a two-column subplot grid.

    NOTE(review): relies on ``plt``, ``dataloaders``, ``device``, ``class_names``
    and ``imshow`` being defined at module level; none of them is defined in the
    visible part of this notebook.

    Args:
        model: Module to run; called on batches from ``dataloaders['val']``.
        num_images: Maximum number of samples to display. Values <= 0 display
            nothing.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    # Ceiling division: an odd num_images still needs a last, half-filled row.
    num_rows = -(-num_images // 2)
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _labels in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Restore the caller's mode on every exit path.
        model.train(mode=was_training)

In [52]:
# ----------------------------
# Inference
# ----------------------------
def inference(model, test_dl):
    """Compute and print the classification accuracy of ``model`` on ``test_dl``.

    Each batch is moved to the device the model's parameters live on (CPU if the
    model has no parameters), normalized with its own mean and standard
    deviation, and classified by taking the highest-scoring output. Gradients
    are disabled. The model's train/eval mode is not changed here; the caller
    is expected to have set it.

    Args:
        model: Callable module mapping a batch of inputs to per-class scores.
        test_dl: Iterable of batches where ``batch[0]`` holds the inputs and
            ``batch[1]`` the integer class labels.

    Prints:
        ``Accuracy: <fraction>, Total items: <count>``; the accuracy is reported
        as 0.00 when ``test_dl`` yields no items.
    """
    # Derive the device from the model instead of relying on a global `device`.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    correct_prediction = 0
    total_prediction = 0

    # Disable gradient updates
    with torch.no_grad():
        for data in test_dl:
            # Get the input features and target labels, and put them on the model's device
            inputs, labels = data[0].to(target_device), data[1].to(target_device)

            # Normalize the inputs
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s

            # Get predictions
            outputs = model(inputs)

            # Get the predicted class with the highest score
            _, prediction = torch.max(outputs, 1)
            # Count of predictions that matched the target label
            correct_prediction += (prediction == labels).sum().item()
            total_prediction += prediction.shape[0]

    # An empty loader would otherwise raise ZeroDivisionError.
    acc = correct_prediction / total_prediction if total_prediction else 0.0
    print(f'Accuracy: {acc:.2f}, Total items: {total_prediction}')

In [54]:
# Run inference on trained model with the validation set
# `device` is otherwise only assigned inside train_model (as a local), so define
# it here to keep this cell runnable on a fresh kernel.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load best model weights
model_ft = AudioClassifier()
model_ft = model_ft.to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# NOTE(review): train_model saves './model-fold-<fold>.pth'; confirm where
# 'model.pt' comes from.
model_ft.load_state_dict(torch.load('model.pt', map_location=device))
model_ft.eval()
# NOTE(review): `test_dl` is only created in a commented-out cell of the visible
# notebook; it must be defined before this line runs.
inference(model_ft, test_dl)

Accuracy: 0.28, Total items: 873
