<a href="https://colab.research.google.com/github/nefario7/Pytorch/blob/master/Urbansound8K_in_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing and Importing the dependencies

In [0]:
!pip install torchaudio
import os
import pandas as pd 
import matplotlib.pyplot as plt
import librosa as lb
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio.transforms
import torchaudio
import numpy as np
from torch import optim 
from torch.utils.data import Dataset
from tqdm import tqdm

Collecting torchaudio
[?25l  Downloading https://files.pythonhosted.org/packages/9c/7d/8e01e21175dd2c9bb1b7e014e0c56cdd02618e2db5bebb4f52f6fdf253cb/torchaudio-0.5.0-cp36-cp36m-manylinux1_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 2.8MB/s 
Installing collected packages: torchaudio
Successfully installed torchaudio-0.5.0


# Defining Custom Dataset

In [0]:
#*Functions to save and load lists
def saveList(myList, filename):
    """Write ``myList`` to ``filename`` with ``np.save`` and print a confirmation.

    Args:
        myList: The list (or array-like) to store.
        filename: Destination path; it should already end in ``.npy``.
    """
    np.save(file=filename, arr=myList)
    print("Saved successfully!")
def loadList(filename):
    """Read the array stored at ``filename`` with ``np.load`` and return it as a list.

    Args:
        filename: Path of the file to read; it should end in ``.npy``.

    Returns:
        The loaded array converted with ``tolist()``.
    """
    return np.load(filename).tolist()

#*CUDA for PyTorch
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("USING : {}".format(device))

# Show the directory the notebook is running from.
path = os.getcwd()
print("Current Working Directory : {}".format(path))

#*Paths for the data and metadata of the UrbanSound8K Dataset
csv_path = r"/mnt/d/Code/UrbanSound8K/metadata"
file_path = r"/mnt/d/Code/UrbanSound8K/audio"

# Load the metadata table once and print its first rows as a quick check.
csvData = pd.read_csv(os.path.join(csv_path, "UrbanSound8K.csv"))
print("\nMETADATA SAMPLE")
print(csvData.head())

#*Parameters and others
max_epochs = 100
# Layer widths of the network; the first entry is also the length every
# feature vector is padded to by the dataset class.
layer_nodes = [11479, 5000, 2000, 200, 10]
# Class id -> label name; each name's position in the tuple is its class id.
identifiers = dict(enumerate((
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
)))


#*Class defining the Custom Dataset
class urbansound(Dataset):
    """UrbanSound8K clips served as ``(MFCC feature vector, label)`` pairs.

    The list of usable file names and their labels is built from the metadata
    CSV once and then cached in two ``.npy`` files in the current working
    directory (``filenames<type>.npy`` and ``labels<type>.npy``).
    """

    def __init__(self, csv_path, file_path, type, transform = None):
        """Collect the file names and labels of the clips found in ``file_path``.

        Args:
            csv_path: Directory that contains ``UrbanSound8K.csv``.
            file_path: Directory that contains the audio files to serve.
            type: Suffix used to name the cache files (e.g. ``'train'``).
            transform: Accepted for interface compatibility; currently unused.
        """
        self.file_path = file_path
        self.csv_path = csv_path
        self.file_names = []
        self.labels = []

        # Check for the cache at the same (working-directory relative) paths
        # that are used for loading and saving, so a cache written by one run
        # is actually found by the next one.
        names_cache = 'filenames' + type + '.npy'
        labels_cache = 'labels' + type + '.npy'

        if os.path.exists(names_cache) and os.path.exists(labels_cache):
            self.file_names = loadList(names_cache)
            self.labels = loadList(labels_cache)
        else:
            # The metadata is only needed when the cache has to be rebuilt.
            metadata = pd.read_csv(os.path.join(csv_path, "UrbanSound8K.csv"))
            # List the directory once; a set makes each membership test cheap.
            available = set(os.listdir(file_path))
            for i in tqdm(range(len(metadata))):
                # Column 0 holds the file name, column 6 the label.
                if metadata.iloc[i, 0] in available:
                    self.file_names.append(metadata.iloc[i, 0])
                    self.labels.append(metadata.iloc[i, 6])
            saveList(self.file_names, names_cache)
            saveList(self.labels, labels_cache)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the clip at ``index``.

        The clip is resampled to 44100 Hz, averaged over channels, converted
        to 13 MFCCs, flattened to shape ``(1, -1)`` and padded with zeros to
        ``layer_nodes[0]`` values.

        Raises:
            OSError: If the audio file cannot be opened (this includes
                ``FileNotFoundError``).
        """
        path = os.path.join(self.file_path, self.file_names[index])
        try:
            sound, srate = torchaudio.load(path, out=None, normalization=True)
        except OSError:
            # FileNotFoundError is a subclass of OSError, so one clause covers
            # both. Re-raise: without the audio there is nothing to return.
            print("\nFile in metadata could not be loaded: {}".format(path))
            raise

        target_rate = 44100
        resound = torchaudio.transforms.Resample(srate, target_rate)(sound)
        monosound = torch.mean(resound, dim=0)
        # The MFCC transform must be told the rate of the resampled signal,
        # not the rate the file was stored with.
        features = torchaudio.transforms.MFCC(sample_rate=target_rate, n_mfcc=13)(monosound)
        features = features.reshape(1, -1)

        # Right-pad with zeros up to the network's input width.
        # NOTE(review): a negative reqpad (clip longer than expected) would
        # presumably trim instead of pad - confirm that is acceptable.
        reqpad = layer_nodes[0] - features.size()[1]
        features = torch.nn.functional.pad(features, (0,reqpad), mode='constant', value=0)

        return features, self.labels[index]

    def __len__(self):
        """Return the number of clips in the dataset."""
        return len(self.file_names)


# Loading the Training and Test Data

In [0]:

#*Load the Training Data
trainset = urbansound(csv_path, os.path.join(file_path, 'all'), 'train')
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

#*Load the Test Data
testset = urbansound(csv_path, os.path.join(file_path, 'nntest'), 'test')
# The test loader must wrap testset; wrapping trainset would make the reported
# test loss/accuracy a second measurement on the training data.
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=True)

print("\nDATASET SIZES")
print("Train set size: {}".format(len(trainset)))
print("Test set size: {}".format(len(testset)))


#?Sample output from the dataset for verification
# for i in range(len(testset)):
#     sample = testset[i]
#     print(i, sample[0], sample[1])

#     if i == 4:
#         break

# Neural Network Definition

In [0]:
#*Definition of the Neural Network
class neuralnet(nn.Module):
    """Fully connected classifier with four linear layers.

    Layer widths are taken from the module-level ``layer_nodes`` list. The
    network outputs log-probabilities (``log_softmax`` over dim 1), which is
    the input ``nn.NLLLoss`` expects.
    """

    def __init__(self):
        """Create the four linear layers and the shared dropout module."""
        super(neuralnet, self).__init__()
        self.fc1 = nn.Linear(layer_nodes[0], layer_nodes[1])
        self.fc2 = nn.Linear(layer_nodes[1], layer_nodes[2])
        self.fc3 = nn.Linear(layer_nodes[2], layer_nodes[3])
        self.fc4 = nn.Linear(layer_nodes[3], layer_nodes[4])
        self.dropout = nn.Dropout(p=0.25)                            #? Should I add more dropouts with diff probabilities

    def forward(self, x):
        """Map a batch of feature vectors to per-class log-probabilities.

        Args:
            x: Tensor whose last dimension is ``layer_nodes[0]``; the softmax
                over dim 1 assumes a ``(batch, features)`` layout.

        Returns:
            Tensor of log-probabilities with ``layer_nodes[4]`` classes.
        """
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        # No dropout on the output: zeroing/rescaling log-probabilities would
        # hand NLLLoss values that are no longer a valid distribution.
        output = F.log_softmax(self.fc4(x), dim=1)

        return output

# Build one instance of the network only to display its layer structure.
test = neuralnet()
print("\nNEURAL NETWORK ARCHITECTURE", test, sep="\n")

# Sample Outputs for verification

In [0]:
#?Sample output from Neural Net for verification
# random_data, sampling = torchaudio.load(r"/mnt/d/Code/UrbanSound8K/audio/all/209992-5-3-5.wav", out=None, normalization=True)
# res_data = torchaudio.transforms.Resample(sampling, 44100)(random_data)
# mono_data = torch.mean(res_data, dim=0)
# feats = torchaudio.transforms.MFCC(sample_rate=sampling, n_mfcc=13)(mono_data)
# feats = feats.reshape(1, -1)

# result = test(feats)
# print(result)

#?Sample from Dataloader
# train_iter = iter(trainloader)
# f, l = train_iter.next()
# print(f.shape, l.shape)

# f = f.view(f.shape[0], -1)
# print(f.shape)

# ps = torch.exp(acoustclassifier(f))
# print(ps.shape)

# top_p, top_class = ps.topk(1, dim=1)
# print(top_p, top_class)
# print('features shape on batch size = {}'.format(f.size()))
# print('labels shape on batch size = {}'.format(l.size()))

# Training and Testing

In [0]:

#*Defining Loss calculation criteria and update parameters
acoustclassifier = neuralnet()
criterion = nn.NLLLoss()
optimizer = optim.SGD(acoustclassifier.parameters(), lr = 0.001)

# Per-epoch average losses, stored as plain floats.
train_losses = []
test_losses = []

# Create the checkpoint directory up front so the first torch.save cannot fail
# just because the folder is missing.
checkpoint_dir = r"/mnt/d/Code/checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

#*Training Loop
max_epochs = 10    #todo: Check the no. of epochs needed (overrides the earlier value of 100)
print("\nTRAINING")
for e in range(max_epochs):
    # Switch to training mode once per epoch (evaluation below switches it off).
    acoustclassifier.train()
    runningloss = 0
    for og_features, og_labels in tqdm(trainloader):
        optimizer.zero_grad()

        # Flatten each sample to a single feature vector.
        og_features = og_features.view(og_features.shape[0], -1)

        output = acoustclassifier(og_features)
        loss = criterion(output, og_labels)
        loss.backward()
        optimizer.step()

        runningloss += loss.item()
        # print("Running Loss = {}".format(runningloss))

    # Evaluation after every epoch.
    test_loss = 0.0
    correct = 0
    seen = 0

    acoustclassifier.eval()
    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        print("\nTESTING")
        for test_features, test_labels in testloader:
            test_features = test_features.view(test_features.shape[0], -1)

            logps = acoustclassifier(test_features)
            # .item() keeps the running total (and test_losses) as floats.
            test_loss += criterion(logps, test_labels).item()
            # The most likely class is the one with the largest log-probability.
            top_class = logps.argmax(dim=1)
            # Count correct samples rather than averaging per-batch accuracies,
            # so a smaller final batch does not skew the result.
            correct += (top_class == test_labels.view(-1)).sum().item()
            seen += test_labels.size(0)

    accuracy = correct / seen if seen else 0.0

    train_losses.append(runningloss/len(trainloader))
    test_losses.append(test_loss/len(testloader))

    torch.save(acoustclassifier.state_dict(), os.path.join(checkpoint_dir, "checkpoint_" + str(e+1) + ".pth"))
    print("Chkpt {} saved!\t".format(e+1),
          "Epoch: {}/{}\t".format(e+1, max_epochs),
          "Training Loss: {:.3f}\t".format(runningloss/len(trainloader)),
          "Test Loss: {:.3f}\t".format(test_loss/len(testloader)),
          "Test Accuracy: {:.3f}\t".format(accuracy))

#*Saving the trained model
print(acoustclassifier.state_dict().keys())
torch.save(acoustclassifier.state_dict(), r"/mnt/d/Code/trainedmodel.pth")
print("MODEL TRAINED AND SAVED!")