<a href="https://colab.research.google.com/github/nefario7/Pytorch/blob/master/Urbansound8K_in_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mounting Google Drive to save model

In [1]:
# Mount Google Drive at /content/gdrive. The call is interactive: Colab asks
# for an authorization code before the mount completes.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
#!ls "/content/gdrive/My Drive"


# Downloading and Processing Data from Kaggle

In [0]:
# Open Colab's file-upload widget.
# NOTE(review): presumably used to upload kaggle.json, which the next cell
# copies into ~/.kaggle — confirm.
from google.colab import files
files.upload()

In [0]:
# Copy the uploaded kaggle.json from the working directory into ~/.kaggle.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [0]:
# Download the chrisfilo/urbansound8k dataset archive with the kaggle CLI.
!kaggle datasets download -d chrisfilo/urbansound8k

In [0]:
from zipfile import ZipFile

# Archive to unpack into the current working directory.
filename = 'urbansound8k.zip'

# The handle is called `archive` rather than `zip`: binding the name `zip` at
# notebook level would shadow the builtin zip() for every later cell.
with ZipFile(filename, 'r') as archive:
    archive.extractall()
print('Done')

# Creating the training and testing folders

In [0]:
import os, shutil
import numpy as np
import random

# Combine the contents of fold1 ... fold10 into a single 'all' folder.
# All paths are relative to the current working directory, so the cell does
# not depend on any machine-specific absolute location.
alldir = 'all'
os.makedirs(alldir, exist_ok=True)  # re-running the cell must not crash

for i in range(1, 11):
    fpath = 'fold' + str(i)
    for aud in os.listdir(fpath):
        shutil.move(os.path.join(fpath, aud), alldir)
print('Done!')

In [0]:
# Sanity check: number of entries currently in the 'all' folder.
print(len(os.listdir('all')))

In [0]:
#*Creating Test Data
# Hold out `numtest` randomly chosen clips by moving them from 'all' to 'nntest'.
os.makedirs('nntest', exist_ok=True)  # re-running the cell must not crash
numtest = 873
dirpath = 'all'

# Move only what is still missing, so executing the cell again does not drain
# a further `numtest` files out of the training pool.
remaining = max(0, numtest - len(os.listdir('nntest')))

# One directory listing plus one sample-without-replacement replaces the
# per-iteration os.listdir() + random.choice() of the earlier version.
candidates = sorted(os.listdir(dirpath))
for filename in random.sample(candidates, min(remaining, len(candidates))):
    testsource = os.path.join(dirpath, filename)
    shutil.move(testsource, 'nntest')

In [0]:
# Sanity check: number of entries currently in the 'nntest' folder.
print(len(os.listdir('nntest')))

In [0]:
import torch

# Query GPU availability once and keep the answer in `use_cuda`.
use_cuda = torch.cuda.is_available()
if not use_cuda:
    print("Nay")
else:
    print("Yay")
    print(torch.cuda.get_device_name(0))

# Loading Tensorboard 

In [0]:
%load_ext tensorboard

In [0]:
import os
# Log directory name; created up front (no error if it already exists).
LOG_DIR = 'runs'
os.makedirs(LOG_DIR, exist_ok=True)
# !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
# !unzip ngrok-stable-linux-amd64.zip
# get_ipython().system_raw(
#     'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
#     .format(LOG_DIR))

# get_ipython().system_raw('./ngrok http 6006 &')

# ! curl -s http://localhost:4040/api/tunnels | python3 -c \
#     "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

# Installing and Importing the dependencies

In [0]:
!pip install torchaudio
import os
import pandas as pd 
import matplotlib.pyplot as plt
import librosa as lb
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio.transforms
import torchaudio
import numpy as np
from torch import optim 
from torch.utils.data import Dataset
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# Defining Custom Dataset

In [0]:
# Show where the notebook is running; the relative paths below resolve from here.
path = os.getcwd()
print("Current Working Directory : {}".format(path))

#*Locations of the UrbanSound8K metadata file and the two audio folders
csv_path, train_path, test_path = 'UrbanSound8K.csv', 'all', 'nntest'

csvData = pd.read_csv(csv_path)
print("\nMETADATA SAMPLE")
print(csvData.head())

#*Hyperparameters and lookup tables
max_epochs = 100
# Layer widths consumed by the network definition, input first, output last.
layer_nodes = [11479, 5000, 2000, 200, 10]
# Integer class id -> class name.
identifiers = dict(enumerate([
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]))


#*Class defining the Custom Dataset
class urbansound(Dataset):
    """Audio clips served as fixed-length, flattened MFCC feature vectors.

    Parameters
    ----------
    csv_path : str
        Metadata CSV. Column 0 is read as the audio file name and column 6 as
        the label.
    file_path : str
        Directory holding the audio files. Only metadata rows whose file is
        present in this directory become part of the dataset.
    transform : callable, optional
        If given, applied to the feature tensor just before it is returned.

    Each item is ``(features, label)`` with ``features`` of shape
    ``(1, layer_nodes[0])``: the flattened MFCC matrix, zero-padded on the
    right (or cropped) to the network input width.
    """

    # Every clip is resampled to this rate before feature extraction.
    target_rate = 44100

    def __init__(self, csv_path, file_path, transform = None):
        self.file_path = file_path
        self.csv_path = csv_path
        self.transform = transform
        self.file_names = []
        self.labels = []

        # Built once and reused for every item. The sample rate must be the
        # rate of the *resampled* signal, not the file's native rate.
        # NOTE: checkpoints trained on features computed with the native rate
        # will not match these features for clips not recorded at 44.1 kHz.
        self.mfcc = torchaudio.transforms.MFCC(sample_rate=self.target_rate, n_mfcc=13)

        # List the directory a single time; the set gives O(1) membership
        # tests instead of re-reading the directory for each metadata row.
        available = set(os.listdir(file_path))

        metadata = pd.read_csv(csv_path)
        for i in tqdm(range(len(metadata))):
            name = metadata.iloc[i, 0]
            if name in available:
                self.file_names.append(name)
                self.labels.append(metadata.iloc[i, 6])

    def __getitem__(self, index):
        """Load clip ``index`` and return ``(features, label)``."""
        path = os.path.join(self.file_path, self.file_names[index])
        # Library defaults are used on purpose: the explicit
        # ``out=None, normalization=True`` keywords only restated the old
        # defaults and are rejected by newer torchaudio releases.
        sound, srate = torchaudio.load(path)

        resound = torchaudio.transforms.Resample(srate, self.target_rate)(sound)
        monosound = torch.mean(resound, dim=0)      # average the channels
        features = self.mfcc(monosound)
        features = features.reshape(1, -1)

        # Force the exact input width of the network: crop what is too long,
        # zero-pad on the right what is too short.
        target_len = layer_nodes[0]
        features = features[:, :target_len]
        features = torch.nn.functional.pad(
            features, (0, target_len - features.size(1)), mode='constant', value=0)

        if self.transform is not None:
            features = self.transform(features)

        return features, self.labels[index]

    def __len__(self):
        """Number of clips found in both the metadata and ``file_path``."""
        return len(self.file_names)


# Loading the Training and Test Data

In [0]:
#*Build both datasets, then wrap each one in a shuffling DataLoader
trainset = urbansound(csv_path, train_path)
testset = urbansound(csv_path, test_path)

# Training batches hold 256 samples, test batches 128.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=256, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=128, shuffle=True)

print("\nDATASET SIZES")
print("Train set size: {}".format(len(trainset)))
print("Test set size: {}".format(len(testset)))

# Neural Network Definition

In [0]:
#*Definition of the Neural Network
class neuralnet(nn.Module):
    """Fully connected classifier with four linear layers and log-softmax output.

    Parameters
    ----------
    layers : sequence of 5 ints, optional
        Widths ``[input, hidden1, hidden2, hidden3, output]``. When omitted,
        the notebook-level ``layer_nodes`` list is used, so ``neuralnet()``
        behaves as before.

    Raises
    ------
    ValueError
        If ``layers`` does not contain exactly five entries.
    """

    def __init__(self, layers=None):
        super(neuralnet, self).__init__()
        if layers is None:
            layers = layer_nodes
        if len(layers) != 5:
            raise ValueError("layers must contain exactly 5 sizes, got {}".format(len(layers)))

        # Attribute names are kept so existing state_dict checkpoints still load.
        self.fc1 = nn.Linear(layers[0], layers[1])
        self.fc2 = nn.Linear(layers[1], layers[2])
        self.fc3 = nn.Linear(layers[2], layers[3])
        self.fc4 = nn.Linear(layers[3], layers[4])
        self.dropout = nn.Dropout(p=0.25)                            #? Should I add more dropouts with diff probabilities

    def forward(self, x):
        """Map a ``(batch, layers[0])`` tensor to ``(batch, layers[4])`` log-probabilities."""
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        # No dropout on the output. In training mode dropout zeroes entries
        # (a log-probability of 0 means probability 1) and rescales the rest,
        # so the result would no longer be a valid log-distribution for NLLLoss.
        output = F.log_softmax(self.fc4(x), dim=1)

        return output

# Instantiate a throwaway network and show its layer summary.
test = neuralnet()
print("\nNEURAL NETWORK ARCHITECTURE", test, sep="\n")

# Sample Outputs for verification

In [0]:
#Sample output from Neural Net for verification
# The sample is taken from the training dataset rather than from a hard-coded
# file name: a specific file may have been moved to 'nntest' by the random
# split, and the dataset already pads the features to the network input width.
feats, _ = trainset[0]

result = test(feats)
print(result)

#Sample from Dataloader
samplenet = neuralnet()
if use_cuda:
    samplenet.cuda()

train_iter = iter(trainloader)
# Builtin next() works on every DataLoader iterator; the .next() method does not.
f, l = next(train_iter)
if use_cuda:
    f, l = f.cuda(), l.cuda()

print(f.shape, l.shape)

# Drop the singleton middle dimension: (batch, 1, features) -> (batch, features)
f = f.view(f.shape[0], -1)
print(f.shape)

# The network returns log-probabilities; exp() turns them into probabilities.
ps = torch.exp(samplenet(f))
print(ps.shape)

# top_p, top_class = ps.topk(1, dim=1)
# print(top_p, top_class)
# print('features shape on batch size = {}'.format(f.size()))
# print('labels shape on batch size = {}'.format(l.size()))

# Training and Testing

In [0]:
#*Defining Loss calulcation criteria and updation parameters
logs_base_dir = "runs"
%tensorboard --logdir {logs_base_dir}

writer = SummaryWriter("runs")

acoustclassifier = neuralnet()
if use_cuda:
    acoustclassifier.cuda()

criterion = nn.NLLLoss()
optimizer = optim.SGD(acoustclassifier.parameters(), lr = 0.001)

train_losses = []
test_losses = []

#*Training Loop
max_epochs = 10    
print("TRAINING")               

for e in range(max_epochs):
    runningloss = 0

    for i, data in enumerate(trainloader):
        acoustclassifier.train()
        optimizer.zero_grad()

        og_features, og_labels = data
        og_features = og_features.view(og_features.shape[0], -1)
        if use_cuda:
            og_features = og_features.cuda()
            og_labels = og_labels.cuda()

        output = acoustclassifier(og_features)
        loss = criterion(output, og_labels)        
        loss.backward()
        optimizer.step()

        runningloss += loss.item()
        print('[{}, {}] runningloss: {:.3f} loss: {:.3f}'.format(e + 1, i + 1, runningloss/len(trainloader), loss.item()))
        
        writer.add_graph(acoustclassifier, og_features)
        writer.add_scalar('Training Loss:', runningloss/len(trainloader), e * len(trainloader) + i)
        writer.add_scalar('Loss:', loss.item(), e * len(trainloader) + i)
    else:
        test_loss = 0
        accuracy = 0
        
        # Turn off gradients for validation, saves memory and computations
        with torch.no_grad():

            acoustclassifier.eval()
            print("\nTESTING")

            for test_features, test_labels in testloader:
                test_features = test_features.view(test_features.shape[0], -1)
                if use_cuda:    
                    test_features = test_features.cuda()
                    test_labels = test_labels.cuda()

                logps = acoustclassifier(test_features)
                test_loss += criterion(logps, test_labels)
                ps = torch.exp(logps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == test_labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                
        train_losses.append(runningloss/len(trainloader))
        test_losses.append(test_loss/len(testloader))

        if not os.path.exists('checkpoints'):
            os.makedirs('checkpoints')

        torch.save(acoustclassifier.state_dict(), r"/content/gdrive/My Drive/UrbanSound8K/checkpoints/chkpt_" + str(e+1) + ".pth")

        print("Chkpt {} saved!\t".format(e+1),
              "Epoch: {}/{}\t".format(e+1, max_epochs),
              "Training Loss: {:.3f}\t".format(runningloss/len(trainloader)),
              "Test Loss: {:.3f}\t".format(test_loss/len(testloader)),
              "Test Accuracy: {:.3f}\t".format(accuracy/len(testloader)))
        
        writer.add_scalar("Test Accuracy:", accuracy/len(testloader), e * len(testloader) + i)
        writer.add_scalar("Test Loss:", test_loss/len(testloader), e * len(testloader) + i)

writer.close()
#*Saving the trained model
print(acoustclassifier.state_dict().keys())
# torch.save(acoustclassifier.state_dict(), 'trained_model.pth')

path = "/content/gdrive/My Drive/UrbanSound8K/trainedmodel.pth" 
torch.save(model.state_dict(), path)
print("MODEL TRAINED AND SAVED!")