In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print every directory found under the dataset root (sub-directory and
# file lists yielded by os.walk are not needed here).
for dirname, _subdirs, _filenames in os.walk(top='/kaggle/input/urbansound8k'):
    print(dirname)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Show the top-level entries of the dataset directory.
os.listdir(path="/kaggle/input/urbansound8k")

In [None]:
!pip install torchsummary

In [None]:
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torchsummary import summary
from tqdm import tqdm

In [None]:
# Load the annotation table that accompanies the audio folds.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/urbansound8k/UrbanSound8K.csv')

In [None]:
# Preview only the first rows instead of dumping the whole table.
df.head()

In [None]:
class UrbanSoundDataset(Dataset):
    """Dataset yielding ``(transformed_signal, label)`` pairs for annotated audio clips.

    Every item is loaded with ``torchaudio.load``, moved to ``device``,
    resampled to ``target_sr``, mixed down to a single channel, trimmed or
    right-padded with zeros to exactly ``num_samples`` samples and finally
    passed through ``transformation``.

    Args:
        annotations_file: CSV path (or buffer) read with ``pd.read_csv``.
            Column 0 is used as the file name, column 5 as the fold number
            and the second-to-last column as the label.
        audio_dir: Directory that contains the ``fold<N>`` sub-directories.
        transformation: Module applied to the fixed-length signal; it is
            moved to ``device``.
        target_sr: Sample rate every clip is resampled to.
        num_samples: Number of samples every clip is trimmed/padded to.
        device: Device on which loading-time signal processing runs.
    """

    def __init__(self, annotations_file, audio_dir, transformation, target_sr, num_samples, device):
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.device = device
        self.transformation = transformation.to(self.device)
        self.target_sr = target_sr
        self.num_samples = num_samples
        # One resampler per source sample rate, created lazily and reused.
        self._resamplers = {}

    def __len__(self):
        """Return the number of annotated clips."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load clip ``index`` and return ``(transformed_signal, label)``."""
        audio_sample_path = self._get_audio_path(index)
        label = self._get_audio_label(index)
        signal, sr = torchaudio.load(audio_sample_path)
        signal = signal.to(self.device)
        signal = self._resample(signal, sr)
        signal = self._to_mono(signal)

        # Force a constant length so that items can be batched.
        if signal.shape[1] > self.num_samples:
            signal = self._trim(signal)
        elif signal.shape[1] < self.num_samples:
            signal = self._pad(signal)

        signal = self.transformation(signal)
        return signal, label

    def _get_audio_path(self, index):
        """Build ``<audio_dir>/fold<N>/<file name>`` from annotation columns 5 and 0."""
        fold = f"fold{self.annotations.iloc[index, 5]}"
        return os.path.join(self.audio_dir, fold, self.annotations.iloc[index, 0])

    def _get_audio_label(self, index):
        """Return the label stored in the second-to-last annotation column."""
        return self.annotations.iloc[index, -2]

    def _resample(self, signal, sr):
        """Resample ``signal`` from ``sr`` to ``self.target_sr`` when they differ."""
        if sr == self.target_sr:
            return signal
        resampler = self._resamplers.get(sr)
        if resampler is None:
            # The resampler must live on the same device as the signal it is
            # applied to, otherwise the call fails with a device mismatch.
            resampler = torchaudio.transforms.Resample(sr, self.target_sr).to(self.device)  # (original_sr, target_sr)
            self._resamplers[sr] = resampler
        return resampler(signal)

    def _to_mono(self, signal):
        """Average the channels when the signal has more than one."""
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    def _trim(self, signal):
        """Keep only the first ``num_samples`` samples."""
        return signal[:, :self.num_samples]

    def _pad(self, signal):
        """Right-pad the last dimension with zeros up to ``num_samples`` samples."""
        num_missing_samples = self.num_samples - signal.shape[1]
        return F.pad(signal, (0, num_missing_samples))
        
        

In [None]:
# Every clip is resampled to 22.05 kHz and fixed to 55125 samples
# (2.5 seconds at that rate).
SAMPLE_RATE = 22_050
NUM_SAMPLES = 55_125

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Dataset root and the annotation table stored inside it.
AUDIO_DIR = r'/kaggle/input/urbansound8k'
ANNOTATIONS_PATH = r'/kaggle/input/urbansound8k/UrbanSound8K.csv'

# Mel-spectrogram front end handed to the dataset as its transformation.
mel_spec = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=33,
)

In [None]:
# Build the dataset, report its size and pull one item as a smoke test.
usd = UrbanSoundDataset(
    annotations_file=ANNOTATIONS_PATH,
    audio_dir=AUDIO_DIR,
    transformation=mel_spec,
    target_sr=SAMPLE_RATE,
    num_samples=NUM_SAMPLES,
    device=device,
)
print(f"{len(usd)} number of samples in the dataset")
signal, label = usd[1]

## Sanity check: item shapes

```python
for i in range(10):
    signal, label = usd[i]
    print(signal.shape)
```

Every item must have the same shape, so the clip durations need to be constant.

In [None]:
class ConvNet(nn.Module):
    """CNN classifier: four conv blocks followed by a three-layer dense head.

    The first dense layer expects ``256 * 5 * 10`` features, which is what the
    conv stack produces for a single-channel ``33 x 108`` input.

    ``forward`` returns raw, unnormalised class scores (logits) for 10
    classes. ``nn.CrossEntropyLoss`` applies log-softmax itself, so feeding it
    probabilities would apply softmax twice. Use ``model.softmax(model(x))``
    when probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = self._create_conv_block(in_channels=1, out_channels=32, kernel_size=3, pool_kernel_size=2, stride=1, padding=2)
        self.conv2 = self._create_conv_block(in_channels=32, out_channels=64, kernel_size=2, pool_kernel_size=2, stride=1, padding=2)
        self.conv3 = self._create_conv_block(in_channels=64, out_channels=128, kernel_size=3, pool_kernel_size=2, stride=1, padding=2)
        self.conv4 = self._create_conv_block(in_channels=128, out_channels=256, kernel_size=2, pool_kernel_size=2, stride=1, padding=3)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256 * 5 * 10, 512)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 10)
        # Not applied in forward(); kept so callers can turn logits into
        # probabilities explicitly.
        self.softmax = nn.Softmax(dim=1)

    def _create_conv_block(self, in_channels, out_channels, kernel_size, pool_kernel_size, stride, padding):
        """Return a Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d block."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_kernel_size),
        )

    def forward(self, x):
        """Map a batch of inputs to a ``(batch, 10)`` tensor of logits."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        # Non-linearities between the dense layers; without them the three
        # Linear layers collapse into a single linear map.
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        return self.fc3(x)

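### Finding the flatten dimension

To work out the `in_features` of `fc1`, temporarily push a dummy input with the spectrogram shape through the convolutional blocks and print the resulting shapes:

```python
def __init__(self):
    ...
    x = torch.randn(33, 108).view(-1, 1, 33, 108)
    self._convs(x)

def _convs(self, x):
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    print(x.shape)
    x = self.flatten(x)
    print(x.shape)
```

#### Alternatively, use `AdaptiveMaxPool2d`, whose output size is fixed regardless of the input size.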
       

In [None]:
# Instantiate the network on the selected device and print a per-layer
# summary for a single-channel 33 x 108 input.
model = ConvNet()
model = model.to(device)
summary(model, (1, 33, 108))


In [None]:
def create_data_loader(train_data, batch_size, shuffle=True):
    """Wrap ``train_data`` in a ``DataLoader`` for training.

    Args:
        train_data: Dataset to draw batches from.
        batch_size: Number of items per batch.
        shuffle: Reshuffle the data every epoch (default ``True``). Training
            on a fixed item order gives every epoch identical batches; pass
            ``False`` to keep the dataset order.

    Returns:
        The configured ``DataLoader``.
    """
    return DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)

In [None]:
def train_single_epoch(model, data_loader, loss_fn, optimiser, device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        data_loader: Iterable of ``(inputs, targets)`` batches.
        loss_fn: Callable computing the loss from ``(prediction, targets)``.
        optimiser: Optimiser that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The loss of the last batch as a float, or ``None`` when the loader
        yielded no batches.
    """
    # Dropout and batch-norm behave differently in eval mode, so make sure
    # the model is in training mode even if a caller evaluated it earlier.
    model.train()

    last_loss = None
    for inputs, targets in tqdm(data_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        # calculate loss
        prediction = model(inputs)
        loss = loss_fn(prediction, targets)

        # backpropagate error and update weights
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

        last_loss = loss.item()

    if last_loss is None:
        # An empty loader leaves nothing to report.
        print("loss: n/a (data loader yielded no batches)")
    else:
        print(f"loss: {last_loss}")
    return last_loss

In [None]:
def train(model, data_loader, loss_fn, optimiser, device, epochs):
    """Run ``epochs`` training passes, printing a header and separator for each.

    Every pass is delegated to ``train_single_epoch`` with the same model,
    loader, loss function, optimiser and device.
    """
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        train_single_epoch(model, data_loader, loss_fn, optimiser, device)
        print("---------------------------")
    print("Finished training")

In [None]:
# Training hyper-parameters.
BATCH_SIZE = 64  # items per batch
EPOCHS = 5       # passes over the dataset
LR = 3e-4        # learning rate

In [None]:
# Batch the dataset for training.
train_dataloader = create_data_loader(train_data=usd, batch_size=BATCH_SIZE)

In [None]:
# Show the layer structure of the network.
print(str(model))

In [None]:
# Adam over all model parameters, trained against a cross-entropy objective.
optimiser = torch.optim.Adam(params=model.parameters(), lr=LR)
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Run the full training schedule.
train(
    model=model,
    data_loader=train_dataloader,
    loss_fn=loss_fn,
    optimiser=optimiser,
    device=device,
    epochs=EPOCHS,
)

In [None]:
# Persist the learned parameters. The path is defined once so the saved file
# and the log message cannot drift apart, and the message now names the model
# as the CNN it is rather than a feed-forward net.
MODEL_PATH = "cnn_model.pth"
torch.save(model.state_dict(), MODEL_PATH)
print(f"Trained CNN saved at {MODEL_PATH}")