In [1]:
# base libraries
import os
import pandas as pd
from tqdm import tqdm
from tabulate import tabulate

# pytorch
import torch
import torchaudio
import torch.nn as nn
from torchaudio import transforms
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter

# sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split

# jupyter notebook configuration
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
# Google Drive path of the dataset root.
data_path = '/content/drive/MyDrive/UrbanSound8K'
# Derived from data_path so the two locations cannot drift apart; the
# trailing slash is kept so the resulting string is unchanged.
audio_path = data_path + '/audio/'

# Read the metadata file.
metadata_file = data_path + '/metadata/UrbanSound8K.csv'
df = pd.read_csv(metadata_file)
df.head()

In [None]:
# Show the dimensions of the metadata frame loaded above.
print(f'metadata shape: {df.shape}')

In [None]:
# Extract the columns used to build file paths and targets.
files = df["slice_file_name"].tolist()
folds = df["fold"].to_numpy()  # stays a NumPy array, unlike the two lists
labels = df["classID"].tolist()

In [None]:
class SundDS(Dataset):
    """Audio dataset yielding standardized mel-spectrogram + MFCC features.

    Each item is loaded from disk, down-mixed to mono, resampled to ``sr``,
    zero-padded or cropped to exactly ``sr`` samples (one second of audio),
    and turned into a mel spectrogram and MFCCs. Both feature maps are
    standardized independently and concatenated along the feature axis.

    Args:
        path: Sequence of audio file paths.
        class_id: Sequence of integer class labels aligned with ``path``.
        sr: Target sample rate in Hz; also the number of samples kept per
            clip. Defaults to 44100.
    """

    # Lower bound for the standard deviation so constant (e.g. silent)
    # input standardizes to zeros instead of NaN.
    _EPS = 1e-8

    def __init__(self, path, class_id, sr=44100):
        self.path = path
        self.class_id = class_id
        self.sr = sr
        # Built once here rather than on every __getitem__ call. Every clip
        # is resampled to ``sr`` before use, so one transform fits all files.
        self.mel_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sr)
        self.mfcc_transform = torchaudio.transforms.MFCC(sample_rate=sr)

    def __len__(self):
        return len(self.path)

    @staticmethod
    def _fit_length(mono, n_samples):
        """Zero-pad or crop a ``(channels, time)`` waveform to ``n_samples``."""
        length = mono.size(-1)
        if length >= n_samples:
            return mono[:, :n_samples]
        padded = mono.new_zeros((mono.size(0), n_samples))
        padded[:, :length] = mono
        return padded

    @classmethod
    def _standardize(cls, feats):
        """Shift to zero mean and scale to unit std over the whole tensor."""
        return (feats - feats.mean()) / feats.std().clamp_min(cls._EPS)

    def __getitem__(self, idx):
        """Return ``{"spec": (frames, features) float tensor, "label": long}``."""
        # load audio
        waveform, file_sr = torchaudio.load(self.path[idx], normalize=True)

        # convert stereo to mono
        mono = torch.mean(waveform, dim=0, keepdim=True)

        # Bring every clip to the common rate so that the fixed sample count
        # below always covers the same duration.
        if file_sr != self.sr:
            mono = torchaudio.transforms.Resample(file_sr, self.sr)(mono)

        data = self._fit_length(mono, self.sr)

        mel_specgram_norm = self._standardize(self.mel_transform(data))
        mfcc_norm = self._standardize(self.mfcc_transform(data))

        # Stack both feature maps along the feature axis, drop the channel
        # axis and put time first: (1, features, frames) -> (frames, features).
        features = torch.cat([mel_specgram_norm, mfcc_norm], dim=1)[0].permute(1, 0)

        return {"spec": features.to(torch.float).contiguous(),
                "label": torch.tensor(self.class_id[idx], dtype=torch.long)}

In [None]:
# Build the full path of every clip: <audio_path>fold<N>/<slice_file_name>.
# The components are passed to os.path.join separately so that it actually
# performs the joining.
path = [os.path.join(audio_path, f"fold{fold}", file)
        for fold, file in zip(folds, files)]

# Random 75/25 split with a fixed seed for reproducibility.
# NOTE(review): the metadata carries a `fold` column that this random split
# ignores; confirm a fold-based split is not what the dataset expects.
X_train, X_test, y_train, y_test = train_test_split(
    path, labels, random_state=42, test_size=0.25)

In [None]:
# Wrap each split in the custom dataset.
# NOTE: later cells rebind the names `train` and `test` to functions, so
# after those cells run the datasets are reachable only through the loaders.
train = SundDS(X_train, y_train)
test = SundDS(X_test, y_test)

def collate_fn(data):
    """Collate dataset items into one zero-padded batch.

    The returned tensors are left on the device the items came from; the
    train/test loops move each batch to the target device themselves, so
    this function needs no global ``device``.

    Args:
        data: List of dicts, each with a "spec" tensor whose first axis is
            time and a scalar "label".

    Returns:
        Tuple ``(spec, labels)``: ``spec`` is the "spec" tensors padded with
        zeros to the longest sequence, batch first; ``labels`` is a 1-D
        tensor of the labels in the same order.
    """
    specs = [item["spec"] for item in data]
    # as_tensor also accepts plain Python/NumPy integers as labels.
    labels = torch.stack([torch.as_tensor(item["label"]) for item in data])

    spec = torch.nn.utils.rnn.pad_sequence(
        specs, batch_first=True, padding_value=0.)

    return spec, labels


# Both loaders share the batching setup and differ only in shuffling.
# drop_last=True keeps every batch at exactly 32 items, the size for which
# the train/test loops build the initial hidden state.
_loader_kwargs = dict(batch_size=32, drop_last=True, collate_fn=collate_fn)
train_loader = DataLoader(train, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test, shuffle=False, **_loader_kwargs)

In [None]:
# model
class LSTM(nn.Module):
    """Sequence classifier: LSTM -> dropout -> two linear layers.

    Only the output of the last time step is fed to the linear layers.

    Args:
        classes: Number of output classes.
        n_feature: Size of the feature vector at each time step.
        n_hidden: LSTM hidden size.
        n_layers: Number of stacked LSTM layers.
        drop: Dropout probability (between LSTM layers and before ``fc1``).
    """

    def __init__(self, classes=10, n_feature=5, n_hidden=128, n_layers=1, drop=0.2):
        super().__init__()
        self.drop = drop
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_feature = n_feature
        # nn.LSTM applies dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and only triggers a warning.
        lstm_dropout = self.drop if self.n_layers > 1 else 0.0
        self.lstm = nn.LSTM(self.n_feature, self.n_hidden,
                            self.n_layers, dropout=lstm_dropout, batch_first=True)
        self.dropout = nn.Dropout(drop)
        # NOTE(review): never applied in forward(), so fc1 and fc2 compose
        # into a single linear map - confirm whether an activation between
        # them was intended.
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(n_hidden, n_hidden // 2)
        self.fc2 = nn.Linear(n_hidden // 2, classes)

    def forward(self, x, hidden=None):
        """Classify a batch of sequences.

        Args:
            x: Input of shape (batch, time, n_feature).
            hidden: Optional ``(h_0, c_0)`` tuple; ``None`` lets nn.LSTM
                start from zeros.

        Returns:
            Tuple ``(logits, hidden)`` with logits of shape (batch, classes).
        """
        out, hidden = self.lstm(x, hidden)
        # Select the last time step first: the linear layers then run on one
        # step per sequence instead of on every step.
        last_step = self.dropout(out[:, -1, :])
        out = self.fc2(self.fc1(last_step))
        return out, hidden

    def init_hidden(self, batch_size):
        """Return zeroed ``(h_0, c_0)`` on the same device/dtype as the model."""
        ref = next(self.parameters())
        shape = (self.n_layers, batch_size, self.n_hidden)
        hidden = (torch.zeros(shape, dtype=ref.dtype, device=ref.device),
                  torch.zeros(shape, dtype=ref.dtype, device=ref.device))
        return hidden

In [None]:
# save the model state
def save_model(state, filename):
    """Serialize ``state`` to ``filename`` with ``torch.save``.

    NOTE: a later cell defines ``save_model`` again; once both cells have
    run, the later definition is the one in effect.

    Args:
        state: Object to serialize.
        filename: Destination path, or a writable file-like object.
    """
    # torch.save does not create missing directories, so create the parent
    # folder when the target is a filesystem path.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)
    torch.save(state, filename)
    print("Model saved!")


# train
def train(data_loader, model, epoch, optimizer, device):
    """Run one training epoch and print a classification report.

    NOTE: a later cell defines ``train`` again; once both cells have run,
    the later definition is the one in effect.

    Args:
        data_loader: Iterable yielding ``(data, target)`` batches.
        model: Module exposing ``init_hidden(batch_size)`` and returning
            ``(output, hidden)`` from ``model(data, hidden)``.
        epoch: Epoch number; used only in the progress-bar text.
        optimizer: Optimizer whose ``step()`` is called after each batch.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(avg_train_loss, avg_train_accuracy)``: the mean batch loss
        as a float and the unweighted mean of the per-batch accuracies as a
        0-d tensor.
    """
    criterion = nn.CrossEntropyLoss()  # built once, not once per batch
    losses = []
    accuracies = []
    label_batches = []
    pred_batches = []
    model.train()

    loop = tqdm(data_loader)  # progress bar
    for batch_idx, (data, target) in enumerate(loop):
        # load data to the target device
        data = data.to(device)
        target = target.to(device)

        model.zero_grad()
        # Size the initial hidden state from the batch itself so any batch
        # size (including a smaller final batch) works.
        output, _ = model(data, model.init_hidden(data.size(0)))

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        losses.append(loss_value)

        # argmax over the raw outputs picks the same class as argmax over
        # their softmax, so the softmax is skipped.
        winners = output.argmax(dim=1)
        true_pred = (winners == target)

        # fraction of correct predictions in this batch
        accuracy = true_pred.sum().float() / float(target.size(0))
        accuracies.append(accuracy)

        # Keep one CPU tensor per batch; they are concatenated once below.
        label_batches.append(torch.flatten(target).cpu())
        pred_batches.append(torch.flatten(winners).cpu())

        loop.set_description(
            f"EPOCH: {epoch} | ITERATION : {batch_idx} / {len(data_loader)} | LOSS: {loss_value:.4f} | ACCURACY: {accuracy.item():.4f}")
        loop.set_postfix(loss=loss_value)

    avg_train_loss = sum(losses) / len(losses)
    avg_train_accuracy = sum(accuracies) / len(accuracies)
    report = metrics.classification_report(
        torch.cat(label_batches).numpy(), torch.cat(pred_batches).numpy())
    print(report)

    return avg_train_loss, avg_train_accuracy

In [None]:
# save the model state
def save_model(state, filename):
    """Serialize ``state`` to ``filename`` with ``torch.save``.

    NOTE: ``save_model`` is also defined in an earlier cell; this later
    definition is the one in effect once both cells have run.

    Args:
        state: Object to serialize.
        filename: Destination path, or a writable file-like object.
    """
    # torch.save does not create missing directories, so create the parent
    # folder when the target is a filesystem path.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)
    torch.save(state, filename)
    print("Model saved!")


def train(data_loader, model, epoch, optimizer, device):
    """Run one training epoch and print a classification report.

    NOTE: ``train`` is also defined in an earlier cell; this later
    definition is the one in effect once both cells have run.

    Args:
        data_loader: Iterable yielding ``(data, target)`` batches.
        model: Module exposing ``init_hidden(batch_size)`` and returning
            ``(output, hidden)`` from ``model(data, hidden)``.
        epoch: Epoch number; used only in the progress-bar text.
        optimizer: Optimizer whose ``step()`` is called after each batch.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(avg_train_loss, avg_train_accuracy)``: the mean batch loss
        as a float and the unweighted mean of the per-batch accuracies as a
        0-d tensor.
    """
    criterion = nn.CrossEntropyLoss()  # built once, not once per batch
    losses, train_accuracies = [], []
    label_batches, pred_batches = [], []

    model.train()

    loop = tqdm(data_loader)  # progress bar
    for batch_idx, (data, target) in enumerate(loop):
        # load data to the target device
        data = data.to(device)
        target = target.to(device)

        model.zero_grad()
        # Size the initial hidden state from the batch itself so any batch
        # size (including a smaller final batch) works.
        output, _ = model(data, model.init_hidden(data.size(0)))

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        losses.append(loss_value)

        # argmax over the raw outputs picks the same class as argmax over
        # their softmax, so the softmax is skipped.
        winners = output.argmax(dim=1)
        true_pred = (winners == target)

        # fraction of correct predictions in this batch
        accuracy = true_pred.sum().float() / float(target.size(0))
        train_accuracies.append(accuracy)

        # Keep one CPU tensor per batch; they are concatenated once below.
        label_batches.append(torch.flatten(target).cpu())
        pred_batches.append(torch.flatten(winners).cpu())

        loop.set_description(
            f"EPOCH: {epoch} | ITERATION : {batch_idx} / {len(data_loader)} | LOSS: {loss_value:.4f} | ACCURACY: {accuracy.item():.4f}")
        loop.set_postfix(loss=loss_value)

    avg_train_loss = sum(losses) / len(losses)
    avg_train_accuracy = sum(train_accuracies) / len(train_accuracies)
    report = metrics.classification_report(
        torch.cat(label_batches).numpy(), torch.cat(pred_batches).numpy())
    print(report)

    return avg_train_loss, avg_train_accuracy


def test(data_loader, model, optimizer, device):
    """Evaluate ``model`` and return the mean of the per-batch accuracies.

    Args:
        data_loader: Iterable yielding ``(data, target)`` batches.
        model: Module exposing ``init_hidden(batch_size)`` and returning
            ``(output, hidden)`` from ``model(data, hidden)``.
        optimizer: Unused; kept so the signature stays the same.
        device: Device every batch is moved to.

    Returns:
        Unweighted mean of the per-batch accuracies as a 0-d tensor.
    """
    model.eval()
    test_accuracies = []

    with torch.no_grad():
        for data, target in tqdm(data_loader):
            data = data.to(device)
            target = target.to(device)

            # Size the initial hidden state from the batch itself so any
            # batch size (including a smaller final batch) works.
            output, _ = model(data, model.init_hidden(data.size(0)))

            # argmax over the raw outputs picks the same class as argmax
            # over their softmax, so the softmax is skipped.
            winners = output.argmax(dim=1)
            corrects = (winners == target)
            accuracy = corrects.sum().float() / float(target.size(0))
            test_accuracies.append(accuracy)

    avg_test_acc = sum(test_accuracies) / len(test_accuracies)

    return avg_test_acc