# Libraries
***

In [6]:
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchaudio as ta
from torchaudio import transforms
from torch.utils.data import DataLoader, Dataset
import sys

import os, re, shutil, copy, zipfile, glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
from pathlib import Path

from tqdm import tqdm, trange

from pyjanitor import auto_toc
toc = auto_toc()

from pickling import *

Let’s check if a CUDA GPU is available and select our device. Running
the network on a GPU will greatly decrease the training/testing runtime.




In [7]:
# Prefer the CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


# Code Breakdown
***

### Unpacking the Dataset

For reproducibility, place the dataset .zip file in the same directory as this notebook.

In [19]:
zip_ = 'data.zip' # Replace with downloaded .zip from Kaggle
with zipfile.ZipFile(zip_, 'r') as zip_ref:
    zip_ref.extractall('.')

# Class names are the names of the sub-directories of ./data.
# - Only directories count: stray files next to the class folders are not classes.
# - Hidden entries are skipped, as the previous `glob('./data/*')` did.
# - 'subset' is skipped because a later cell writes the train/validation/test
#   split to data/subset; on a re-run it would otherwise be picked up as a class.
# - Sorting makes the class order (and therefore every label index) reproducible,
#   since directory listings come back in arbitrary order.
classes = sorted(
    entry.name
    for entry in Path('./data').iterdir()
    if entry.is_dir()
    and not entry.name.startswith('.')
    and entry.name != 'subset'
)

### Rename the Files

Rename the .wav files for ease of use

In [20]:
# def create_classes(class_):
#     """Creates directory for each class"""
#     dirs = './data'
#     for c in tqdm(class_):
#         if not os.path.exists(os.path.join(dirs, c)):
#             os.makedirs(os.path.join(dirs, c))
            
            
def rename_audio(class_, root='./data'):
    """Rename the .wav files of each class to ``<class>_<index>.wav``.

    Within every class folder the .wav files are sorted by name and numbered
    contiguously from 0; entries that are not .wav files are left untouched
    and do not consume an index.

    Parameters
    ----------
    class_ : iterable of str
        Class (sub-directory) names located under `root`.
    root : str, optional
        Directory that contains one folder per class. Defaults to './data'.

    Notes
    -----
    The function is safe to re-run: a folder whose .wav files already carry
    exactly the target names is skipped, and every other folder is renamed in
    two passes (first to unused temporary names, then to the final names) so
    that no file is ever renamed on top of another existing file.
    """
    for c in tqdm(class_):
        path = os.path.join(root, c)

        # os.listdir returns entries in arbitrary order; sort for reproducible
        # numbering and keep only the audio files so indices have no gaps.
        wavs = sorted(f for f in os.listdir(path) if f.endswith('.wav'))
        targets = [f'{c}_{i}.wav' for i in range(len(wavs))]

        if set(wavs) == set(targets):
            continue  # already normalised, nothing to do

        # Pass 1: move every file to a temporary name that is not in use.
        # Temporary names still end in '.wav', so an interrupted run can be
        # repaired simply by calling this function again.
        taken = set(os.listdir(path))
        staged = []
        for audio, new_name in zip(wavs, targets):
            tmp_name = f'.renaming_{new_name}'
            while tmp_name in taken:
                tmp_name = f'.{tmp_name}'
            taken.add(tmp_name)
            os.rename(os.path.join(path, audio),
                      os.path.join(path, tmp_name))
            staged.append((tmp_name, new_name))

        # Pass 2: all original names are free now, so the final names are safe.
        for tmp_name, new_name in staged:
            os.rename(os.path.join(path, tmp_name),
                      os.path.join(path, new_name))

In [21]:
# create_classes(classes)
# Normalise the clip file names of every discovered class.
rename_audio(class_=classes)

100%|██████████| 14/14 [00:00<00:00, 260.22it/s]


### Create a Dataset

Create a subset of the dataset, split into the three modelling stages: training, validation, and test.

In [24]:
def create_dataset(src, dst, class_, dist=(.6,.2,.2), overwrite=False):
    """Copy the audio clips of each class from `src` into train/validation/test
    folders under `dst`, then print the number of clips per stage and class.

    Parameters
    ----------
    src : str
        Directory containing one folder per class, each holding clips named
        ``<class>_<index>.wav`` with contiguous indices starting at 0.
    dst : str
        Directory that receives ``<stage>/<class>/`` folders.
    class_ : iterable of str
        Class (sub-directory) names to process.
    dist : sequence of float, optional
        Fractions of each class assigned to train, validation and test, in
        that order. Each stage receives ``int(n * fraction)`` clips, so clips
        left over by the truncation are not copied.
    overwrite : bool, optional
        If True, `dst` is deleted first and rebuilt from scratch. If False,
        ``<stage>/<class>`` folders that already exist are left untouched and
        only the missing ones are created.

    Raises
    ------
    FileNotFoundError
        If a class folder is missing or an expected ``<class>_<index>.wav``
        file does not exist in it.
    """
    stages = ['train', 'validation', 'test']

    if os.path.exists(dst) and overwrite:
        shutil.rmtree(dst)

    for c in tqdm(class_):
        c_path = os.path.join(src, c)
        # Count only the audio clips; other directory entries must not
        # inflate the split sizes.
        n_data = sum(1 for f in os.listdir(c_path) if f.endswith('.wav'))

        # Cumulative split boundaries, e.g. 5 clips -> [0, 3, 4, 5].
        bounds = [0]
        for fraction in dist:
            bounds.append(bounds[-1] + int(n_data * fraction))

        for i, stage in enumerate(stages):
            label_path = os.path.join(dst, stage, c)
            # Decide per <stage>/<class> folder: the stage folder itself is
            # shared by all classes, so its existence says nothing about
            # whether this particular class has been copied yet.
            if os.path.exists(label_path) and not overwrite:
                continue
            os.makedirs(label_path, exist_ok=True)

            for j in range(bounds[i], bounds[i+1]):
                fname = f'{c}_{j}.wav'
                src_file = os.path.join(c_path, fname)
                dst_file = os.path.join(label_path, fname)
                shutil.copyfile(src_file, dst_file)

    for stage in stages:
        for c in class_:
            label_path = os.path.join(dst, stage, c)
            n_data = len(os.listdir(label_path))
            print(f'Total {stage.title()} {c.title()} Audio:', f'\t{n_data}')

In [26]:
# Source root (one folder per class) and destination of the staged subset.
src, dst = 'data', 'data/subset'
create_dataset(src=src, dst=dst, class_=classes, overwrite=True)

100%|██████████| 14/14 [00:00<00:00, 33.38it/s]


Total Train Ar Audio: 	3
Total Train Fa Audio: 	3
Total Train Hi Audio: 	3
Total Train Id Audio: 	2
Total Train Ja Audio: 	3
Total Train Kn Audio: 	3
Total Train My Audio: 	3
Total Train Ne Audio: 	3
Total Train Pa Audio: 	3
Total Train Si Audio: 	3
Total Train Ta Audio: 	3
Total Train Th Audio: 	3
Total Train Ur Audio: 	3
Total Train Zh Audio: 	3
Total Validation Ar Audio: 	1
Total Validation Fa Audio: 	1
Total Validation Hi Audio: 	1
Total Validation Id Audio: 	0
Total Validation Ja Audio: 	1
Total Validation Kn Audio: 	1
Total Validation My Audio: 	1
Total Validation Ne Audio: 	1
Total Validation Pa Audio: 	1
Total Validation Si Audio: 	1
Total Validation Ta Audio: 	1
Total Validation Th Audio: 	1
Total Validation Ur Audio: 	1
Total Validation Zh Audio: 	1
Total Test Ar Audio: 	1
Total Test Fa Audio: 	1
Total Test Hi Audio: 	1
Total Test Id Audio: 	0
Total Test Ja Audio: 	1
Total Test Kn Audio: 	1
Total Test My Audio: 	1
Total Test Ne Audio: 	1
Total Test Pa Audio: 	1
Total Test Si 

## Preparing the Paths and Metadata

***
Instantiate the subset directory paths and write the annotations.

In [33]:
# Initialize Directories
# Root of the staged subset and one directory per modelling stage
audio_path = Path('data/subset')
paths = {
    stage: audio_path.joinpath(stage)
    for stage in ('train', 'validation', 'test')
}

# Report where each stage lives
print('Training Dataset Directory: \t' + str(paths['train']))
print('Validation Dataset Directory: \t' + str(paths['validation']))
print('Test Dataset Directory: \t' + str(paths['test']))

Training Dataset Directory: 	data/subset/train
Validation Dataset Directory: 	data/subset/validation
Test Dataset Directory: 	data/subset/test


In [34]:
def get_annotations(paths, classes=classes, out_dir='.'):
    """Write one annotation CSV per stage listing every audio file.

    For each ``stage -> directory`` pair in `paths`, every file found in
    ``<directory>/<class>`` is recorded with its path, its class name and the
    position of that class in `classes`. The rows are written to
    ``<out_dir>/<stage>.csv`` without a header row; the DataFrame index is
    written as the first column.

    Parameters
    ----------
    paths : dict
        Maps a stage name (used as the CSV file name) to the directory that
        holds one sub-directory per class.
    classes : sequence of str, optional
        Class names; the index of a class in this sequence is its
        ``label_index``.
    out_dir : str, optional
        Directory the CSV files are written to. Defaults to the current
        working directory.
    """
    for stage, path in paths.items():
        items = []
        # Wrapping the sequence (not the enumerate object) lets tqdm know the
        # total and render a proper progress bar.
        for j, c in enumerate(tqdm(classes)):
            # os.listdir order is arbitrary; sort so the CSV is reproducible.
            for audio in sorted(os.listdir(f'{path}/{c}')):
                audio_path = f'{path}/{c}/{audio}'

                items.append({
                    'path': audio_path,
                    'label': c,
                    'label_index': j,
                })

        df = pd.DataFrame(items)
        df.to_csv(os.path.join(out_dir, f'{stage}.csv'), header=False)
        
# Write train.csv, validation.csv and test.csv next to the notebook.
get_annotations(paths=paths, classes=classes)

14it [00:00, 2168.64it/s]
14it [00:00, 2345.34it/s]
14it [00:00, 2183.72it/s]
