In [12]:
import os

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

print(f'Torch cuda enabled: {torch.cuda.is_available()}')

urban_path = '~/sound_datasets/urbansound8k/metadata/UrbanSound8K.csv'

Torch cuda enabled: True


In [18]:
# Return a list of maps like:
# [{train: [fold_1_training_data_paths], test: [fold_1_test_data_paths]}),
#  {train: [fold_2_training_data_paths], test: [fold_2_test_data_paths]}),
#  ...
#  {train: [fold_10_training_data_paths], test: [fold_10_test_data_paths]}),
# ]
def k_fold_urban_sound(metadata_path):
    """
    Extract the 10 recommended folds of UrbanSound8K
    
    Returns:
        a list of map folds in the form:
    [{train: [fold_1_training_data_paths], test: [fold_1_test_data_paths]}),
     {train: [fold_2_training_data_paths], test: [fold_2_test_data_paths]}),
     ...
     {train: [fold_10_training_data_paths], test: [fold_10_test_data_paths]}),
     ]
        
    """
    folds = []
    frame = pd.read_csv(metadata_path)
    frame.info()
    
    print("\nSummarizing folds:")
    print('-----------------------------------------------------------')
    for i in range(1,11):
        train_mask = frame['fold'] != i
        test_mask = frame['fold'] == i
        
        print(f'Training set size for fold {i} : {len(frame[train_mask])}')
        print(f'Test set size for fold {i} : {len(frame[test_mask])}')
        
        train = frame[train_mask]
        test = frame[test_mask]
        
        train_paths = train.apply(lambda r: f"{r['fold']}/{r['slice_file_name']}", axis=1)
        test_paths = train.apply(lambda r: f"{r['fold']}/{r['slice_file_name']}", axis=1)
        
        folds.append({"train": train_paths.tolist(), "test": test_paths.tolist()})
        print('-----------------------------------------------------------')
    print("\n\n")
    
    return folds

In [19]:
def examine_urban_sound(urban_path):
    df = pd.read_csv(urban_path)
    total = len(df)
    
    print(f"Total samples: {total}")
    print(f"{'Class':<16} | {'Frequency':<10} | {'Percentage':<10}")
    print("-" * 40)
    
    distribution = df['class'].value_counts()
    for cls, freq in distribution.items():
        percentage = (freq / total) * 100
        print(f"{cls:<16} | {freq:<10} | {percentage:.2f}%")
    print("-" * 40 + "\n")
    
    df["duration"] = df["end"] - df["start"]
    print(f"Duration statistics: \n{df['duration'].describe()}")
    

In [20]:
class UrbanSoundDataSet(Dataset):
    def __init__(self, path, fold):
        raise NotImplementedError
    def __getitem__(self, x):
        raise NotImplementedError
    def __len__(self):
        raise NotImplementedError

In [21]:
folds = k_fold_urban_sound(urban_path)
examine_urban_sound(urban_path)

#TODO: Examine each fold

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8732 entries, 0 to 8731
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   slice_file_name  8732 non-null   object 
 1   fsID             8732 non-null   int64  
 2   start            8732 non-null   float64
 3   end              8732 non-null   float64
 4   salience         8732 non-null   int64  
 5   fold             8732 non-null   int64  
 6   classID          8732 non-null   int64  
 7   class            8732 non-null   object 
dtypes: float64(2), int64(4), object(2)
memory usage: 545.9+ KB

Summarizing folds:
-----------------------------------------------------------
Training set size for fold 1 : 7859
Test set size for fold 1 : 873
-----------------------------------------------------------
Training set size for fold 2 : 7844
Test set size for fold 2 : 888
-----------------------------------------------------------
Training set size for fold 3 : 7807


In [22]:
print(folds[0])

{'train': ['5/100032-3-0-0.wav', '5/100263-2-0-117.wav', '5/100263-2-0-121.wav', '5/100263-2-0-126.wav', '5/100263-2-0-137.wav', '5/100263-2-0-143.wav', '5/100263-2-0-161.wav', '5/100263-2-0-3.wav', '5/100263-2-0-36.wav', '10/100648-1-0-0.wav', '10/100648-1-1-0.wav', '10/100648-1-2-0.wav', '10/100648-1-3-0.wav', '10/100648-1-4-0.wav', '2/100652-3-0-0.wav', '2/100652-3-0-1.wav', '2/100652-3-0-2.wav', '2/100652-3-0-3.wav', '10/100795-3-0-0.wav', '10/100795-3-1-0.wav', '10/100795-3-1-1.wav', '10/100795-3-1-2.wav', '5/100852-0-0-0.wav', '5/100852-0-0-1.wav', '5/100852-0-0-10.wav', '5/100852-0-0-11.wav', '5/100852-0-0-12.wav', '5/100852-0-0-13.wav', '5/100852-0-0-14.wav', '5/100852-0-0-15.wav', '5/100852-0-0-16.wav', '5/100852-0-0-17.wav', '5/100852-0-0-18.wav', '5/100852-0-0-19.wav', '5/100852-0-0-2.wav', '5/100852-0-0-20.wav', '5/100852-0-0-21.wav', '5/100852-0-0-22.wav', '5/100852-0-0-23.wav', '5/100852-0-0-24.wav', '5/100852-0-0-25.wav', '5/100852-0-0-26.wav', '5/100852-0-0-27.wav', '5/