# Explore Dataset

This notebook extracts and saves metadata from the Soundly-80K dataset.

In [None]:
from collections import Counter
import pandas as pd
import torch
import torchaudio
import os
import matplotlib.pyplot as plt
import matplotlib
import IPython.display as ipd
import json
import numpy as np

In [None]:
# Save dataset as .csv
# Walks the Soundly library, reads each file's audio header with
# torchaudio.info, and writes one metadata row per readable file.
audio_path = 'E:/Soundly Library juni 2023/Soundly Pro'

# Number of leading path components that make up the library root itself.
# Derived from audio_path (3 for the path above) rather than hard-coded, so
# the category lookup below stays correct if audio_path is changed.
base_depth = len(audio_path.replace("\\", "/").rstrip("/").split("/"))

# Set to True to print the directory/file listing of every visited folder.
print_walk = False

audio_files = []  # not used in this cell
metadata = {
    'path': [],
    'channels': [],
    'samples': [],
    'sample_rate': [],
    'bit_depth': [],
    'category': [],
    'sub_category': [],
}
for path, dirs, files in os.walk(audio_path):
    if print_walk:
        if dirs:
            print(dirs)
        if files:
            print(files)

    for file in files:
        # os.walk may return backslash-separated paths; normalise to "/".
        file_path = f"{path}/{file}".replace("\\", "/")
        # Components below the library root.
        path_split = file_path.split("/")[base_depth:]

        # Default categories, given if no other category info is found.
        category = 'other'
        sub_category = 'none'

        # Only the layout <category>/<sub_category>/<file> carries category info.
        if len(path_split) == 3:
            category, sub_category = path_split[:2]

        # Only the header read is guarded. Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit stop the walk instead of being
        # reported as an unreadable file.
        try:
            data = torchaudio.info(file_path)
        except Exception as err:
            print(f"File '{file}' could not be opened: {err}")
            continue

        # All columns are appended together so the lists stay row-aligned.
        metadata['path'].append(file_path)
        metadata['channels'].append(data.num_channels)
        metadata['samples'].append(data.num_frames)
        metadata['sample_rate'].append(data.sample_rate)
        metadata['bit_depth'].append(data.bits_per_sample)
        metadata['category'].append(category.lower())
        metadata['sub_category'].append(sub_category.lower())


df = pd.DataFrame.from_dict(metadata)
df.to_csv("E:/datasets/soundly_ds.csv", index=False)

In [None]:
def plot_dict(data:dict, title:str="Title", ylabel:str="Label Y", xlabel:str="Label X", sorting_method=1) -> None:
    """Show a bar chart of a mapping, with bars in descending order.

    Args:
        data: Mapping from bar label to bar height. Labels are converted to
            strings before plotting.
        title: Figure title.
        ylabel: Label of the y axis.
        xlabel: Label of the x axis.
        sorting_method: Index into each ``(key, value)`` pair used as the
            sort key: 0 sorts by key, 1 sorts by value.
    """
    sorted_data = sorted(data.items(), key=lambda item: item[sorting_method], reverse=True)

    # Build labels and heights directly from the sorted pairs. Unzipping with
    # zip(*sorted_data) yields nothing for an empty mapping, so indexing the
    # result raised IndexError.
    labels = [str(key) for key, _ in sorted_data]
    heights = [value for _, value in sorted_data]

    plt.figure(figsize=(10,15))
    plt.xticks(rotation = 90, fontsize = 16)
    plt.title(title, fontsize = 40)
    plt.ylabel(ylabel, fontsize = 40)
    plt.xlabel(xlabel, fontsize = 40)
    bars = plt.bar(labels, heights)
    # Print each bar's value above it.
    plt.bar_label(bars)
    plt.show()

# metadata['sample_rate'] is a list with one entry per file, but plot_dict
# expects a mapping; count the files per sample rate before plotting.
plot_dict(Counter(metadata['sample_rate']), title="Sample Rates (Soundly)", ylabel="Samples", xlabel="Rates", sorting_method=0)

In [None]:
# Get info about the Soundly dataset and save it as a separate csv file.
# NOTE(review): the export cell writes "E:/datasets/soundly_ds.csv", but this
# cell reads "E:/datasets/soundly.csv" - confirm which file is intended.
df = pd.read_csv("E:/datasets/soundly.csv")

# Number of files per distinct sample rate / bit depth / channel count.
rates = Counter(df['sample_rate'])
bit_depths = Counter(df['bit_depth'])
n_channels = Counter(df['channels'])

# Duration of every file in seconds, using that file's own sample rate.
# The dataset mixes several rates (counted below), so dividing the sample
# count by a fixed 96000 gives wrong durations for files at other rates.
durations = df['samples'] / df['sample_rate']

# One single-element list per feature, so that from_dict builds a one-row frame.
dataset_soundly_meta = {
    'audio files' : [len(df.index)],
    'shortest sound (samples)' : [df['samples'].min()],
    'shortest sound (seconds)' : [durations.min()],
    'longest sound (samples)' : [df['samples'].max()],
    'longest sound (seconds)' : [durations.max()],
    '1 channel': [n_channels[1]],
    '2 channels': [n_channels[2]],
    '3 channels': [n_channels[3]],  # was n_channels[4], a copy-paste slip
    '4 channels': [n_channels[4]],
    '5 channels': [n_channels[5]],
    '6 channels': [n_channels[6]],
    'sample rate 192000' : [rates[192000]],
    'sample rate 96000' : [rates[96000]],
    'sample rate 48000' : [rates[48000]],
    'sample rate 44100' : [rates[44100]],
    'bit depth 32' : [bit_depths[32]],
    'bit depth 24' : [bit_depths[24]],
    'bit depth 16' : [bit_depths[16]],
}

# Transpose so each feature becomes a row; reset_index turns the feature
# names into a regular column.
df = pd.DataFrame.from_dict(dataset_soundly_meta).T
# `index` is a boolean switch: the feature names are already a column, so the
# numeric row index is not written.
df.reset_index().to_csv("E:/metadata_soundly.csv", index=False, header=["feature", "value"])

## Plot dataset classes

In [None]:
# Plot the number of files per category as a horizontal bar chart and save it
# as a PNG.
# NOTE(review): this cell reads from drive "D:" while the other cells use
# "E:" - confirm the path.
df = pd.read_csv("D:/datasets/soundly.csv")
# value_counts() returns the categories ordered from most to least frequent.
classes = df['category'].value_counts()

fig = plt.figure(2, figsize=(16, 28))
ax = fig.subplots(1)

bar = ax.barh(classes.index, classes.values, color="#004DC6", height=.8)
# Print each category's count next to its bar.
ax.bar_label(bar, padding=1, fontsize=12)
ax.set_yticks(np.arange(len(classes)), labels=list(classes.index))
# Put the first (most frequent) category at the top of the chart.
ax.invert_yaxis()

# NOTE(review): fixed upper limit; a category with more than 11000 files
# would extend past the visible axis range.
ax.set_xlim(0, 11000)
ax.set_xlabel('Samples', fontsize=32)
ax.set_ylabel("UCS Categories", fontsize=32)
ax.set_title("Soundly-80K Class Distribution", fontsize=24, pad=20)
ax.tick_params(axis='x', which='major', labelsize=16)
ax.tick_params(axis='y', which='major', labelsize=16)
ax.grid(color='black', linestyle='-', linewidth=1, axis='x', alpha=0.2)
ax.margins(0.01)

plt.savefig("soundly_class_distribution.png", dpi=300, bbox_inches='tight')