In [None]:
from collections import Counter, defaultdict
from pathlib import Path

import torchaudio

from src.data.dataset import AudioClassificationDataset

In [None]:
def calculate_wav_duration_seconds(filepath: Path) -> float:
    """Return the playback length, in seconds, of the audio file at ``filepath``.

    The file is decoded with ``torchaudio.load``; the duration is the size of
    the waveform's second dimension divided by the reported sample rate.

    Args:
        filepath: Location of the audio file to measure.

    Returns:
        The duration of the clip in seconds.
    """
    audio, rate = torchaudio.load(filepath)
    # Index 1 of the shape is taken as the per-channel frame count
    # (torchaudio's default layout is channels-first) — confirm if a
    # non-default layout is ever used.
    frame_count = audio.shape[1]
    return frame_count / rate


def _percentage(part: float, whole: float) -> float:
    """Return ``part`` as a percentage of ``whole``, or 0.0 when ``whole`` is zero."""
    return part / whole * 100 if whole else 0.0


def summarize(dataset: AudioClassificationDataset) -> None:
    """Print summary statistics for an audio classification dataset.

    Reports the number of samples, the dataset's sample rate, the total and
    mean clip duration, the number of distinct labels and speakers, and a
    per-class table of sample counts and durations (absolute and as a
    percentage of the whole).

    Args:
        dataset: Object exposing ``samples`` (a sequence of mappings with
            ``"label"``, ``"speaker"`` and ``"filepath"`` keys),
            ``SAMPLE_RATE`` and ``CLASS_LABELS``.

    Note:
        ``calculate_wav_duration_seconds`` is called once per sample, so every
        audio file is read; this can be slow on large subsets.
    """
    samples = dataset.samples
    num_samples = len(samples)
    class_counter = Counter(sample["label"] for sample in samples)
    speaker_counter = Counter(sample["speaker"] for sample in samples)

    # Accumulate the total duration per label in a single pass over the samples.
    class_durations = defaultdict(float)
    for sample in samples:
        class_durations[sample["label"]] += calculate_wav_duration_seconds(sample["filepath"])
    total_seconds = sum(class_durations.values())
    # An empty subset has no meaningful mean; report 0 rather than raising
    # ZeroDivisionError.
    mean_duration = total_seconds / num_samples if num_samples else 0.0

    print(f"Number of samples: {num_samples}")
    print(f"Sample rate: {dataset.SAMPLE_RATE}")
    print(f"Total duration: {total_seconds:.2f}s")
    print(f"Mean duration: {mean_duration:.2f}s")
    print(f"Number of classes: {len(class_counter)}")
    print(f"Number of speakers: {len(speaker_counter)}")
    print("Number of samples per class:")
    print("\tLabel\tNum.\tNum. %\tSec.\tSec. %")
    for label in dataset.CLASS_LABELS:
        # Indexing a Counter yields 0 for labels absent from this subset,
        # whereas .get() would return None and break the arithmetic below.
        count = class_counter[label]
        # .get() avoids inserting empty entries into the defaultdict.
        label_seconds = class_durations.get(label, 0.0)
        class_info_string = f"\t{label} \t{count} \t"
        class_info_string += f"{_percentage(count, num_samples):.2f}%\t"
        class_info_string += f"{label_seconds:.0f}s\t"
        class_info_string += f"{_percentage(label_seconds, total_seconds):.2f}%"
        print(class_info_string)

In [None]:
# Print summary statistics for the "training" subset.
summarize(AudioClassificationDataset(subset="training"))

In [None]:
# Print summary statistics for the "validation" subset.
summarize(AudioClassificationDataset(subset="validation"))

In [None]:
# Print summary statistics for the "testing" subset.
summarize(AudioClassificationDataset(subset="testing"))