# Audio Dataset Explorer
## Notebook interattivo per esplorare i dataset ESC-50 e VEGAS

Questo notebook permette di:
- 🎵 Ascoltare campioni audio
- 🖼️ Visualizzare immagini associate
- 📊 Vedere statistiche dei dataset
- 🔍 Esplorare classi e samples in modo interattivo

## 1. Setup e Import

In [1]:
import sys
sys.path.insert(0, '/home/lpala/fedgfe/system')

import torch
import torchaudio
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio, display, HTML
from PIL import Image
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import warnings
warnings.filterwarnings('ignore')

# Import datasets
from datautils.dataset_esc50 import ESC50Dataset
from datautils.dataset_vegas import VEGASDataset

print("‚úÖ Imports successful!")

INFO:numexpr.utils:Note: detected 384 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
INFO:numexpr.utils:Note: NumExpr detected 384 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


‚úÖ Imports successful!


## 2. Helper Functions

In [2]:
def plot_waveform(waveform, sample_rate, title="Waveform"):
    """Plot an audio waveform against time in seconds.

    Args:
        waveform: Audio tensor whose last dimension is the frame axis.
            A 2-D ``(traces, frames)`` tensor is drawn as one line per
            trace.
        sample_rate: Sampling rate in Hz, used to convert frame indices
            into seconds.
        title: Figure title.
    """
    # detach()/cpu() keep the NumPy conversion working for tensors that
    # track gradients or live on another device.
    samples = waveform.detach().cpu().numpy()
    num_frames = samples.shape[-1]
    time_axis = np.arange(num_frames) / sample_rate

    # matplotlib needs y shaped (frames, traces) to match the x axis, so
    # the frame axis is moved to the front. For 1-D input the transpose
    # is a no-op and the plot is the same as before.
    if samples.ndim <= 2:
        traces = samples.T
    else:
        traces = samples.reshape(-1, num_frames).T

    plt.figure(figsize=(12, 3))
    plt.plot(time_axis, traces)
    plt.grid(True)
    plt.title(title)
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.show()

def plot_spectrogram(waveform, sample_rate, title="Spectrogram"):
    """Draw the spectrogram of an audio tensor in a new figure.

    Args:
        waveform: Audio tensor; converted to NumPy and handed to
            ``plt.specgram``.
        sample_rate: Sampling rate in Hz (passed as ``Fs``).
        title: Figure title.
    """
    signal = waveform.numpy()

    plt.figure(figsize=(12, 4))
    plt.specgram(signal, Fs=sample_rate, cmap='viridis')

    # Labels and colour bar are added after the spectrogram is drawn.
    plt.title(title)
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")
    plt.colorbar(label='Intensity (dB)')

    plt.tight_layout()
    plt.show()

def display_image(image_tensor, title="Image"):
    """Show a channel-first image tensor with matplotlib.

    Args:
        image_tensor: Image tensor indexed as (channel, row, column); it
            is permuted to (row, column, channel) before display.
        title: Figure title.
    """
    # Negative values are taken as a sign that the tensor was normalised;
    # the normalisation is undone with the per-channel constants below
    # (they match the commonly used ImageNet mean/std values).
    if image_tensor.min() < 0:
        channel_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        channel_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
        image_tensor = image_tensor * channel_std + channel_mean

    # Clip into [0, 1] and move channels last, as imshow expects.
    pixels = image_tensor.clamp(0, 1).permute(1, 2, 0).numpy()

    plt.figure(figsize=(6, 6))
    plt.imshow(pixels)
    plt.title(title)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

def display_sample_info(sample):
    """Print a framed summary of one dataset sample.

    Args:
        sample: Sample mapping. ``class_name``, ``label``,
            ``audio_filename``, ``image_filename`` and ``audio`` are read
            unconditionally; ``image``, ``video``, ``fold`` and
            ``caption`` are printed only when present (``caption`` also
            has to be non-empty).
    """
    rule = "=" * 60

    print("\n" + rule)
    print("üìã SAMPLE INFORMATION")
    print(rule)

    # Fields every sample is expected to carry.
    print(f"üè∑Ô∏è  Class: {sample['class_name']}")
    print(f"üî¢ Label: {sample['label'].item()}")
    print(f"üìù Audio file: {sample['audio_filename']}")
    print(f"üñºÔ∏è  Image file: {sample['image_filename']}")
    print(f"üìä Audio shape: {sample['audio'].shape}")

    # Optional tensors: report the shape only when the key exists.
    optional_tensors = (
        ('image', "üé® Image shape"),
        ('video', "üé• Video shape"),
    )
    for key, heading in optional_tensors:
        if key in sample:
            print(f"{heading}: {sample[key].shape}")

    if 'fold' in sample:
        print(f"üìÅ Fold: {sample['fold']}")

    has_caption = 'caption' in sample
    if has_caption and sample['caption']:
        print(f"üí¨ Caption: {sample['caption']}")

    print(rule)

def play_audio(waveform, sample_rate):
    """Build an IPython ``Audio`` player for an audio tensor.

    Args:
        waveform: Audio tensor; converted to a NumPy array for the player.
        sample_rate: Playback rate in Hz.

    Returns:
        The ``Audio`` object, ready to be passed to ``display``.
    """
    samples = waveform.numpy()
    player = Audio(samples, rate=sample_rate)
    return player

# Confirmation that the helper-function cell ran to completion.
print("‚úÖ Helper functions loaded!")

‚úÖ Helper functions loaded!


## 3. Load Datasets

### 3.1 ESC-50 Dataset

In [3]:
# ESC-50 class filter. None selects every class; to restrict the dataset,
# assign a list of class names instead, for example:
# esc50_classes = ['dog', 'cat', 'rooster', 'chainsaw', 'helicopter', 'airplane']
esc50_classes = None

print("Loading ESC-50 dataset with classes:", esc50_classes)
esc50_dataset = ESC50Dataset(
    root_dir="/home/lpala/fedgfe/dataset/Audio/esc50-v2.0.0-full",
    selected_classes=esc50_classes,
    split='all',
    use_folds=False,
    enable_cache=False,
)

# Short summary of what was loaded.
print("\n‚úÖ ESC-50 Dataset loaded!")
print("   Total samples:", len(esc50_dataset))
print("   Classes:", esc50_dataset.get_class_names())
print("   Samples per class:", esc50_dataset.get_samples_per_class())

Loading ESC-50 dataset with classes: None


INFO:datautils.dataset_esc50:ESC-50 Dataset initialized: 2000 samples, classes: 50, split: all



‚úÖ ESC-50 Dataset loaded!
   Total samples: 2000
   Classes: ['airplane', 'breathing', 'brushing_teeth', 'can_opening', 'car_horn', 'cat', 'chainsaw', 'chirping_birds', 'church_bells', 'clapping', 'clock_alarm', 'clock_tick', 'coughing', 'cow', 'crackling_fire', 'crickets', 'crow', 'crying_baby', 'dog', 'door_wood_creaks', 'door_wood_knock', 'drinking_sipping', 'engine', 'fireworks', 'footsteps', 'frog', 'glass_breaking', 'hand_saw', 'helicopter', 'hen', 'insects', 'keyboard_typing', 'laughing', 'mouse_click', 'pig', 'pouring_water', 'rain', 'rooster', 'sea_waves', 'sheep', 'siren', 'sneezing', 'snoring', 'thunderstorm', 'toilet_flush', 'train', 'vacuum_cleaner', 'washing_machine', 'water_drops', 'wind']
   Samples per class: {'dog': 40, 'chirping_birds': 40, 'vacuum_cleaner': 40, 'thunderstorm': 40, 'door_wood_knock': 40, 'can_opening': 40, 'crow': 40, 'clapping': 40, 'fireworks': 40, 'chainsaw': 40, 'airplane': 40, 'mouse_click': 40, 'pouring_water': 40, 'train': 40, 'sheep': 40, '

### 3.2 VEGAS Dataset

In [4]:
# VEGAS class filter: edit this list to load a different subset.
vegas_classes = ['dog', 'baby_cry', 'chainsaw']

print("Loading VEGAS dataset with classes:", vegas_classes)
vegas_dataset = VEGASDataset(
    root_dir="/home/lpala/fedgfe/dataset/Audio/VEGAS",
    selected_classes=vegas_classes,
    split='all',
    enable_cache=False,
    load_image=True,
)

# Short summary of what was loaded.
print("\n‚úÖ VEGAS Dataset loaded!")
print("   Total samples:", len(vegas_dataset))
print("   Classes:", vegas_dataset.get_class_names())
print("   Samples per class:", vegas_dataset.get_samples_per_class())

Loading VEGAS dataset with classes: ['dog', 'baby_cry', 'chainsaw']


INFO:datautils.dataset_vegas:VEGAS Dataset initialized: 6668 samples, classes: ['baby_cry', 'chainsaw', 'dog'], split: all



‚úÖ VEGAS Dataset loaded!
   Total samples: 6668
   Classes: ['baby_cry', 'chainsaw', 'dog']
   Samples per class: {'baby_cry': 2059, 'chainsaw': 1824, 'dog': 2785}


## 4. Interactive Dataset Explorer

### 4.1 ESC-50 Explorer

In [None]:
def explore_esc50_sample(sample_idx):
    """Show metadata, image, waveform, spectrogram and player for a sample.

    Args:
        sample_idx: Index into ``esc50_dataset``.

    Returns:
        The sample that was displayed.
    """
    sample_rate = 16000  # hard-coded rate handed to the plots and player
    sample = esc50_dataset[sample_idx]

    display_sample_info(sample)

    label = sample['class_name']
    display_image(sample['image'], f"Image - {label}")

    audio = sample['audio']
    plot_waveform(audio, sample_rate, f"Waveform - {label}")
    plot_spectrogram(audio, sample_rate, f"Spectrogram - {label}")

    print("\nüéµ Audio Player:")
    display(play_audio(audio, sample_rate))

    return sample

# Slider covering every valid index of the ESC-50 dataset. With
# continuous_update=False the explorer is not re-run for the intermediate
# positions of a drag.
sample_slider = widgets.IntSlider(
    description='Sample:',
    min=0,
    max=len(esc50_dataset) - 1,
    value=0,
    step=1,
    continuous_update=False,
)

# Banner printed above the widget.
print("\n".join([
    "\n" + "=" * 60,
    "üéõÔ∏è  ESC-50 INTERACTIVE EXPLORER",
    "=" * 60,
    "Use the slider below to explore different samples",
    "=" * 60 + "\n",
]))

# Bind the slider to the explorer and render the combined widget.
interactive_plot = interactive(explore_esc50_sample, sample_idx=sample_slider)
display(interactive_plot)


üéõÔ∏è  ESC-50 INTERACTIVE EXPLORER
Use the slider below to explore different samples



interactive(children=(IntSlider(value=0, continuous_update=False, description='Sample:', max=1999), Output()),‚Ä¶

### 4.2 VEGAS Explorer

In [5]:
def explore_vegas_sample(sample_idx):
    """Show metadata, image, waveform, spectrogram and player for a sample.

    Args:
        sample_idx: Index into ``vegas_dataset``.

    Returns:
        The sample that was displayed.
    """
    sample_rate = 16000  # hard-coded rate handed to the plots and player
    sample = vegas_dataset[sample_idx]

    display_sample_info(sample)

    label = sample['class_name']

    # The image is shown only when the sample actually carries one.
    if 'image' in sample:
        display_image(sample['image'], f"Image - {label}")

    audio = sample['audio']
    plot_waveform(audio, sample_rate, f"Waveform - {label}")
    plot_spectrogram(audio, sample_rate, f"Spectrogram - {label}")

    print("\nüéµ Audio Player:")
    display(play_audio(audio, sample_rate))

    return sample

# Slider covering every valid index of the VEGAS dataset. With
# continuous_update=False the explorer is not re-run for the intermediate
# positions of a drag.
vegas_slider = widgets.IntSlider(
    description='Sample:',
    min=0,
    max=len(vegas_dataset) - 1,
    value=0,
    step=1,
    continuous_update=False,
)

# Banner printed above the widget.
print("\n".join([
    "\n" + "=" * 60,
    "üéõÔ∏è  VEGAS INTERACTIVE EXPLORER",
    "=" * 60,
    "Use the slider below to explore different samples",
    "=" * 60 + "\n",
]))

# Bind the slider to the explorer and render the combined widget.
interactive_vegas = interactive(explore_vegas_sample, sample_idx=vegas_slider)
display(interactive_vegas)


üéõÔ∏è  VEGAS INTERACTIVE EXPLORER
Use the slider below to explore different samples



interactive(children=(IntSlider(value=0, continuous_update=False, description='Sample:', max=6667), Output()),‚Ä¶

## 5. Class-based Explorer

### 5.1 ESC-50 by Class

In [6]:
# Index of {class name: [dataset indices]} for ESC-50, remembered between
# widget callbacks. "dataset" records which dataset object the index was
# built from, so re-running the loading cell invalidates it.
_esc50_class_index = {"dataset": None, "by_class": {}}


def _esc50_indices_by_class():
    """Return ``{class_name: [indices]}`` for the current ``esc50_dataset``."""
    cache = _esc50_class_index
    if cache["dataset"] is not esc50_dataset:
        # One pass over the dataset serves every class. Each dataset[i]
        # returns a full sample dict, so this scan is the expensive part
        # and is worth doing only once.
        by_class = {}
        for idx in range(len(esc50_dataset)):
            name = esc50_dataset[idx]['class_name']
            by_class.setdefault(name, []).append(idx)
        cache["dataset"] = esc50_dataset
        cache["by_class"] = by_class
    return cache["by_class"]


def explore_esc50_by_class(class_name, sample_in_class):
    """Show one ESC-50 sample chosen by class name and position.

    Args:
        class_name: Class to browse.
        sample_in_class: Zero-based position within the class. Values
            past the end wrap around (modulo the class size).

    Returns:
        None. Output is printed and displayed.
    """
    class_samples = _esc50_indices_by_class().get(class_name, [])

    if not class_samples:
        print(f"No samples found for class: {class_name}")
        return

    print(f"Found {len(class_samples)} samples for class '{class_name}'")

    # Wrap first, then report the wrapped position, so the message always
    # names the sample that is actually shown.
    position = sample_in_class % len(class_samples)
    sample = esc50_dataset[class_samples[position]]

    # Display info
    print(f"\nShowing sample {position + 1} of {len(class_samples)}")
    display_sample_info(sample)

    # Display image
    display_image(sample['image'], f"Image - {sample['class_name']}")

    # Display waveform
    plot_waveform(
        sample['audio'],
        16000,
        f"Waveform - {sample['class_name']}"
    )

    # Play audio
    print("\nüéµ Audio Player:")
    display(play_audio(sample['audio'], 16000))

# Create dropdown and slider
class_dropdown = widgets.Dropdown(
    options=esc50_dataset.get_class_names(),
    description='Class:',
)

# Per-class sample counts, used to size the slider so that every sample of
# the selected class is reachable (a fixed max of 10 exposed only the
# first 11).
_esc50_class_counts = esc50_dataset.get_samples_per_class()


def _esc50_last_sample_position(class_name):
    """Return the highest valid slider position for ``class_name``."""
    return max(_esc50_class_counts.get(class_name, 1) - 1, 0)


class_sample_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=_esc50_last_sample_position(class_dropdown.value),
    step=1,
    description='Sample #:',
    continuous_update=False
)


def _sync_esc50_slider_range(change):
    """Resize the sample slider when a different class is selected."""
    class_sample_slider.max = _esc50_last_sample_position(change['new'])


class_dropdown.observe(_sync_esc50_slider_range, names='value')

print("\n" + "="*60)
print("üéØ ESC-50 CLASS-BASED EXPLORER")
print("="*60)
print("Select a class and browse through its samples")
print("="*60 + "\n")

interactive_class = interactive(
    explore_esc50_by_class,
    class_name=class_dropdown,
    sample_in_class=class_sample_slider
)
display(interactive_class)


üéØ ESC-50 CLASS-BASED EXPLORER
Select a class and browse through its samples



interactive(children=(Dropdown(description='Class:', options=('airplane', 'breathing', 'brushing_teeth', 'can_‚Ä¶

### 5.2 VEGAS by Class

In [None]:
# Index of {class name: [dataset indices]} for VEGAS, remembered between
# widget callbacks. "dataset" records which dataset object the index was
# built from, so re-running the loading cell invalidates it.
_vegas_class_index = {"dataset": None, "by_class": {}}


def _vegas_indices_by_class():
    """Return ``{class_name: [indices]}`` for the current ``vegas_dataset``."""
    cache = _vegas_class_index
    if cache["dataset"] is not vegas_dataset:
        # One pass over the dataset serves every class. Each dataset[i]
        # returns a full sample dict, so this scan is the expensive part
        # and is worth doing only once.
        by_class = {}
        for idx in range(len(vegas_dataset)):
            name = vegas_dataset[idx]['class_name']
            by_class.setdefault(name, []).append(idx)
        cache["dataset"] = vegas_dataset
        cache["by_class"] = by_class
    return cache["by_class"]


def explore_vegas_by_class(class_name, sample_in_class):
    """Show one VEGAS sample chosen by class name and position.

    Args:
        class_name: Class to browse.
        sample_in_class: Zero-based position within the class. Values
            past the end wrap around (modulo the class size).

    Returns:
        None. Output is printed and displayed.
    """
    class_samples = _vegas_indices_by_class().get(class_name, [])

    if not class_samples:
        print(f"No samples found for class: {class_name}")
        return

    print(f"Found {len(class_samples)} samples for class '{class_name}'")

    # Wrap first, then report the wrapped position, so the message always
    # names the sample that is actually shown.
    position = sample_in_class % len(class_samples)
    sample = vegas_dataset[class_samples[position]]

    # Display info
    print(f"\nShowing sample {position + 1} of {len(class_samples)}")
    display_sample_info(sample)

    # Display image (guarded, as in explore_vegas_sample, because a sample
    # may come without an 'image' entry)
    if 'image' in sample:
        display_image(sample['image'], f"Image - {sample['class_name']}")

    # Display waveform
    plot_waveform(
        sample['audio'],
        16000,
        f"Waveform - {sample['class_name']}"
    )

    # Play audio
    print("\nüéµ Audio Player:")
    display(play_audio(sample['audio'], 16000))

# Create dropdown and slider
vegas_class_dropdown = widgets.Dropdown(
    options=vegas_dataset.get_class_names(),
    description='Class:',
)

# Per-class sample counts, used to size the slider so that every sample of
# the selected class is reachable (a fixed max of 10 exposed only the
# first 11).
_vegas_class_counts = vegas_dataset.get_samples_per_class()


def _vegas_last_sample_position(class_name):
    """Return the highest valid slider position for ``class_name``."""
    return max(_vegas_class_counts.get(class_name, 1) - 1, 0)


vegas_class_sample_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=_vegas_last_sample_position(vegas_class_dropdown.value),
    step=1,
    description='Sample #:',
    continuous_update=False
)


def _sync_vegas_slider_range(change):
    """Resize the sample slider when a different class is selected."""
    vegas_class_sample_slider.max = _vegas_last_sample_position(change['new'])


vegas_class_dropdown.observe(_sync_vegas_slider_range, names='value')

print("\n" + "="*60)
print("üéØ VEGAS CLASS-BASED EXPLORER")
print("="*60)
print("Select a class and browse through its samples")
print("="*60 + "\n")

interactive_vegas_class = interactive(
    explore_vegas_by_class,
    class_name=vegas_class_dropdown,
    sample_in_class=vegas_class_sample_slider
)
display(interactive_vegas_class)

## 6. Dataset Statistics

In [None]:
def plot_class_distribution(dataset, title):
    """Plot samples-per-class as a bar chart and print summary statistics.

    Args:
        dataset: Object providing ``get_samples_per_class()`` (a mapping
            of class name to sample count) and ``len()``.
        title: Dataset name used in the chart title and printed header.

    Returns:
        None. Nothing is plotted when the dataset reports no classes.
    """
    samples_per_class = dataset.get_samples_per_class()

    # With no classes, min()/max() would raise and the average would
    # divide by zero, so report the situation and stop.
    if not samples_per_class:
        print(f"No classes to plot for {title}")
        return

    class_names = list(samples_per_class)
    counts = list(samples_per_class.values())

    plt.figure(figsize=(12, 6))
    plt.bar(class_names, counts)
    plt.title(f"{title} - Class Distribution")
    plt.xlabel("Class")
    plt.ylabel("Number of Samples")
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Print statistics
    print(f"\nüìä {title} Statistics:")
    print(f"   Total samples: {len(dataset)}")
    print(f"   Number of classes: {len(counts)}")
    print(f"   Min samples per class: {min(counts)}")
    print(f"   Max samples per class: {max(counts)}")
    print(f"   Avg samples per class: {sum(counts) / len(counts):.1f}")

# Class balance of the ESC-50 dataset.
plot_class_distribution(dataset=esc50_dataset, title="ESC-50")

# Class balance of the VEGAS dataset.
plot_class_distribution(dataset=vegas_dataset, title="VEGAS")

## 7. Audio Analysis

In [None]:
def analyze_audio_statistics(dataset, dataset_name, num_samples=20,
                             sample_rate=16000):
    """Summarise amplitude and duration over the first samples of a dataset.

    Args:
        dataset: Indexable dataset whose items expose an ``'audio'`` tensor.
        dataset_name: Name used in the printed header and plot titles.
        num_samples: Maximum number of leading samples to inspect.
        sample_rate: Rate in Hz used to convert frame counts to seconds.

    Returns:
        None. Statistics are printed and two histograms are shown. Nothing
        is plotted when there are no samples to inspect.
    """
    print(f"\nüî¨ Analyzing {dataset_name} audio statistics (first {num_samples} samples)...\n")

    amplitudes = []
    durations = []

    for i in range(min(num_samples, len(dataset))):
        audio = dataset[i]['audio']

        amplitudes.append(audio.abs().mean().item())
        # The frame axis is the last one; len(audio) would return the
        # first dimension, which is wrong for multi-dimensional audio.
        durations.append(audio.shape[-1] / sample_rate)

    # An empty dataset (or num_samples <= 0) would otherwise give NaN
    # statistics and empty histograms.
    if not amplitudes:
        print(f"No samples to analyze for {dataset_name}")
        return

    print(f"üìà Audio Statistics:")
    print(f"   Mean amplitude: {np.mean(amplitudes):.4f}")
    print(f"   Std amplitude: {np.std(amplitudes):.4f}")
    print(f"   Mean duration: {np.mean(durations):.2f}s")
    print(f"   Std duration: {np.std(durations):.2f}s")

    # Side-by-side histograms: amplitude on the left, duration on the right.
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.hist(amplitudes, bins=20, edgecolor='black')
    plt.title(f"{dataset_name} - Amplitude Distribution")
    plt.xlabel("Mean Amplitude")
    plt.ylabel("Count")
    plt.grid(axis='y', alpha=0.3)

    plt.subplot(1, 2, 2)
    plt.hist(durations, bins=20, edgecolor='black')
    plt.title(f"{dataset_name} - Duration Distribution")
    plt.xlabel("Duration (s)")
    plt.ylabel("Count")
    plt.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

# Amplitude/duration summary over the first 20 samples of each dataset.
analyze_audio_statistics(dataset=esc50_dataset, dataset_name="ESC-50", num_samples=20)
analyze_audio_statistics(dataset=vegas_dataset, dataset_name="VEGAS", num_samples=20)

## 8. Compare Samples from Different Classes

In [None]:
def compare_classes(dataset, class1, class2):
    """Plot and play the first sample of two classes side by side.

    Args:
        dataset: Indexable dataset whose items carry ``'class_name'`` and
            ``'audio'`` entries.
        class1: Class shown in the left column.
        class2: Class shown in the right column.

    Returns:
        None. A message is printed when either class has no sample.
    """
    first = None
    second = None

    # Single pass: remember the first hit for each class and stop as soon
    # as both are known.
    for idx in range(len(dataset)):
        candidate = dataset[idx]
        if first is None and candidate['class_name'] == class1:
            first = candidate
        if second is None and candidate['class_name'] == class2:
            second = candidate
        if first and second:
            break

    if not (first and second):
        print("Could not find samples for comparison")
        return

    # 2x2 grid: waveforms on the top row, spectrograms on the bottom row,
    # one column per class.
    fig, axes = plt.subplots(2, 2, figsize=(14, 8))

    for column, (label, chosen) in enumerate(((class1, first), (class2, second))):
        audio = chosen['audio']

        wave_ax = axes[0, column]
        seconds = torch.arange(0, len(audio)) / 16000
        wave_ax.plot(seconds, audio.numpy())
        wave_ax.set_title(f"Waveform - {label}")
        wave_ax.set_xlabel("Time (s)")
        wave_ax.set_ylabel("Amplitude")
        wave_ax.grid(True)

        spec_ax = axes[1, column]
        spec_ax.specgram(audio.numpy(), Fs=16000, cmap='viridis')
        spec_ax.set_title(f"Spectrogram - {label}")
        spec_ax.set_xlabel("Time (s)")
        spec_ax.set_ylabel("Frequency (Hz)")

    plt.tight_layout()
    plt.show()

    # One audio player per class, in the same order as the columns.
    for label, chosen in ((class1, first), (class2, second)):
        print(f"\nüéµ Audio Player - {label}:")
        display(play_audio(chosen['audio'], 16000))

# Two dropdowns over the ESC-50 class names. The first starts on the first
# class.
class1_dropdown = widgets.Dropdown(
    description='Class 1:',
    options=esc50_dataset.get_class_names(),
    value=esc50_dataset.get_class_names()[0],
)

# The second starts on the second class, or on the first when only one
# class exists: the last element of the leading two-item slice.
class2_dropdown = widgets.Dropdown(
    description='Class 2:',
    options=esc50_dataset.get_class_names(),
    value=esc50_dataset.get_class_names()[:2][-1],
)

# Banner printed above the widget.
print("\n".join([
    "\n" + "=" * 60,
    "üîÄ CLASS COMPARISON TOOL",
    "=" * 60,
    "Compare audio samples from two different classes",
    "=" * 60 + "\n",
]))

# The lambda pins the dataset so only the two class names stay interactive.
interactive_compare = interactive(
    lambda c1, c2: compare_classes(esc50_dataset, c1, c2),
    c1=class1_dropdown,
    c2=class2_dropdown,
)
display(interactive_compare)

## 9. Export Sample

Save a specific sample to disk for further analysis

In [None]:
def export_sample(dataset, sample_idx, output_dir="/tmp/audio_samples"):
    """Write one sample's audio (and image, when present) to disk.

    Files are named ``<class_name>_<sample_idx>.wav`` / ``.png`` inside
    ``output_dir``, which is created if needed.

    Args:
        dataset: Indexable dataset whose items carry ``'class_name'`` and
            ``'audio'`` entries and optionally ``'image'``.
        sample_idx: Index of the sample to export.
        output_dir: Destination directory.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    sample = dataset[sample_idx]
    class_name = sample['class_name']
    stem = f"{class_name}_{sample_idx}"

    # Save audio. torchaudio.save wants a 2-D (channels, frames) tensor,
    # so a channel axis is added only when the audio is 1-D; adding it
    # unconditionally would turn 2-D audio into an invalid 3-D tensor.
    audio_path = os.path.join(output_dir, f"{stem}.wav")
    audio = sample['audio']
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)
    torchaudio.save(audio_path, audio, 16000)

    # Save image, when the sample has one (samples may come without an
    # 'image' entry, as explore_vegas_sample already allows for).
    image_path = None
    if 'image' in sample:
        image_path = os.path.join(output_dir, f"{stem}.png")
        image_tensor = sample['image']
        # Negative values are taken as a sign of normalisation, which is
        # undone with the same constants display_image uses.
        if image_tensor.min() < 0:
            mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
            std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
            image_tensor = image_tensor * std + mean
        image_tensor = torch.clamp(image_tensor, 0, 1)
        image = image_tensor.permute(1, 2, 0).numpy()
        Image.fromarray((image * 255).astype(np.uint8)).save(image_path)

    print(f"‚úÖ Sample exported to:")
    print(f"   Audio: {audio_path}")
    if image_path is not None:
        print(f"   Image: {image_path}")

# Example: export first ESC-50 sample
# export_sample(esc50_dataset, 0)

## 10. Summary

This notebook provides:
- ✅ Interactive exploration of ESC-50 and VEGAS datasets
- ✅ Audio playback in notebook
- ✅ Waveform and spectrogram visualization
- ✅ Class-based browsing
- ✅ Dataset statistics and analysis
- ✅ Sample comparison tools
- ✅ Export functionality

### Quick Access:
- **Section 4**: Interactive explorers for both datasets
- **Section 5**: Class-based exploration
- **Section 8**: Compare samples from different classes

Enjoy exploring your audio datasets! 🎵