In [None]:
import os, sys
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import torch.nn.functional as F

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from utils import util, models, split

# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
# Report which accelerator was picked up, or that none is available.
print(torch.cuda.get_device_name(device) if device.type == 'cuda' else "CUDA not available")

In [None]:
import importlib

# Re-execute the project helper modules so edits made on disk are picked up
# without restarting the kernel (same order as before: util, models, split).
for stale_module in (util, models, split):
    importlib.reload(stale_module)

In [None]:
def get_spect_matrix_list(spects_source_dir, spects_meta_df):
    """
    Load grayscale spectrogram images as 2-D NumPy matrices (no CSV flattening).

    Metadata rows with a missing 'filename', 'class_id' or 'author' are dropped.
    Rows whose image file does not exist are reported and skipped. Images whose
    size differs from the expected 313x224 (width x height) are resized to it
    after printing a warning.

    Args:
        spects_source_dir (str): Directory where the spectrogram images are stored in .png format
        spects_meta_df (pd.DataFrame): DataFrame with columns 'filename', 'class_id', and 'author'

    Returns:
        tuple: (matrices_list, labels_list, authors_list) -- three parallel lists.
            Each matrix has shape (224, 313), i.e. (height, width).

    Raises:
        ValueError: If no spectrogram could be loaded at all.
    """
    # PIL is not part of the notebook's import cell, so it is imported here.
    from PIL import Image

    # PIL reports sizes as (width, height); np.array(img) yields (height, width).
    expected_size = (313, 224)

    matrices_list = []
    labels_list = []
    authors_list = []

    usable_meta = spects_meta_df.dropna(subset=['filename', 'class_id', 'author'])

    print(f"Processing {len(usable_meta)} spectrograms...")
    skipped_count = 0

    records = usable_meta[['filename', 'class_id', 'author']].itertuples(index=False, name=None)
    for filename, class_id, author in records:
        image_path = os.path.join(spects_source_dir, filename)
        if not os.path.exists(image_path):
            print(f"File not found: {image_path}")
            skipped_count += 1
            continue

        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaking one open handle per image.
        with Image.open(image_path) as raw_img:
            img = raw_img.convert('L')  # Ensure grayscale

        if img.size != expected_size:
            print(f"Warning: Unexpected image size: {img.size} in file {image_path}. Expected {expected_size}.")
            img = img.resize(expected_size)

        # Resulting array is (height, width) = (224, 313)
        matrices_list.append(np.array(img))
        labels_list.append(class_id)
        authors_list.append(author)

    print(f"Successfully processed: {len(matrices_list)}")
    print(f"Skipped: {skipped_count}")

    if not matrices_list:
        raise ValueError("No spectrograms were loaded. Check paths and metadata consistency.")

    return matrices_list, labels_list, authors_list


In [None]:
# Locate the spectrogram PNGs and read the metadata CSV that describes them
database_dir = os.path.join('..', 'database')
spect_dir = os.path.join(database_dir, 'spect')  # Spectrogram PNG directory
spects_df = pd.read_csv(os.path.join(database_dir, 'meta', 'final_spects.csv'))  # Metadata

print("Loading spectrograms directly into matrices...")
matrices_list, labels_list, authors_list = get_spect_matrix_list(spect_dir, spects_df)

# Short summary of what came back from the loader
print(f"Loaded {len(matrices_list)} spectrograms")
print(f"Matrix shape: {matrices_list[0].shape}")
for field_name, field_values in (("labels", labels_list), ("authors", authors_list)):
    print(f"Unique {field_name}: {len(set(field_values))}")

In [None]:
# Process data for training
labels = np.array(labels_list, dtype=np.int64)
authors = np.array(authors_list)

# Stack the per-sample matrices into one float32 array: (samples, height, width)
features = np.array(matrices_list, dtype=np.float32)

# Validate the layout explicitly. A hard-coded reshape would silently
# reinterpret the pixel buffer if the loader ever produced (313, 224) matrices,
# because both layouts contain the same number of elements.
expected_hw = (224, 313)
if features.shape[1:] != expected_hw:
    raise ValueError(
        f"Expected spectrograms of shape {expected_hw} (height, width), got {features.shape[1:]}"
    )
if not (len(features) == len(labels) == len(authors)):
    raise ValueError("features, labels and authors must contain the same number of samples")

# Convert to 0-1 range first, then standardization will be applied per fold
features /= 255.0
# Insert the channel axis for the CNN: (samples, channels, height, width)
features = features[:, np.newaxis, :, :]

print("features shape:", features.shape)
print("labels shape:", labels.shape)
print("authors shape:", authors.shape)

# Create metadata DataFrame for splitting (with sample indices)
metadata_df = pd.DataFrame({
    'sample_idx': range(len(labels)),
    'class_id': labels,
    'author': authors,
    'usable_segments': 1  # Each sample represents 1 segment
})

print("metadata_df shape:", metadata_df.shape)
print("Unique authors:", len(metadata_df['author'].unique()))
print("Unique classes:", len(metadata_df['class_id'].unique()))

In [None]:
# Re-import the project modules so that edits made to them on disk take effect
import importlib

for stale_module in (models, util, split):
    importlib.reload(stale_module)

In [None]:
# Prepare tensors for PyTorch.
# torch.as_tensor reuses the NumPy buffer when the dtype already matches
# (features is float32 and labels is int64 as built above), so the full
# spectrogram array is not duplicated in memory the way torch.tensor would.
# It still converts if the dtype ever differs.
X_tensor = torch.as_tensor(features, dtype=torch.float32)
y_tensor = torch.as_tensor(labels, dtype=torch.long)
dataset = TensorDataset(X_tensor, y_tensor)

print(f"Dataset created with {len(dataset)} samples")
print(f"Tensor shapes: X={X_tensor.shape}, y={y_tensor.shape}")

In [None]:
# Search for the best author-grouped 80/20 partition of the metadata
print("Finding best 80-20 split with author grouping...")
split_search_options = dict(
    df=metadata_df,
    test_size=0.2,
    max_attempts=5_000,
    min_test_segments=3,
)
dev_df, test_df, best_split_score = split.search_best_group_seed(**split_search_options)

# The sample indices of each partition drive the single-fold training below
train_indices_single, val_indices_single = (
    partition['sample_idx'].values for partition in (dev_df, test_df)
)

print(f"Best 80-20 split found with score: {best_split_score:.3f}")
print(f"Train samples: {len(train_indices_single)}, Validation samples: {len(val_indices_single)}")

In [None]:
# Smoke-test the data pipeline on a small subset and time each stage
import time


class StandardizedSubset(torch.utils.data.Dataset):
    """Subset of a dataset whose inputs are standardized with a fixed mean and std.

    Args:
        original_dataset: Dataset yielding (x, y) pairs.
        indices: Positions in `original_dataset` that make up this subset.
        mean: Value subtracted from every input.
        std: Value every input is divided by (a small epsilon is added to it).
    """

    def __init__(self, original_dataset, indices, mean, std):
        self.dataset = original_dataset
        self.indices = list(indices)  # Convert to list for compatibility
        self.mean = mean
        self.std = std + 1e-8  # epsilon keeps the division finite when std is 0

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        x, y = self.dataset[self.indices[idx]]
        return (x - self.mean) / self.std, y


def _make_test_loader(subset, shuffle):
    """Build a single-process DataLoader (num_workers=0 avoids worker crashes)."""
    return DataLoader(subset, batch_size=32, shuffle=shuffle, num_workers=0, pin_memory=False)


print("Testing optimized training function startup time...")
# perf_counter is monotonic and high-resolution, unlike wall-clock time.time()
start_time = time.perf_counter()

# Small subsets: up to 100 train / 20 val samples (50 / 10 when the split is
# not larger than that)
test_indices = train_indices_single[:100] if len(train_indices_single) > 100 else train_indices_single[:50]
test_val_indices = val_indices_single[:20] if len(val_indices_single) > 20 else val_indices_single[:10]

print(f"Test dataset size: Train={len(test_indices)}, Val={len(test_val_indices)}")

# Measure just the data loading and setup overhead
print("\nTiming data loader creation...")
loader_start = time.perf_counter()

# Estimate mean/std from a random sample of the training subset. The generator
# is seeded so that re-running the cell uses the same sample.
sample_size = min(50, len(test_indices))
sample_rng = np.random.default_rng(42)
sample_indices = sample_rng.choice(test_indices, size=sample_size, replace=False)
sample_data = torch.stack([dataset[i][0] for i in sample_indices])
train_mean = sample_data.mean()
train_std = sample_data.std()

# Both subsets are standardized with the statistics of the training sample
test_train_subset = StandardizedSubset(dataset, test_indices, train_mean, train_std)
test_val_subset = StandardizedSubset(dataset, test_val_indices, train_mean, train_std)

test_train_loader = _make_test_loader(test_train_subset, shuffle=True)
test_val_loader = _make_test_loader(test_val_subset, shuffle=False)

loader_time = time.perf_counter() - loader_start
print(f"DataLoader creation time: {loader_time:.2f} seconds")

# Test first batch loading
print("Testing first batch loading...")
batch_start = time.perf_counter()
try:
    test_batch = next(iter(test_train_loader))
    batch_time = time.perf_counter() - batch_start
    print(f"First batch loading time: {batch_time:.2f} seconds")
    print(f"Batch shape: {test_batch[0].shape}")
    print("✓ DataLoader working correctly!")
except Exception as e:
    # Best-effort diagnostic: report the loader failure, then check whether
    # the dataset itself can still be indexed directly.
    print(f"Error loading batch: {e}")
    print("Falling back to direct dataset access...")
    batch_start = time.perf_counter()
    test_sample = test_train_subset[0]
    batch_time = time.perf_counter() - batch_start
    print(f"Direct dataset access time: {batch_time:.4f} seconds")
    print(f"Sample shape: {test_sample[0].shape}")

total_time = time.perf_counter() - start_time
print(f"\nTotal test time: {total_time:.2f} seconds")
print("Optimization complete - training should start much faster now!")

## Single Fold Training with Predefined Splits

In [None]:
# Sanity-check the predefined split: no author may appear on both sides
print("Verifying author grouping in predefined splits...")
print("="*50)

# Select the metadata rows of each side once and reuse them below
train_rows = metadata_df.loc[metadata_df['sample_idx'].isin(train_indices_single)]
val_rows = metadata_df.loc[metadata_df['sample_idx'].isin(val_indices_single)]

# Authors seen on each side, and those seen on both
train_authors = set(train_rows['author'])
val_authors = set(val_rows['author'])
author_overlap = train_authors.intersection(val_authors)

print(f"Training set authors: {len(train_authors)} unique authors")
print(f"Validation set authors: {len(val_authors)} unique authors")
print(f"Author overlap between train/val: {len(author_overlap)} authors")

if author_overlap:
    print(f"⚠️ WARNING: {len(author_overlap)} authors appear in both sets")
    print(f"Overlapping authors: {author_overlap}")
else:
    print("✓ PERFECT: No author overlap - authors are properly grouped!")

# Classes covered by each side of the split
train_classes = set(train_rows['class_id'])
val_classes = set(val_rows['class_id'])

print("\nClass distribution:")
print(f"Training set classes: {len(train_classes)} classes")
print(f"Validation set classes: {len(val_classes)} classes")
print(f"All classes present in both sets: {train_classes == val_classes}")

print("\n" + "="*50)
print("Both optimized functions use these SAME predefined author-grouped splits!")

## Run Models

In [None]:
# BirdCNN baseline on the author-grouped 80-20 split found above
train_indices, val_indices = train_indices_single, val_indices_single

print("Using OPTIMIZED original function with predefined author-grouped splits...")
for split_name, split_indices in (("Train", train_indices), ("Val", val_indices)):
    print(f"{split_name} indices: {len(split_indices)} samples")

# Training configuration, collected in one place and passed to the fast
# training helper (used to avoid multiprocessing issues)
birdcnn_original_config = dict(
    dataset=dataset,
    train_indices=train_indices,
    val_indices=val_indices,
    model_class=models.BirdCNN,
    num_classes=len(set(labels_list)),
    num_epochs=250,
    batch_size=48,
    lr=0.001,
    use_class_weights=True,
    estop=35,
    standardize=True,
)
single_results_original = util.fast_single_fold_training_with_predefined_split(**birdcnn_original_config)

In [None]:
# Training curves of the BirdCNN baseline run: one figure per tracked metric
for metric_key, curve_title, y_label in (
    ('accuracies', "Original Optimized - Accuracy Curves", "Accuracy"),
    ('losses', "Original Optimized - Loss Curves", "Cross Entropy Loss"),
    ('f1s', "Original Optimized - F1 Score Curves", "Macro F1 Score"),
):
    util.plot_single_fold_curve(single_results_original, metric_key=metric_key, title=curve_title, ylabel=y_label)

# Textual summary of the run
util.print_single_fold_results(single_results_original)

# Validation confusion matrix and its statistics
original_confusion = single_results_original['confusion_matrix']
util.plot_confusion_matrix(original_confusion, title="BirdCNN Original Optimized - Validation Confusion Matrix")
util.print_confusion_matrix_stats(single_results_original)

In [None]:
# Second BirdCNN run on the same author-grouped 80-20 split
train_indices, val_indices = train_indices_single, val_indices_single

# Same hyper-parameters as the previous BirdCNN run, gathered into one mapping
birdcnn_config = dict(
    dataset=dataset,
    train_indices=train_indices,
    val_indices=val_indices,
    model_class=models.BirdCNN,
    num_classes=len(set(labels_list)),
    num_epochs=250,
    batch_size=48,
    lr=0.001,
    use_class_weights=True,
    estop=35,
    standardize=True,
)
single_results = util.fast_single_fold_training_with_predefined_split(**birdcnn_config)

In [None]:
# Training curves of the single-fold run: one figure per tracked metric
for metric_key, curve_title, y_label in (
    ('accuracies', "Single Fold - Accuracy Curves", "Accuracy"),
    ('losses', "Single Fold - Loss Curves", "Cross Entropy Loss"),
    ('f1s', "Single Fold - F1 Score Curves", "Macro F1 Score"),
):
    util.plot_single_fold_curve(single_results, metric_key=metric_key, title=curve_title, ylabel=y_label)

# Textual summary of the run
util.print_single_fold_results(single_results)

# Validation confusion matrix and its statistics
single_fold_confusion = single_results['confusion_matrix']
util.plot_confusion_matrix(single_fold_confusion, title="BirdCNN - Validation Confusion Matrix")
util.print_confusion_matrix_stats(single_results)

In [None]:
# BirdResNet on the same author-grouped 80-20 split (no class weighting here)
train_indices, val_indices = train_indices_single, val_indices_single

birdresnet_config = dict(
    dataset=dataset,
    train_indices=train_indices,
    val_indices=val_indices,
    model_class=models.BirdResNet,
    num_classes=len(set(labels_list)),
    num_epochs=250,
    batch_size=48,
    lr=0.001,
    use_class_weights=False,
    estop=35,
    standardize=True,
)
# NOTE: this rebinds `single_results`, replacing the BirdCNN results stored
# under the same name by the previous training cell.
single_results = util.fast_single_fold_training_with_predefined_split(**birdresnet_config)

In [None]:
# Training curves of the BirdResNet run: one figure per tracked metric
for metric_key, curve_title, y_label in (
    ('accuracies', "BirdRes - Accuracy Curves", "Accuracy"),
    ('losses', "BirdRes - Loss Curves", "Cross Entropy Loss"),
    ('f1s', "BirdRes - F1 Score Curves", "Macro F1 Score"),
):
    util.plot_single_fold_curve(single_results, metric_key=metric_key, title=curve_title, ylabel=y_label)

# Textual summary of the run
util.print_single_fold_results(single_results)

# Validation confusion matrix and its statistics
resnet_confusion = single_results['confusion_matrix']
util.plot_confusion_matrix(resnet_confusion, title="BirdResNet - Validation Confusion Matrix")
util.print_confusion_matrix_stats(single_results)

## SpecAugment Training

In [None]:
# Import SpecAugment helpers
import importlib

# Reload BEFORE the from-import: importlib.reload re-executes the module, but a
# name previously bound with `from utils.util import ...` keeps pointing at the
# old function object. Importing afterwards binds the freshly loaded version.
importlib.reload(util)

from utils.specaugment import get_recommended_params, visualize_specaugment
from utils.util import fast_single_fold_training_with_augmentation

In [None]:
# Dataset-level statistics that are fed to the SpecAugment recommendation helper
total_samples = len(metadata_df)
num_classes_actual = len(set(labels_list))

recommended_params = get_recommended_params(
    num_samples=total_samples,
    num_classes=num_classes_actual,
    input_size=(224, 313),
)

print("Dataset statistics:")
print(f"  Total samples: {total_samples}")
print(f"  Number of classes: {num_classes_actual}")
print(f"  Samples per class (avg): {total_samples / num_classes_actual:.1f}")

# List every recommended setting, one per line
print("\nRecommended SpecAugment parameters:")
for param_name, param_value in recommended_params.items():
    print(f"  {param_name}: {param_value}")

In [None]:
# Apply SpecAugment to one real spectrogram and show it next to the original
print("Testing SpecAugment on actual training data...")

# First sample of the dataset: (spectrogram tensor, label)
sample_idx = 0
sample = dataset[sample_idx]
sample_spec, sample_label = sample

print(f"Sample shape: {sample_spec.shape}")
print(f"Sample label: {sample_label}")

# Build the augmenter from the recommended settings and run it once
from utils.specaugment import SpecAugment
augmenter = SpecAugment(**recommended_params)
augmented_spec = augmenter(sample_spec)

# Drop the channel dimension of both spectrograms before plotting
original_2d = sample_spec.squeeze(0)
augmented_2d = augmented_spec.squeeze(0)
visualize_specaugment(
    original_2d,
    augmented_2d,
    title=f"SpecAugment on Training Data - Class {sample_label}",
)

In [None]:
# BirdCNN + SpecAugment on the predefined author-grouped split
print("Training BirdCNN with SpecAugment and author-grouped splits...")

# Same train/val indices as the non-augmented runs
train_indices, val_indices = train_indices_single, val_indices_single

# Same hyper-parameters as the BirdCNN baseline, plus the recommended
# augmentation settings
specaugment_config = dict(
    dataset=dataset,
    train_indices=train_indices,
    val_indices=val_indices,
    model_class=models.BirdCNN,
    num_classes=len(set(labels_list)),
    num_epochs=250,
    batch_size=48,
    lr=0.001,
    use_class_weights=True,
    estop=35,
    standardize=True,
    augment_params=recommended_params,
)
results_with_augment = fast_single_fold_training_with_augmentation(**specaugment_config)

In [None]:
# Training curves of the SpecAugment run: one figure per tracked metric
for metric_key, curve_title, y_label in (
    ('accuracies', "SpecAugment - Accuracy Curves", "Accuracy"),
    ('losses', "SpecAugment - Loss Curves", "Cross Entropy Loss"),
    ('f1s', "SpecAugment - F1 Score Curves", "Macro F1 Score"),
):
    util.plot_single_fold_curve(results_with_augment, metric_key=metric_key, title=curve_title, ylabel=y_label)

# Textual summary of the run
util.print_single_fold_results(results_with_augment)

# Validation confusion matrix and its statistics
augment_confusion = results_with_augment['confusion_matrix']
util.plot_confusion_matrix(augment_confusion, title="BirdCNN with SpecAugment - Validation Confusion Matrix")
util.print_confusion_matrix_stats(results_with_augment)

In [None]:
# Compare results: BirdCNN baseline vs BirdCNN with SpecAugment
print("COMPARISON: Original vs SpecAugment Training")
print("=" * 60)

# `single_results` is rebound by the BirdResNet run (no class weights), so it
# is not a like-for-like baseline. The BirdCNN run trained with the same
# hyper-parameters as the SpecAugment run is kept in `single_results_original`.
if 'single_results_original' in globals():
    baseline_results = single_results_original

    print("Original Training (without SpecAugment):")
    print(f"  Final Val Accuracy: {baseline_results['final_val_acc']:.4f}")
    print(f"  Final Val F1 Score: {baseline_results['final_val_f1']:.4f}")
    print(f"  Best Val Accuracy: {baseline_results['best_val_acc']:.4f}")
    print(f"  Best Val F1 Score: {baseline_results['best_val_f1']:.4f}")

    print("\nWith SpecAugment:")
    print(f"  Final Val Accuracy: {results_with_augment['final_val_acc']:.4f}")
    print(f"  Final Val F1 Score: {results_with_augment['final_val_f1']:.4f}")
    print(f"  Best Val Accuracy: {results_with_augment['best_val_acc']:.4f}")
    print(f"  Best Val F1 Score: {results_with_augment['best_val_f1']:.4f}")

    # Improvement of the final validation metrics (positive = SpecAugment helps)
    acc_improvement = results_with_augment['final_val_acc'] - baseline_results['final_val_acc']
    f1_improvement = results_with_augment['final_val_f1'] - baseline_results['final_val_f1']

    print("\nImprovement with SpecAugment:")
    print(f"  Accuracy: {acc_improvement:+.4f}")
    print(f"  F1 Score: {f1_improvement:+.4f}")
else:
    print("Run the original training first to compare results.")

In [None]:
# Experiment: stronger masking than the recommended SpecAugment settings
print("Experimenting with more aggressive SpecAugment parameters...")

aggressive_params = dict(
    time_mask_param=60,   # Larger time masks
    freq_mask_param=20,   # Larger frequency masks
    num_time_masks=2,     # Multiple time masks
    num_freq_masks=1,
    mask_value=0.0,
    p=0.9,                # Higher probability
)

print(f"Aggressive SpecAugment parameters: {aggressive_params}")

# Same setup as the recommended-parameter run, with a shorter epoch budget
# for experimentation
aggressive_config = dict(
    dataset=dataset,
    train_indices=train_indices,
    val_indices=val_indices,
    model_class=models.BirdCNN,
    num_classes=len(set(labels_list)),
    num_epochs=200,
    batch_size=48,
    lr=0.001,
    use_class_weights=True,
    estop=35,
    standardize=True,
    augment_params=aggressive_params,
)
results_aggressive = fast_single_fold_training_with_augmentation(**aggressive_config)

In [None]:
# Side-by-side report of the two SpecAugment configurations
print("COMPARISON: Recommended vs Aggressive SpecAugment")
print("=" * 60)

for heading, variant_results in (
    ("Recommended SpecAugment:", results_with_augment),
    ("\nAggressive SpecAugment:", results_aggressive),
):
    print(heading)
    print(f"  Final Val Accuracy: {variant_results['final_val_acc']:.4f}")
    print(f"  Final Val F1 Score: {variant_results['final_val_f1']:.4f}")

# The variant with the strictly higher final validation F1 wins; a tie goes
# to the recommended parameters
aggressive_wins = results_aggressive['final_val_f1'] > results_with_augment['final_val_f1']
if aggressive_wins:
    winner_label = "Aggressive"
    best_augment_results, best_params = results_aggressive, aggressive_params
else:
    winner_label = "Recommended"
    best_augment_results, best_params = results_with_augment, recommended_params
print(f"\n✓ {winner_label} parameters perform better!")

print(f"Best SpecAugment parameters: {best_params}")