Grid Search and Hyperparameter Sweeps

In [None]:
import os, sys
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import torch.nn.functional as F
from itertools import product

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from utils.models import BirdCNN, BirdResNet
from utils.dataset_utils import StandardizedDataset
from utils.training_utils import train_single_fold
from utils.cross_validation import k_fold_cross_validation_with_predefined_folds
from utils.evaluation_utils import plot_kfold_results, save_model, load_model
import utils.split as split

# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
# Name the selected GPU, or state explicitly that we fell back to the CPU.
# The conditional expression only queries the GPU name on the CUDA branch.
print(torch.cuda.get_device_name(device) if device.type == 'cuda' else "CUDA not available")

In [None]:
# Read the training table that the grid search operates on.
df = pd.read_csv(os.path.join('..', 'database', 'meta', 'final', 'train_data.csv'))

# Split the table into targets, author ids and the remaining (pixel) columns.
labels = df['label'].to_numpy().astype(np.int64)
authors = df['author'].to_numpy()
features = df.drop(columns=['label', 'author']).values.astype(np.float32)
# Scale to the 0-1 range in place (no extra copy of the feature matrix);
# per-fold standardization happens later, not here.
features /= 255.0
# One single-channel 313x224 image per row.
features = features.reshape(-1, 1, 313, 224)

print("features shape:", features.shape)
print("labels shape:", labels.shape)
print("authors shape:", authors.shape)

# Per-sample metadata handed to the splitting utilities; `sample_idx` is the
# row position, so it can be used to index the tensors built below.
metadata_df = pd.DataFrame({
    'sample_idx': range(len(labels)),
    'class_id': labels,
    'author': authors,
    'usable_segments': 1  # Each sample represents 1 segment
})

print("metadata_df shape:", metadata_df.shape)
print("Unique authors:", metadata_df['author'].nunique(dropna=False))
print("Unique classes:", metadata_df['class_id'].nunique(dropna=False))

# Wrap the arrays as tensors and pair them up in a dataset.
X_tensor = torch.tensor(features, dtype=torch.float32)
y_tensor = torch.tensor(labels, dtype=torch.long)
dataset = TensorDataset(X_tensor, y_tensor)

In [None]:
# Full hyperparameter space under consideration.
grid_params = dict(
    batch_size=[16, 24, 32, 48],
    lr=[0.0005, 0.001, 0.002],
    num_epochs=[150, 200, 250],
    use_class_weights=[True, False],
    dropout_p=[0.3, 0.5, 0.7],
)

print("Grid search parameter space:")
for name, choices in grid_params.items():
    print(f"  {name}: {choices}")

# Size of the full Cartesian product of all option lists.
total_combinations = np.prod(list(map(len, grid_params.values())))
print(f"\nTotal combinations: {total_combinations}")

# Reduced space that is actually swept in this notebook (demo-sized).
limited_params = dict(
    batch_size=[24, 48],
    lr=[0.001, 0.002],
    num_epochs=[150, 200],
    use_class_weights=[True, False],
)

print(f"\nLimited parameter space for demo:")
for name, choices in limited_params.items():
    print(f"  {name}: {choices}")

limited_combinations = np.prod(list(map(len, limited_params.values())))
print(f"Limited combinations: {limited_combinations}")

In [None]:
# Find the train/validation split used by every grid-search combination.
# NOTE(review): the 20% part returned as `test_df` is used as the validation
# set below — confirm that the final test data lives in a separate file.
print("Finding best 80-20 split with author grouping for grid search...")
dev_df, test_df, best_split_score = split.search_best_group_seed(
    df=metadata_df,
    test_size=0.2,
    max_attempts=5_000,
    min_test_segments=5
)

# Row positions into `dataset` for the two parts of the split.
train_indices = dev_df['sample_idx'].values
val_indices = test_df['sample_idx'].values

print(f"Best split found with score: {best_split_score:.3f}")
print(f"Train samples: {len(train_indices)}, Validation samples: {len(val_indices)}")

# Results tracking.
grid_results = []          # one dict per combination that finished
failed_combinations = []   # one dict per combination that raised
# Start below any reachable score so that the first successful run always
# becomes the incumbent, even if its F1 is exactly 0.
best_score = float('-inf')
best_params = None

# Loop-invariant: number of distinct labels in the loaded data.
num_classes = len(set(labels))

print(f"\nStarting grid search with {limited_combinations} combinations...")
print("This may take a while...")

# Execute grid search over the Cartesian product of the limited space.
for combination_count, params in enumerate(product(*limited_params.values()), start=1):
    param_dict = dict(zip(limited_params.keys(), params))

    print(f"\nCombination {combination_count}/{limited_combinations}: {param_dict}")

    try:
        # Run training with the current parameters.
        # NOTE(review): `estop=35` is presumably an early-stopping patience —
        # confirm against train_single_fold.
        result = train_single_fold(
            dataset=dataset,
            train_indices=train_indices,
            val_indices=val_indices,
            model_class=BirdCNN,
            num_classes=num_classes,
            num_epochs=param_dict['num_epochs'],
            batch_size=param_dict['batch_size'],
            lr=param_dict['lr'],
            use_class_weights=param_dict['use_class_weights'],
            estop=35,
            standardize=True
        )

        # Track results
        grid_results.append({
            **param_dict,
            'final_val_acc': result['final_val_acc'],
            'final_val_f1': result['final_val_f1'],
            'best_val_acc': result['best_val_acc'],
            'best_val_f1': result['best_val_f1'],
            'training_time': result.get('training_time', 0)
        })

        # Model selection is done on the final-epoch validation F1.
        final_f1 = result['final_val_f1']
        if final_f1 > best_score:
            best_score = final_f1
            best_params = param_dict.copy()
            print(f"  ✓ New best F1 score: {best_score:.4f}")
        else:
            print(f"  Final F1 score: {final_f1:.4f}")

    except Exception as e:
        # Best-effort sweep: one failing combination must not abort the rest,
        # but it is recorded so the failure is visible in the final summary.
        failed_combinations.append({**param_dict, 'error': f"{type(e).__name__}: {e}"})
        print(f"  ✗ Error with parameters {param_dict}: {str(e)}")

print(f"\nGrid search completed!")
if failed_combinations:
    print(f"{len(failed_combinations)} of {limited_combinations} combinations failed")
if best_params is None:
    # Nothing finished: do not report a made-up best score.
    print("No combination completed successfully; see the errors above.")
else:
    print(f"Best parameters: {best_params}")
    print(f"Best F1 score: {best_score:.4f}")

In [None]:
# Analyze grid search results
results_df = pd.DataFrame(grid_results)
if results_df.empty:
    # Without this guard the cell fails further down inside pandas with an
    # error that does not mention the real cause.
    raise RuntimeError(
        "grid_results is empty - no grid search combination completed, "
        "so there is nothing to analyze."
    )

print("Grid Search Results Summary:")
print("=" * 50)
print(results_df.describe())

# Sort by F1 score (best first)
results_df_sorted = results_df.sort_values('final_val_f1', ascending=False)
print(f"\nTop 5 Results (by Final Val F1):")
print(results_df_sorted.head().to_string(index=False))

# Parameter impact analysis: mean/std/count of final F1 per value of each
# swept parameter.
print(f"\nParameter Impact Analysis:")
for param in limited_params.keys():
    if param in results_df.columns:
        param_impact = results_df.groupby(param)['final_val_f1'].agg(['mean', 'std', 'count'])
        print(f"\n{param}:")
        print(param_impact)

# Visualize results
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Grid Search Results Analysis', fontsize=16)

# Plot 1: F1 scores distribution
axes[0, 0].hist(results_df['final_val_f1'], bins=10, alpha=0.7)
axes[0, 0].set_title('Distribution of Final Val F1 Scores')
axes[0, 0].set_xlabel('F1 Score')
axes[0, 0].set_ylabel('Frequency')

# Plot 2: Learning rate vs F1 (categorical bars, one per learning rate)
if 'lr' in results_df.columns:
    lr_grouped = results_df.groupby('lr')['final_val_f1'].mean()
    axes[0, 1].bar(range(len(lr_grouped)), lr_grouped.values)
    axes[0, 1].set_title('Learning Rate vs Average F1 Score')
    axes[0, 1].set_xlabel('Learning Rate')
    axes[0, 1].set_ylabel('Average F1 Score')
    axes[0, 1].set_xticks(range(len(lr_grouped)))
    axes[0, 1].set_xticklabels([f'{lr:.4f}' for lr in lr_grouped.index])

# Plot 3: Batch size vs F1
if 'batch_size' in results_df.columns:
    batch_grouped = results_df.groupby('batch_size')['final_val_f1'].mean()
    # Use categorical positions like the learning-rate plot: plotting at the
    # raw batch sizes (e.g. 24 and 48) with the default bar width of 0.8 data
    # units renders the bars as thin, widely separated slivers.
    batch_positions = range(len(batch_grouped))
    axes[1, 0].bar(batch_positions, batch_grouped.values)
    axes[1, 0].set_title('Batch Size vs Average F1 Score')
    axes[1, 0].set_xlabel('Batch Size')
    axes[1, 0].set_ylabel('Average F1 Score')
    axes[1, 0].set_xticks(batch_positions)
    axes[1, 0].set_xticklabels([str(batch_size) for batch_size in batch_grouped.index])

# Plot 4: Training time vs F1
if 'training_time' in results_df.columns:
    axes[1, 1].scatter(results_df['training_time'], results_df['final_val_f1'], alpha=0.7)
    axes[1, 1].set_title('Training Time vs F1 Score')
    axes[1, 1].set_xlabel('Training Time (seconds)')
    axes[1, 1].set_ylabel('F1 Score')

plt.tight_layout()
plt.show()

# Save results
results_df.to_csv('../database/meta/grid_search_results.csv', index=False)
print(f"\nResults saved to ../database/meta/grid_search_results.csv")