Please read, complete, and clean up ModelTesting.ipynb. Its purpose is to run test-set inference on every model in final_cnn_models and final_fcnn_models. I then want to plot one main graph: a column (bar) chart displaying the F1 score for each of the six models, with the best model's bar highlighted by a colored edge.

# Model Testing on Test Set

Runs inference on the test audio files and displays the final metrics of the chosen models.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, log_loss
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Add parent directory to path for imports
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import our custom modules
from utils.inference import perform_audio_inference, perform_audio_inference_fcnn
from utils.models import BirdCNN

# Environment summary: confirm the imports worked and report the torch build
# and the device that inference will run on.
cuda_ok = torch.cuda.is_available()  # queried once, reused for both lines below
for status_line in (
    "All imports successful!",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {cuda_ok}",
    f"Device: {'cuda' if cuda_ok else 'cpu'}",
):
    print(status_line)

All imports successful!
PyTorch version: 2.7.1+cu128
CUDA available: True
Device: cuda


In [None]:
# Config: class count plus the locations of the weights, test audio and test metadata
NUM_CLASSES = 33
MODEL_WEIGHTS_PATH = os.path.join('..', 'models', 'bird_cnn.pth')
TEST_AUDIO_DIR = os.path.join('..', 'database', 'audio', 'test')
TEST_METADATA_PATH = os.path.join('..', 'database', 'meta', 'test_data.csv')

# Read the metadata CSV and summarise it
print("Loading test metadata...")
test_df = pd.read_csv(TEST_METADATA_PATH)
class_ids = test_df['class_id']
print(f"✓ Loaded {len(test_df)} test samples")
print(f"✓ Columns: {list(test_df.columns)}")
print(f"✓ Classes represented: {class_ids.nunique()}")
print(f"✓ Class distribution:")
print(class_ids.value_counts().sort_index())

# Preview of the metadata
print("\nFirst 5 rows:")
print(test_df.head())

# Check what is actually on disk
print("Verifying audio files...")
audio_files = os.listdir(TEST_AUDIO_DIR)
print(f"✓ Found {len(audio_files)} audio files in test directory")

# Split metadata rows into those whose audio file exists (kept, with the full
# path attached) and those whose file is missing (recorded by filename only).
test_files, missing_files = [], []

for _, meta_row in test_df.iterrows():
    clip_name = meta_row['filename']
    clip_path = os.path.join(TEST_AUDIO_DIR, clip_name)

    if not os.path.exists(clip_path):
        missing_files.append(clip_name)
        continue

    test_files.append(
        dict(filename=clip_name, path=clip_path, class_id=meta_row['class_id'])
    )

print(f"✓ Valid files: {len(test_files)}")
if not missing_files:
    print("✓ All metadata files found in audio directory")
else:
    print(f"⚠ Missing files: {len(missing_files)}")
    print(f"  First few missing: {missing_files[:5]}")

print(f"\nReady to test on {len(test_files)} audio files")

In [None]:
# Define the Configurations to try
from utils.final_models import BirdCNN_v5, BirdCNN_v5c, BirdCNN_v7, BirdCNN_v7e
from utils.fcnn_models import ...

# NOTE(review): the `from utils.fcnn_models import ...` line above is not valid
# Python (a literal `...` cannot be imported); replace it with the FCNN model
# class names before running this cell.

# TODO: `...` is the Ellipsis placeholder, not a path. Set this to the .pth
# weights file for BirdCNN_v5 (and add one path variable per remaining model).
v5_path = ...

# Final Models ({'name': (Model Class, PTH Path)})
# Registry of CNN models to evaluate. Currently empty.
final_cnn_models = {
    # TODO: add entries in the form 'display name': (ModelClass, weights_path)
}

# Registry of FCNN models to evaluate, same schema as above. Currently empty.
final_fcnn_models = {
    # TODO: add entries in the form 'display name': (ModelClass, weights_path)
}

# Storage for results: flat, module-level lists (not keyed by model name)
all_predictions = []
all_probabilities = []
all_true_labels = []
failed_files = []

In [None]:
# Process each test file, CNNs
for i, file_info in enumerate(tqdm(test_files, desc="Processing audio files")):
    try:
        # Perform inference
        probabilities = perform_audio_inference(
            audio_path=file_info['path'],
            model_class=BirdCNN,
            model_weights_path=MODEL_WEIGHTS_PATH,
            reduce_noise=False
        )
        
        # Get predicted class (argmax of probabilities)
        predicted_class = np.argmax(probabilities)
        
        # Store results
        all_probabilities.append(probabilities)
        all_predictions.append(predicted_class)
        all_true_labels.append(file_info['class_id'])
        
        # Progress update every 50 files
        if (i + 1) % 50 == 0:
            print(f"Processed {i + 1}/{len(test_files)} files")
            
    except Exception as e:
        print(f"Failed to process {file_info['filename']}: {str(e)}")
        failed_files.append(file_info['filename'])
        continue

print(f"\n✓ Inference completed!")
print(f"✓ Successfully processed: {len(all_predictions)} files")
print(f"✓ Failed files: {len(failed_files)}")

if failed_files:
    print(f"Failed files: {failed_files[:5]}{'...' if len(failed_files) > 5 else ''}")

In [None]:
# Process each test file, FCNNs
for i, file_info in enumerate(tqdm(test_files, desc="Processing audio files")):
    try:
        # Perform inference
        probabilities = perform_audio_inference_fcnn(
            audio_path=file_info['path'],
            model_class=BirdCNN,
            model_weights_path=MODEL_WEIGHTS_PATH,
            reduce_noise=False
        )
        
        # Get predicted class (argmax of probabilities)
        predicted_class = np.argmax(probabilities)
        
        # Store results
        all_probabilities.append(probabilities)
        all_predictions.append(predicted_class)
        all_true_labels.append(file_info['class_id'])
        
        # Progress update every 50 files
        if (i + 1) % 50 == 0:
            print(f"Processed {i + 1}/{len(test_files)} files")
            
    except Exception as e:
        print(f"Failed to process {file_info['filename']}: {str(e)}")
        failed_files.append(file_info['filename'])
        continue

print(f"\n✓ Inference completed!")
print(f"✓ Successfully processed: {len(all_predictions)} files")
print(f"✓ Failed files: {len(failed_files)}")

if failed_files:
    print(f"Failed files: {failed_files[:5]}{'...' if len(failed_files) > 5 else ''}")

## Results

In [None]:
# plot the F1 columns graph here