In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns # For confusion matrix plotting
import librosa
import librosa.display
import joblib # Not strictly needed for gender if no encoder was used for it
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# from tensorflow.keras.utils import to_categorical # Not needed for binary gender true labels
import warnings
warnings.filterwarnings('ignore')

# --- PATHS ---
# Root folder of the Common Voice dataset attached to this notebook as an input.
COMMON_VOICE_DATASET_ROOT = "/kaggle/input/common-voice/"

# Kaggle dataset that holds the trained gender model ('best_gender_model_cnn.h5').
# Change the slug below if the model was uploaded under a different dataset name
# (it may or may not be the same dataset that holds the age model).
YOUR_MODEL_DATASET_SLUG = "gender-model-cnn"
MODEL_ARTIFACTS_PATH = f"/kaggle/input/{YOUR_MODEL_DATASET_SLUG}/"

# Name shared by the test-split CSV and the (doubly nested) clip directory.
_TEST_SPLIT = "cv-valid-test"

# Metadata CSV describing the Common Voice test split.
COMMON_VOICE_METADATA_TEST_CSV = os.path.join(COMMON_VOICE_DATASET_ROOT, _TEST_SPLIT + ".csv")

# Directory that actually contains the test audio files: <root>/<split>/<split>.
ACTUAL_AUDIO_TEST_CLIPS_PATH = os.path.join(COMMON_VOICE_DATASET_ROOT, _TEST_SPLIT, _TEST_SPLIT)

# --- Verification ---
# Report whether each configured location is present so a wrong slug or a
# missing input dataset is visible before any heavy work starts.
_artifacts_dir_exists = os.path.exists(MODEL_ARTIFACTS_PATH)
print(f"Path to your uploaded model artifacts: {MODEL_ARTIFACTS_PATH}")
print(f"  - Exists: {_artifacts_dir_exists}")
if _artifacts_dir_exists:
    print(f"  - Contents: {os.listdir(MODEL_ARTIFACTS_PATH)}")

print(f"Test Metadata CSV: {COMMON_VOICE_METADATA_TEST_CSV} - Exists: {os.path.exists(COMMON_VOICE_METADATA_TEST_CSV)}")
print(f"Actual Test Audio Clips Path: {ACTUAL_AUDIO_TEST_CLIPS_PATH} - Exists: {os.path.exists(ACTUAL_AUDIO_TEST_CLIPS_PATH)}")

In [None]:
# Spectrogram settings. These must stay identical to the values that were used
# when the gender model was trained, otherwise the model input will not match.
SR = 22050         # sample rate passed to librosa.load
N_FFT = 2048       # FFT window size for the mel spectrogram
HOP_LENGTH = 512   # hop between successive frames
N_MELS = 128       # number of mel bands
MAX_FRAMES = 256   # every spectrogram is padded or cropped to this many frames


def audio_to_melspectrogram(filepath, sr=SR, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS, max_frames=MAX_FRAMES):
    """Convert an audio file into a fixed-width log-mel spectrogram.

    Args:
        filepath: Path of the audio file to load.
        sr: Sample rate handed to ``librosa.load`` and to the mel transform.
        n_fft: FFT window size.
        hop_length: Hop length between frames.
        n_mels: Number of mel bands.
        max_frames: Required length of axis 1 of the result.

    Returns:
        The dB-scaled mel spectrogram with axis 1 right-padded with -80 or
        cropped so that it has exactly ``max_frames`` columns, or ``None`` if
        anything goes wrong (the error is printed, not raised).
    """
    try:
        waveform, _ = librosa.load(filepath, sr=sr)
        power_spec = librosa.feature.melspectrogram(
            y=waveform, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
        )
        spec_db = librosa.power_to_db(power_spec, ref=np.max)

        missing_frames = max_frames - spec_db.shape[1]
        if missing_frames <= 0:
            # Long enough already: keep only the first max_frames columns.
            return spec_db[:, :max_frames]

        # Too short: extend the time axis on the right with a constant -80.
        return np.pad(
            spec_db,
            pad_width=((0, 0), (0, missing_frames)),
            mode='constant',
            constant_values=-80,
        )
    except Exception as e:
        # Best effort: one unreadable clip must not abort the whole run.
        print(f"Error generating spectrogram for {filepath}: {e}")
        return None


print("Spectrogram parameters and function defined.")

In [None]:
def load_and_prepare_test_metadata_for_gender(csv_path, base_audio_path_root):
    """Load the test metadata CSV and keep the rows usable for gender evaluation.

    A row is kept only if it has a filename, an age and a gender, the gender is
    'female' or 'male', and the corresponding audio file exists on disk.

    Args:
        csv_path: Path of the metadata CSV. It must contain the columns
            'filename', 'age' and 'gender'.
        base_audio_path_root: Directory that directly contains the audio clips.
            Each CSV 'filename' entry is resolved to
            ``<base_audio_path_root>/<last path component of the entry>``, so
            an entry such as 'cv-valid-test/sample-000001.mp3' maps to
            ``<base_audio_path_root>/sample-000001.mp3``.

    Returns:
        A DataFrame with a fresh 0..n-1 index and the columns 'filename',
        'gender', 'full_audio_path' and 'gender_numeric_true' (0 = female,
        1 = male). An empty, column-less DataFrame is returned when the CSV is
        missing or lacks a required column; callers should test ``.empty``.
    """
    required_columns = ['filename', 'age', 'gender']
    output_columns = ['filename', 'gender', 'full_audio_path', 'gender_numeric_true']

    try:
        df_meta = pd.read_csv(csv_path)
        print(f"Loaded {os.path.basename(csv_path)} with shape: {df_meta.shape}")
    except FileNotFoundError:
        print(f"ERROR: CSV file not found at {csv_path}.")
        return pd.DataFrame()

    missing_columns = [col for col in required_columns if col not in df_meta.columns]
    if missing_columns:
        print(f"ERROR: {os.path.basename(csv_path)} is missing required columns: {missing_columns}")
        return pd.DataFrame()

    # 'age' is not part of the output; it is only used as a filter so that the
    # evaluated rows are the ones carrying both labels. Rows without a filename
    # are dropped as well because no audio path can be built for them.
    df_meta_cleaned = df_meta[required_columns].dropna(subset=required_columns)

    # Map gender to numeric (0 for female, 1 for male). Any other value maps to
    # NaN and the row is discarded.
    gender_numeric = df_meta_cleaned['gender'].map({'female': 0, 'male': 1})
    has_binary_gender = gender_numeric.notna()
    df_meta_cleaned = df_meta_cleaned.loc[has_binary_gender].assign(
        gender_numeric_true=gender_numeric[has_binary_gender].astype(int)
    )

    def construct_actual_path(csv_filename_entry):
        # The CSV entry is prefixed with the split folder; only the clip's own
        # file name is needed once the clip directory is known.
        clip_name = str(csv_filename_entry).rsplit('/', 1)[-1]
        return os.path.join(base_audio_path_root, clip_name)

    df_meta_cleaned = df_meta_cleaned.assign(
        full_audio_path=df_meta_cleaned['filename'].map(construct_actual_path)
    )

    # Validate file existence. The explicit bool cast keeps the mask a real
    # boolean indexer even when the frame is empty; an empty object-dtype mask
    # is not recognised as boolean by pandas.
    original_count = len(df_meta_cleaned)
    file_exists = df_meta_cleaned['full_audio_path'].map(os.path.exists).astype(bool)
    df_meta_cleaned = df_meta_cleaned.loc[file_exists]
    print(f"Path validation: {len(df_meta_cleaned)} audio files found out of {original_count} entries.")

    print(f"Shape after processing for gender: {df_meta_cleaned.shape}")

    return df_meta_cleaned[output_columns].reset_index(drop=True)


# Build the gender-evaluation table from the test metadata, then show a short
# preview (or say so if nothing usable was found).
df_test_eval_gender = load_and_prepare_test_metadata_for_gender(
    COMMON_VOICE_METADATA_TEST_CSV,
    ACTUAL_AUDIO_TEST_CLIPS_PATH,
)

if df_test_eval_gender.empty:
    print("df_test_eval_gender is empty after processing.")
else:
    print("\nTest Data for Gender Evaluation (Head):")
    print(df_test_eval_gender.head())

In [None]:
# MODEL_ARTIFACTS_PATH comes from the path configuration at the top of the notebook.
# The file name must match the artifact that was uploaded to that dataset.
gender_model_path_h5 = os.path.join(MODEL_ARTIFACTS_PATH, "best_gender_model_cnn.h5")

# State consumed by the prediction and evaluation cells further down.
loaded_gender_model_cnn = None
gender_model_loaded_for_pred = False
gender_display_map_dl = {0: "female", 1: "male"}  # numeric class -> label shown to the reader

if not os.path.exists(gender_model_path_h5):
    # Nothing to load: explain where the file was expected.
    print(f"Gender model file not found at: {gender_model_path_h5}")
    print(f"Please ensure the file 'best_gender_model_cnn.h5' is present in your dataset: {MODEL_ARTIFACTS_PATH}")
    print("\nGender model failed to load. Predictions cannot be made.")
else:
    try:
        loaded_gender_model_cnn = tf.keras.models.load_model(gender_model_path_h5)
    except Exception as e:
        # The flag stays False so the later cells skip prediction.
        print(f"Error loading gender model: {e}")
    else:
        print(f"Loaded gender model from: {gender_model_path_h5}")
        gender_model_loaded_for_pred = True
        print("\nGender model is ready for prediction.")

In [None]:
# Results of the prediction pass. They are re-created on every run of this cell,
# so re-running it never appends to stale values.
y_true_gender_indices = []          # ground-truth class per successfully predicted clip
y_pred_gender_indices = []          # predicted class per successfully predicted clip
y_pred_gender_labels_examples = []  # first few predicted labels, for display only
true_gender_labels_examples = []    # matching ground-truth labels, for display only

if gender_model_loaded_for_pred and not df_test_eval_gender.empty:
    print(f"\nStarting GENDER predictions on {len(df_test_eval_gender)} test samples...")

    # Evaluate on every available test row. For a quick smoke test, swap in a sample, e.g.
    # df_test_eval_gender.sample(n=min(100, len(df_test_eval_gender)), random_state=42)
    df_to_predict_gender = df_test_eval_gender
    print(f"Predicting on {len(df_to_predict_gender)} files for gender evaluation.")

    for row in df_to_predict_gender.itertuples(index=False):
        audio_filepath = row.full_audio_path
        true_gender_numeric = row.gender_numeric_true  # 0 or 1
        true_gender_str = row.gender                   # 'female' or 'male'

        mel_spec = audio_to_melspectrogram(audio_filepath)
        if mel_spec is None:
            print(f"Skipping {audio_filepath} due to spectrogram error.")
            continue

        # Add a leading batch axis and a trailing channel axis around the 2-D spectrogram.
        model_input = np.asarray(mel_spec, dtype=np.float32)[np.newaxis, ..., np.newaxis]

        try:
            # Call the model directly rather than Model.predict(): predict() is built for
            # batch processing and carries heavy per-call overhead when used inside a loop
            # on single samples. training=False keeps layers in inference mode.
            gender_prob = np.asarray(loaded_gender_model_cnn(model_input, training=False))

            # Assumes the model ends in a single sigmoid unit giving P(male) -- TODO confirm
            # against the training notebook.
            predicted_gender_idx = 1 if gender_prob[0][0] > 0.5 else 0

            y_true_gender_indices.append(int(true_gender_numeric))
            y_pred_gender_indices.append(predicted_gender_idx)

            if len(y_pred_gender_labels_examples) < 10:  # keep a few examples for display
                predicted_gender_label_str = gender_display_map_dl.get(predicted_gender_idx, "unknown")
                y_pred_gender_labels_examples.append(predicted_gender_label_str)
                true_gender_labels_examples.append(true_gender_str)

        except Exception as e:
            # Best effort: report the failing clip and carry on with the rest.
            print(f"Error predicting gender for {audio_filepath}: {e}")

    print("\nExample Gender Predictions (True vs. Predicted):")
    for true_label, pred_label in zip(true_gender_labels_examples, y_pred_gender_labels_examples):
        print(f"True: {true_label:<10} | Predicted: {pred_label}")
    print(f"\nGender predictions completed on {len(y_pred_gender_indices)} files.")

else:
    if not gender_model_loaded_for_pred:
        print("Gender model not loaded. Cannot perform batch predictions.")
    if df_test_eval_gender.empty:
        print("df_test_eval_gender is empty. Cannot perform batch predictions.")

In [None]:
# Evaluate only when the model was loaded and the prediction pass produced
# at least one (true, predicted) pair.
if not (gender_model_loaded_for_pred and y_true_gender_indices and y_pred_gender_indices):
    print("Not enough data or gender model not loaded for evaluation.")
else:
    print("\n--- Gender Model Evaluation Metrics ---")

    # Overall accuracy
    accuracy_gender = accuracy_score(y_true_gender_indices, y_pred_gender_indices)
    print(f"Overall Gender Accuracy: {accuracy_gender:.4f}")

    # Display names in class-index order, e.g. ['female', 'male'].
    gender_class_names = [gender_display_map_dl[class_idx] for class_idx in (0, 1)]

    # Classification report
    try:
        report_gender = classification_report(
            y_true_gender_indices,
            y_pred_gender_indices,
            labels=[0, 1],  # both classes are always listed, even if one never occurs
            target_names=gender_class_names,
            zero_division=0,
        )
        print("\nGender Classification Report:")
        print(report_gender)
    except ValueError as ve:
        print(f"\nCould not generate full gender classification report: {ve}")
        # Fall back to a report restricted to the labels that actually occur.
        unique_labels_gender = np.unique(y_true_gender_indices + y_pred_gender_indices)
        if unique_labels_gender.size == 0:
            print("No predictions available to generate a report.")
        else:
            present_class_names = [gender_display_map_dl[i] for i in unique_labels_gender]
            report_subset_gender = classification_report(
                y_true_gender_indices,
                y_pred_gender_indices,
                labels=unique_labels_gender,
                target_names=present_class_names,
                zero_division=0,
            )
            print("Showing report for available gender labels:\n", report_subset_gender)

    # Confusion matrix: rows are true classes, columns are predicted classes.
    cm_gender = confusion_matrix(y_true_gender_indices, y_pred_gender_indices, labels=[0, 1])
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(
        cm_gender,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=gender_class_names,
        yticklabels=gender_class_names,
        ax=ax,
    )
    ax.set_xlabel('Predicted Gender')
    ax.set_ylabel('True Gender')
    ax.set_title('Confusion Matrix for Gender Prediction')
    plt.show()