#📁 9_prepare_test_data.ipynb

# 🎟️ Step 9: Prepare Mel Spectrograms and CSV for Test Data

"""
This notebook generates Mel Spectrograms for the final test audio files
and prepares a matching CSV file (filename and label).
"""


# In [5]:
# 📂 Mount Google Drive
# Mounts the user's Drive at /content/drive; every data path used further
# down in this notebook lives under /content/drive/MyDrive.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

# Output: Mounted at /content/drive


# In [None]:
# 📦 Libraries
import os
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# 🔍 Read the original label table and index it by filename
# (if a filename occurs more than once, the last row wins).
original_csv = "/content/drive/MyDrive/balanced_data.csv"
label_df = pd.read_csv(original_csv)
label_map = {
    fname: lbl
    for fname, lbl in zip(label_df['filename'], label_df['label'])
}

# 📂 Source and destination directories
# input_folder holds the original test .wav files; output_folder receives
# the rendered .png spectrograms and is created if it does not exist yet.
input_folder = "/content/drive/MyDrive/final_test_data"
output_folder = "/content/drive/MyDrive/melspec_test"
os.makedirs(output_folder, exist_ok=True)

# 🔄 Generate Mel Spectrograms and collect filenames and labels
#
# For every ".wav" file in input_folder:
#   1. load the audio resampled to 16 kHz,
#   2. compute a 128-band Mel spectrogram and convert it to dB (relative to
#      the spectrogram's own maximum),
#   3. render it without axes and save it as "<name>.png" in output_folder,
#   4. if the .wav filename has an entry in label_map, record
#      {"filename", "label"} in test_records.
# A file that fails in steps 1-3 is reported and skipped (no CSV row).
test_records = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row
# order of the resulting CSV reproducible between runs.
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith(".wav"):
        continue

    filepath = os.path.join(input_folder, filename)
    # Swap only the trailing extension. str.replace('.wav', '.png') would
    # also rewrite a ".wav" that happens to appear earlier in the name.
    save_path = os.path.join(
        output_folder, os.path.splitext(filename)[0] + ".png"
    )

    try:
        # Load and process audio
        y, sr = librosa.load(filepath, sr=16000)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Save Mel Spectrogram
        fig = plt.figure(figsize=(3, 3))
        try:
            librosa.display.specshow(mel_db, sr=sr, x_axis='time', y_axis='mel')
            plt.axis('off')
            plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Release the figure even when rendering/saving fails; otherwise
            # every failed file leaves an open figure behind and memory grows
            # over a long run.
            plt.close(fig)
    except Exception as e:
        # Best-effort batch job: report the bad file and keep going.
        print(f"⚠️ Error processing {filename}: {e}")
        continue

    # Match label (keyed by the original .wav filename)
    if filename in label_map:
        test_records.append({"filename": filename, "label": label_map[filename]})
    else:
        print(f"⚠️ No label found for {filename}")

# 📅 Save CSV
# Pin the column names explicitly: with an empty test_records list,
# pd.DataFrame([]) has no columns and to_csv would write a header-less
# empty file that pd.read_csv cannot parse later. With records present the
# output is the same as before (columns "filename", "label").
test_df = pd.DataFrame(test_records, columns=["filename", "label"])
test_df.to_csv("/content/drive/MyDrive/test_final.csv", index=False)
print(f"✅ Test dataset prepared. Total files: {len(test_df)}")
