# 📁 08_prepare_augmented_csvs.ipynb

# 🎯 Step 8: Create Separate CSVs for Augmented Data

"""
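This notebook creates one CSV file per augmentation type
(noised, pitch-shifted, stretched, mixed) plus an originals-only CSV.
Each augmented CSV lists every original .png together with its augmented
counterpart; the names are derived from the filenames in train_final.csv.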
"""


In [1]:
# 📂 Mount Google Drive
# Colab-only step: exposes the Drive contents under /content/drive, which is
# the root of every path the following cell reads from and writes to.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 📚 Libraries
import pandas as pd
import os

# 📂 Load the original CSV (manually labeled + balanced)
original_csv = "/content/drive/MyDrive/final_csv/train_final.csv"
df = pd.read_csv(original_csv)

# 📂 Output folder
output_dir = "/content/drive/MyDrive/final_csv"
os.makedirs(output_dir, exist_ok=True)

# 🔖 Define suffixes and corresponding output filenames
# The empty suffix yields an originals-only CSV; every other suffix yields a
# CSV containing each original image followed by its augmented counterpart.
versions = {
    "": "train_final_original.csv",
    "_noised": "train_final_noised.csv",
    "_stretched": "train_final_stretched.csv",
    "_pitch": "train_final_pitched.csv",
    "_mixed": "train_final_mixed.csv"
}


def _strip_wav_extension(filename):
    """Return *filename* without its trailing ``.wav`` extension, if any.

    Only a suffix is removed, so a ``.wav`` occurring in the middle of the
    name (e.g. ``a.wav_take2.wav``) is left intact.
    """
    wav_ext = ".wav"
    if filename.endswith(wav_ext):
        return filename[:-len(wav_ext)]
    return filename


def _build_rows(base_names, labels, suffix):
    """Return the CSV rows for one augmentation suffix.

    Each (base name, label) pair produces the original ``<base>.png`` row
    and, when *suffix* is non-empty, a ``<base><suffix>.png`` row directly
    after it carrying the same label.
    """
    rows = []
    for base_name, label in zip(base_names, labels):
        rows.append({'filename': f"{base_name}.png", 'label': label})
        if suffix:
            rows.append({'filename': f"{base_name}{suffix}.png", 'label': label})
    return rows


# Fail early with a clear message instead of a bare KeyError inside the loop
_missing_columns = [col for col in ('filename', 'label') if col not in df.columns]
if _missing_columns:
    raise KeyError(f"{original_csv} is missing required column(s): {_missing_columns}")

# The base names do not depend on the suffix, so compute them once
_base_names = [_strip_wav_extension(name) for name in df['filename']]

# 🔄 Create CSV for each augmentation
for suffix, output_filename in versions.items():
    all_rows = _build_rows(_base_names, df['label'], suffix)

    # Save the new CSV (explicit columns keep the header even with no rows)
    df_combined = pd.DataFrame(all_rows, columns=['filename', 'label'])
    save_path = os.path.join(output_dir, output_filename)
    df_combined.to_csv(save_path, index=False)

    print(f"✅ CSV created: {save_path}")

print("\n🎯 All CSV files generated successfully!")
