In [None]:
import os
import librosa
import numpy as np
import pandas as pd

# Root directory of the dataset; each sub-folder name is used as a genre label.
DATASET_PATH = "Music DataSet/genres_original"

# os.listdir() returns entries in arbitrary, filesystem-dependent order.
# Sorting makes the genre order (and therefore the row order of the exported
# table) reproducible across machines and runs.
genres = sorted(os.listdir(DATASET_PATH))  # auto-detect folders as genre labels

# Accumulators filled by the extraction loop:
X = []  # one 1-D feature vector per audio file
y = []  # genre label for the vector at the same position in X

In [None]:
# 🔁 Loop through each genre and file, building one feature vector per .wav file.
#
# Start from empty accumulators so this cell can be re-run safely: without the
# reset a second run would append duplicate rows, and a run after X has been
# converted to a NumPy array would make every X.append() raise AttributeError,
# which the except below would report as a loading error for every file.
X = []
y = []

# Sorted iteration keeps the row order deterministic (os.listdir order is arbitrary).
for genre in sorted(genres):
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        # Stray files next to the genre folders are not labels.
        continue

    print(f"Processing genre: {genre}")

    for file_name in sorted(os.listdir(genre_path)):
        if not file_name.endswith(".wav"):
            continue

        file_path = os.path.join(genre_path, file_name)

        try:
            # 🎧 Load audio (at most the first 30 seconds)
            y_audio, sr = librosa.load(file_path, duration=30)

            # 🎵 MFCCs: per-coefficient mean and standard deviation over time
            mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=13)
            mfcc_mean = np.mean(mfcc.T, axis=0)
            mfcc_std = np.std(mfcc.T, axis=0)

            # 🎹 Chroma: mean over time
            chroma = librosa.feature.chroma_stft(y=y_audio, sr=sr)
            chroma_mean = np.mean(chroma.T, axis=0)

            # 🌈 Spectral contrast: mean over time
            contrast = librosa.feature.spectral_contrast(y=y_audio, sr=sr)
            contrast_mean = np.mean(contrast.T, axis=0)

            # 🔗 Tonnetz, computed on the harmonic component only
            y_harmonic = librosa.effects.harmonic(y_audio)
            tonnetz = librosa.feature.tonnetz(y=y_harmonic, sr=sr)
            tonnetz_mean = np.mean(tonnetz.T, axis=0)

            # 🥁 Tempo
            tempo, _ = librosa.beat.beat_track(y=y_audio, sr=sr)
            tempo = np.atleast_1d(tempo)  # ensures 1D shape like (1,)

            # ✅ Combine all into 1D array, in this fixed order:
            # 13 MFCC means, 13 MFCC stds, chroma means, spectral-contrast means,
            # tonnetz means, tempo (52 values in total with librosa's default
            # chroma / contrast / tonnetz sizes — TODO confirm for your version).
            features = np.hstack(
                [mfcc_mean, mfcc_std, chroma_mean, contrast_mean, tonnetz_mean, tempo]
            )

            # Append both together, and only after every feature succeeded,
            # so X and y always stay aligned.
            X.append(features)
            y.append(genre)

        except Exception as e:
            # Best-effort: report the unreadable/corrupt file and keep going.
            print(f"❌ Error loading {file_path}: {e}")

In [None]:
# Turn the accumulated feature vectors and labels into a table and export it.

# Make sure X and y are NumPy arrays
X = np.asarray(X, dtype=float)  # shape: (n_samples, n_features)
y = np.asarray(y).reshape(-1, 1)  # shape: (n_samples, 1)

# Fail early with a clear message instead of an opaque shape error further down.
if X.ndim != 2 or X.shape[0] == 0:
    raise ValueError(
        "No feature vectors were extracted; run the extraction cell first "
        "and check that DATASET_PATH contains .wav files."
    )
if X.shape[0] != y.shape[0]:
    raise ValueError(
        f"Feature/label count mismatch: {X.shape[0]} feature rows vs {y.shape[0]} labels."
    )

# Column names
# NOTE: the "mfcc_" prefix is kept on every feature column so the CSV schema
# stays the same for existing readers, but the columns follow the order in
# which the extraction cell stacks the features (MFCC means, MFCC stds, chroma,
# spectral contrast, tonnetz, tempo) — they are not all MFCCs.
columns = [f"mfcc_{i + 1}" for i in range(X.shape[1])] + ["label"]

# Create DataFrame
# Build it from the numeric matrix and attach the labels afterwards.
# Stacking X and y into one array first (np.hstack) would force NumPy to pick a
# common dtype and silently turn every feature value into a string.
df = pd.DataFrame(X, columns=columns[:-1])
df["label"] = y.ravel()

# Convert label column to string (optional but safe)
df["label"] = df["label"].astype(str)

# The row index is written as the first (unnamed) CSV column;
# read the file back with pd.read_csv("data.csv", index_col=0).
df.to_csv("data.csv")
df.head()