In [None]:
import sys
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
sys.path.append(os.path.abspath('..'))
import utils
from tqdm import tqdm

In [None]:
# Root directory of the audio files. It is passed to utils.get_audio_path together
# with a track's file name to build the path that librosa loads.
AUDIO_DIR = '../dataset/data/genres_original/'

In [None]:
# Track metadata table; its 'file_name' column supplies the identifier used for
# every clip in the extraction loops below.
tracks = pd.read_csv('../project_data/tracks.csv')
track_ids = tracks.loc[:, 'file_name']

In [None]:
# Column layout: one (coefficient, statistic) pair per column, coefficient-major,
# i.e. [c0 mean, c0 min, c0 max, c1 mean, ...].
multi_col_index = pd.MultiIndex.from_product(
    [range(13), ['mean', 'min', 'max']],
    names=['mfcc_coeff', 'stat'],
)

# One initially-empty row per track, keyed by the track's file name.
mfcc_df = pd.DataFrame(index=track_ids, columns=multi_col_index)

for i in tqdm(track_ids, desc="Processing tracks"):
    path = utils.get_audio_path(AUDIO_DIR, i)
    try:
        # sr=None asks librosa to keep the file's own sampling rate.
        y, sr = librosa.load(path, sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # Reduce each coefficient over axis 1, then interleave the three
        # statistics per coefficient so the flat row matches the column order.
        per_stat = [reduce_fn(mfccs, axis=1) for reduce_fn in (np.mean, np.min, np.max)]
        mfcc_df.loc[i] = np.stack(per_stat, axis=1).flatten()
    except Exception as e:
        # Best effort: report the failure and leave this track's row unfilled.
        print(f"Error processing {i}: {e}")

In [None]:
# Move the track identifier out of the row index into an ordinary column.
# The guard keeps this cell idempotent: an unconditional in-place reset would
# insert an extra 'index' column every time the cell is re-run. After the first
# reset the index is an unnamed RangeIndex, so the branch is skipped.
if mfcc_df.index.name is not None:
    mfcc_df = mfcc_df.reset_index()
mfcc_df.head()

In [None]:
# Persist the MFCC statistics table. index=True writes the row index as the first
# CSV column; the two column levels are written as two header rows.
mfcc_df.to_csv('mfcc_data_gtzan_v1.csv', index=True)

In [None]:
# Reload the saved table: header=[0, 1] rebuilds the two-level column index and
# index_col=0 uses the first CSV column as the row index.
mfcc_df = pd.read_csv('mfcc_data_gtzan_v1.csv', header=[0, 1], index_col=0)

In [None]:
# Feature families summarised per track. Each family contributes one column per
# (subfeature, stat) pair; tempo is appended separately as a single column.
features = [
    'mfcc',
    'spectral_centroid',
    'spectral_bandwidth',
    'zero_crossing_rate',
    'rmse',
    'spectral_contrast'
]
stats = ['mean', 'min', 'max']

# Subfeature labels for each family, listed in the same order as `features`.
mfcc_subfeatures = list(range(13))
single_subfeatures = ['0']  # For centroid, bandwidth, zcr, rmse
contrast_subfeatures = list(range(6))  # Six spectral-contrast rows are kept per track

feature_subfeatures = [
    mfcc_subfeatures,         # mfcc
    single_subfeatures,       # spectral_centroid
    single_subfeatures,       # spectral_bandwidth
    single_subfeatures,       # zero_crossing_rate
    single_subfeatures,       # rmse
    contrast_subfeatures      # spectral_contrast
]

# Create MultiIndex for all features except tempo. The nesting order
# (feature -> subfeature -> stat) is the order in which each row is assembled
# inside the loop below, so the two must be kept in sync.
multi_col_index = pd.MultiIndex.from_tuples(
    [(feat, sub, stat)
     for feat, sub_feats in zip(features, feature_subfeatures)
     for sub in sub_feats
     for stat in stats],
    names=["feature", "subfeature", "stat"]
)

# Add tempo as a single-value column
tempo_col = pd.MultiIndex.from_tuples(
    [('tempo', '0', '0')],
    names=["feature", "subfeature", "stat"]
)

# Combine indexes
full_index = multi_col_index.append(tempo_col)

# Initialize DataFrame: one initially-empty row per track, keyed by file name
feature_df = pd.DataFrame(columns=full_index, index=track_ids)


def _mean_min_max(frames):
    """Return mean, min and max of `frames` over its last axis, stacked on a new last axis.

    A 1-D input yields shape (3,); a 2-D (rows, frames) input yields (rows, 3),
    which flattens to [row0 mean, row0 min, row0 max, row1 mean, ...].
    """
    return np.stack(
        [np.mean(frames, axis=-1), np.min(frames, axis=-1), np.max(frames, axis=-1)],
        axis=-1,
    )


# Processing loop
for i in tqdm(track_ids, desc="Processing tracks"):
    path = utils.get_audio_path(AUDIO_DIR, i)
    try:
        # sr=None asks librosa to keep the file's own sampling rate.
        y, sr = librosa.load(path, sr=None)

        # MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_stats = _mean_min_max(mfccs)

        # Spectral Centroid (first row of the returned 2-D array)
        centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        centroid_stats = _mean_min_max(centroid)

        # Spectral Bandwidth
        bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
        bandwidth_stats = _mean_min_max(bandwidth)

        # Zero Crossing Rate
        zcr = librosa.feature.zero_crossing_rate(y)[0]
        zcr_stats = _mean_min_max(zcr)

        # Root Mean Square Energy (RMSE)
        rmse = librosa.feature.rms(y=y)[0]
        rmse_stats = _mean_min_max(rmse)

        # Spectral Contrast: the last returned row is excluded so that the
        # remaining rows line up with contrast_subfeatures.
        contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        contrast_stats = _mean_min_max(contrast[:-1])

        # Tempo: depending on the installed librosa release, beat_track returns
        # the tempo either as a scalar or as a 1-element array. Indexing a scalar
        # with [0] raises, which the handler below would swallow for every track,
        # so normalise to an array before taking the first element.
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo_value = np.atleast_1d(tempo)[0]

        # Combine all data in the same order as full_index
        row_data = []
        row_data.extend(mfcc_stats.flatten())
        row_data.extend(centroid_stats)
        row_data.extend(bandwidth_stats)
        row_data.extend(zcr_stats)
        row_data.extend(rmse_stats)
        row_data.extend(contrast_stats.flatten())
        row_data.append(tempo_value)  # Single value for tempo

        feature_df.loc[i] = row_data

    except Exception as e:
        # Best effort: report the failure and leave this track's row unfilled.
        print(f"Error processing {i}: {e}")
        continue


In [None]:
# Move the track identifier out of the row index into an ordinary column.
# The guard keeps this cell idempotent: an unconditional in-place reset would
# insert an extra 'index' column every time the cell is re-run. After the first
# reset the index is an unnamed RangeIndex, so the branch is skipped.
if feature_df.index.name is not None:
    feature_df = feature_df.reset_index()

In [None]:
# Preview the first five rows of the assembled feature table.
feature_df.head(5)

In [None]:
# Persist the full feature table. index=True writes the row index as the first
# CSV column; the three column levels are written as three header rows.
feature_df.to_csv('mfcc_data_gtzan_v2.csv', index=True)

In [None]:
# To properly load the CSV with MultiIndex columns: header=[0, 1, 2] rebuilds the
# three column levels (feature, subfeature, stat) from the first three rows, and
# index_col=0 uses the first CSV column as the row index.
feature_df = pd.read_csv('mfcc_data_gtzan_v2.csv', header=[0, 1, 2], index_col=0)