In [1]:
import sys
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
sys.path.append(os.path.abspath('..'))
import utils

In [2]:
# Directory containing the audio files. It is passed, together with each
# track's file name, to utils.get_audio_path when the features are extracted.
AUDIO_DIR = '../dataset/data/genres_original/'

In [3]:
# Load the track table and pull out its 'file_name' column; that Series is
# what the feature-extraction loop iterates over and uses as the row index.
tracks = pd.read_csv('../project_data/tracks.csv')
track_ids = tracks.loc[:, 'file_name']

In [4]:
# Column layout: one column per (MFCC coefficient, statistic) pair, ordered
# coefficient-major: (0, 'mean'), (0, 'min'), (0, 'max'), (1, 'mean'), ...
N_MFCC = 13
multi_col_index = pd.MultiIndex.from_product(
    [range(N_MFCC), ['mean', 'min', 'max']], names=['mfcc_coeff', 'stat']
)

# Pre-allocate a float matrix with one row per track. Rows of tracks that fail
# to process stay NaN, and building the DataFrame once from this array yields
# float64 columns (an empty DataFrame filled row by row through .loc ends up
# with object-dtype columns and is much slower to populate).
mfcc_values = np.full((len(track_ids), len(multi_col_index)), np.nan)
failed_tracks = []  # file names whose processing raised, kept for inspection

for cnt, i in enumerate(track_ids, start=1):
    if cnt % 100 == 0:
        print(f"Processing track {cnt}...")
    path = utils.get_audio_path(AUDIO_DIR, i)
    try:
        # sr=None asks librosa to keep the file's native sampling rate.
        y, sr = librosa.load(path, sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)

        # Reduce along axis 1 so every coefficient gets one value per statistic.
        mfcc_mean = np.mean(mfccs, axis=1)
        mfcc_min = np.min(mfccs, axis=1)
        mfcc_max = np.max(mfccs, axis=1)

        # Stacking on axis=1 gives one (mean, min, max) triple per coefficient;
        # flattening it row-major matches the order of multi_col_index.
        mfcc_values[cnt - 1] = np.stack(
            [mfcc_mean, mfcc_min, mfcc_max], axis=1
        ).flatten()
    except Exception as e:
        # Best effort: report the failure, remember the track, and move on so
        # that one unreadable file does not abort the whole extraction run.
        print(f"Error processing {i}: {e}")
        failed_tracks.append(i)
        continue

# Build the DataFrame once, with MultiIndex columns and the track file names
# as the row index.
mfcc_df = pd.DataFrame(mfcc_values, index=track_ids, columns=multi_col_index)

Processing track 100...
Processing track 200...
Processing track 300...
Processing track 400...
Processing track 500...
Processing track 600...
Processing track 700...
Processing track 800...
Processing track 900...


In [5]:
# Move the track file names out of the row index into a regular column.
# The guard makes the cell safe to re-run: once the index is the default
# unnamed one, another reset_index would insert a spurious 'index' column.
if mfcc_df.index.name is not None:
    mfcc_df = mfcc_df.reset_index()
mfcc_df.head()

mfcc_coeff,file_name,0,0,0,1,1,1,2,2,2,...,9,10,10,10,11,11,11,12,12,12
stat,Unnamed: 1_level_1,mean,min,max,mean,min,max,mean,min,max,...,max,mean,min,max,mean,min,max,mean,min,max
0,blues.00000.wav,-113.598824,-257.911011,21.742056,121.570671,46.890461,164.754761,-19.162262,-70.900154,28.366945,...,39.424351,-8.326061,-33.125565,15.191151,8.802088,-12.483557,31.242996,-3.669941,-26.569305,22.482096
1,blues.00001.wav,-207.523834,-448.555634,-25.956675,123.985138,28.88229,192.770782,8.947019,-78.69281,59.997047,...,40.404282,-5.558824,-37.257187,24.502066,5.377876,-25.349419,37.242592,-2.234492,-27.189575,19.230034
2,blues.00002.wav,-90.757164,-343.934814,32.401398,140.440872,-11.795291,198.838486,-29.084547,-85.009323,46.547844,...,31.050621,-13.125314,-45.190308,13.12067,5.791247,-32.569252,25.241777,-8.901967,-28.854305,23.078028
3,blues.00003.wav,-199.575134,-363.953613,-14.066174,150.086105,62.801785,235.073914,5.663404,-63.067093,43.561996,...,32.761795,-3.200026,-45.598267,25.346262,6.078081,-14.12202,40.363243,-2.478445,-32.890564,20.651331
4,blues.00004.wav,-160.354172,-356.302917,-6.147505,126.209496,-11.409155,188.284653,-35.581394,-84.612366,21.923258,...,30.684135,-13.084959,-41.40929,24.979959,-2.810499,-30.323238,31.72102,-6.934471,-36.607307,24.070385


In [6]:
# Persist the feature table. index=True also writes the row index as the
# first CSV column, which the read-back below consumes with index_col=0.
mfcc_df.to_csv(
    path_or_buf='mfcc_data_gtzan_v1.csv',
    index=True,
)

In [7]:
# Reload the saved features: the first two lines of the file form the
# two-level column header, and the first column is used as the row index.
mfcc_df = pd.read_csv(
    'mfcc_data_gtzan_v1.csv',
    index_col=0,
    header=[0, 1],
)