In [None]:
# Requires pydub. If it is not installed, run `%pip install pydub` in a cell before continuing.

In [None]:
import os
import pandas as pd
import numpy as np
import librosa
from pydub import AudioSegment

In [None]:
# Feature extraction: walks BASE_PATH/<genre>/<audio file>, cuts every file into
# fixed-length segments and stores one feature row per segment in `data_df`.
# NOTE: this cell usually takes 25-30 minutes to run for all audio files.
BASE_PATH = "data/"
SEGMENT_DURATION = 3  # length of each analysed segment, in seconds
N_MFCC = 20           # number of MFCC coefficients summarised per segment


def extract_segment_features(segment, sr):
    """Summarise one audio segment as a flat list of statistics.

    Parameters
    ----------
    segment : audio samples of one segment, as sliced from the array returned
        by ``librosa.load``.
    sr : sampling rate returned by ``librosa.load`` for the same file.

    Returns
    -------
    list
        ``[mean, var]`` pairs for chroma_stft, rms, spectral centroid,
        spectral bandwidth, spectral flatness, rolloff, zero-crossing rate,
        harmonic part, percussive part, tonnetz and chroma_cens (in that
        order), followed by the tempo as a float, followed by a
        ``[mean, var]`` pair for each of the ``N_MFCC`` MFCC coefficients.
    """
    chroma_stft = librosa.feature.chroma_stft(y=segment, sr=sr)
    rms = librosa.feature.rms(y=segment)
    spec_cent = librosa.feature.spectral_centroid(y=segment, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=segment, sr=sr)
    spec_flat = librosa.feature.spectral_flatness(y=segment)
    rolloff = librosa.feature.spectral_rolloff(y=segment, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(segment)
    # A single HPSS pass yields both components; calling harmonic() and
    # percussive() separately repeats the same STFT + decomposition twice.
    harmony, perceptr = librosa.effects.hpss(segment)
    tempo, _ = librosa.beat.beat_track(y=segment, sr=sr)
    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=N_MFCC)
    tonnetz = librosa.feature.tonnetz(y=segment, sr=sr)
    cens = librosa.feature.chroma_cens(y=segment, sr=sr)

    stats = []
    for feature in (chroma_stft, rms, spec_cent, spec_bw, spec_flat, rolloff,
                    zcr, harmony, perceptr, tonnetz, cens):
        stats.append(np.mean(feature))
        stats.append(np.var(feature))

    # beat_track may hand back the tempo as a scalar or as a 1-element array
    # depending on the librosa version; take the first element explicitly
    # instead of relying on the deprecated array -> float conversion.
    stats.append(float(np.asarray(tempo).ravel()[0]))

    for coeff in mfcc:
        stats.append(np.mean(coeff))
        stats.append(np.var(coeff))

    return stats


data_df = []
# Only sub-directories are genres; stray files in BASE_PATH (e.g. .DS_Store)
# would otherwise make os.listdir(genre_path) raise and abort the whole run.
# Sorting makes the row order reproducible across machines.
genres = sorted(
    entry for entry in os.listdir(BASE_PATH)
    if os.path.isdir(os.path.join(BASE_PATH, entry))
)
for g in genres:
    genre_path = os.path.join(BASE_PATH, g)
    for file_name in sorted(os.listdir(genre_path)):
        song_path = os.path.join(genre_path, file_name)

        try:
            y, sr = librosa.load(song_path, mono=True)
            samples_per_segment = SEGMENT_DURATION * sr
            # Floor division: every segment below is complete, the trailing
            # remainder shorter than one segment is dropped.
            num_segments = len(y) // samples_per_segment

            for i in range(num_segments):
                start = i * samples_per_segment
                segment = y[start:start + samples_per_segment]

                # Row layout: segment id, segment length, statistics, label.
                row = [f"{file_name}_{i+1}", len(segment)]
                row.extend(extract_segment_features(segment, sr))
                row.append(g)
                data_df.append(row)

        except Exception as e:
            # Best effort: report the unreadable/corrupt file and carry on.
            print(f"Error processing {song_path}: {e}")


In [None]:
# Build the feature table from the rows collected in `data_df` and save it as CSV.
OUTPUT_CSV = 'csv/mldata_v2.csv'

# Column names, in the same order in which each feature row is assembled:
# segment id, segment length, mean/var pairs, tempo, 20 MFCC mean/var pairs, label.
columns = (
    ['filename', 'length',
     'chroma_stft_mean', 'chroma_stft_var',
     'rms_mean', 'rms_var',
     'spectral_centroid_mean', 'spectral_centroid_var',
     'spectral_bandwidth_mean', 'spectral_bandwidth_var',
     'spectral_flatness_mean', 'spectral_flatness_var',
     'rolloff_mean', 'rolloff_var',
     'zero_crossing_rate_mean', 'zero_crossing_rate_var',
     'harmony_mean', 'harmony_var',
     'perceptr_mean', 'perceptr_var',
     'tonnetz_mean', 'tonnetz_var',
     'chroma_cens_mean', 'chroma_cens_var',
     'tempo']
    + [f'mfcc{i+1}_{stat}' for i in range(20) for stat in ['mean', 'var']]
    + ['label']
)
df = pd.DataFrame(data_df, columns=columns)

# Create the output directory first; to_csv does not create missing parent
# directories and would otherwise fail after the long extraction run.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)