In [None]:
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import os
from time import perf_counter

In [None]:
# Root folder holding one sub-directory of audio files per genre.
data_dir = '../data/genres_original'

# Genre labels; each is also used as the sub-directory name under data_dir.
genres = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

# Window length in seconds; it is multiplied by the sample rate to get samples per window.
window_size = 3

In [None]:
# Column names for the feature table, in the order each row is assembled:
# file identifier and window length, a mean/var pair for each summary feature,
# the tempo, a mean/var pair for each of the 20 MFCCs, and the genre label.
columns = [
    'filename',
    'length',
    *(
        f'{feature}_{stat}'
        for feature in (
            'chroma_stft',
            'rms',
            'spectral_centroid',
            'spectral_bandwidth',
            'rolloff',
            'zero_crossing_rate',
            'harmony',
            'perceptr',
        )
        for stat in ('mean', 'var')
    ),
    'tempo',
    *(
        f'mfcc{index}_{stat}'
        for index in range(1, 21)
        for stat in ('mean', 'var')
    ),
    'label',
]

In [None]:
def extract_features(window, sample_rate=22050):
    """Summarise one audio window as a flat list of feature statistics.

    Parameters
    ----------
    window : np.ndarray
        Audio samples of the window (assumed to be a mono float signal as
        returned by ``librosa.load`` -- TODO confirm for other callers).
    sample_rate : int, optional
        Sampling rate of ``window`` in Hz. Defaults to 22050, the value the
        librosa functions below assume when no rate is given, so calling
        ``extract_features(window)`` behaves as before.

    Returns
    -------
    list
        Mean and variance of chroma, RMS, spectral centroid, spectral
        bandwidth, spectral roll-off, zero-crossing rate and the two HPSS
        components, followed by the tempo and an interleaved mean/variance
        pair per MFCC -- the same order as ``columns[2:-1]``.
    """
    # The audio is passed as ``y=...`` everywhere: librosa >= 0.10 made it
    # keyword-only for most of these functions, and the keyword form also
    # works on older releases.
    chromogram = librosa.feature.chroma_stft(y=window, sr=sample_rate)
    rms = librosa.feature.rms(y=window)
    centroid = librosa.feature.spectral_centroid(y=window, sr=sample_rate)
    bandwidth = librosa.feature.spectral_bandwidth(y=window, sr=sample_rate)
    roll_off = librosa.feature.spectral_rolloff(y=window, sr=sample_rate)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y=window)
    # hpss returns (harmonic, percussive); the second component is the one
    # reported under the ``perceptr_*`` column names.
    harmony, perceptual = librosa.effects.hpss(y=window)
    tempo, _ = librosa.beat.beat_track(y=window, sr=sample_rate)
    # n_mfcc is pinned to 20 so the output width always matches the
    # mfcc1..mfcc20 columns.
    mfcc = librosa.feature.mfcc(y=window, sr=sample_rate, n_mfcc=20)

    # Newer librosa releases may return the tempo as a one-element array;
    # reduce it to a plain scalar so it fills a single numeric table cell.
    tempo = float(np.atleast_1d(tempo)[0])

    # Interleave per-coefficient statistics: mfcc1_mean, mfcc1_var, mfcc2_mean, ...
    mfcc_values = [
        stat
        for pair in zip(mfcc.mean(axis=1), mfcc.var(axis=1))
        for stat in pair
    ]

    return [
        chromogram.mean(),
        np.var(chromogram),
        rms.mean(),
        np.var(rms),
        centroid.mean(),
        np.var(centroid),
        bandwidth.mean(),
        np.var(bandwidth),
        roll_off.mean(),
        np.var(roll_off),
        zero_crossing_rate.mean(),
        np.var(zero_crossing_rate),
        harmony.mean(),
        np.var(harmony),
        perceptual.mean(),
        np.var(perceptual),
        tempo,
        *mfcc_values
    ]


In [None]:
# Walk every genre folder, cut each track into fixed-length windows and
# collect one feature row per window. The whole pass is timed because it is
# by far the slowest cell of the notebook.
t1 = perf_counter()
features = []
for genre in genres:
    # sorted(): os.listdir returns entries in arbitrary order; sorting keeps
    # the row order of the resulting table reproducible between runs.
    for file in sorted(os.listdir(f'{data_dir}/{genre}')):
        try:
            # librosa.load resamples to 22050 Hz unless told otherwise.
            signal, sample_rate = librosa.load(f'{data_dir}/{genre}/{file}')
            signal, _ = librosa.effects.trim(signal)  # Get rid of silence at the beginning and end
            n_points = int(window_size * sample_rate)
            stem, extension = os.path.splitext(file)
            # Floor division keeps only complete windows; a trailing partial
            # window is dropped, so every slice below has exactly n_points samples.
            for i in range(len(signal) // n_points):
                print(genre, file, i, end='\r')
                window = signal[i * n_points:(i + 1) * n_points]
                values = extract_features(window)
                # '<stem>.<window index><ext>' identifies the window,
                # e.g. 'blues.00000.wav' -> 'blues.00000.0.wav'.
                features.append([f'{stem}.{i}{extension}', len(window), *values, genre])
        except Exception as ex:
            # Best effort: an unreadable or failing file must not abort the
            # whole run, but report which file it was so it can be inspected.
            # The leading newline keeps the message off the '\r' progress line.
            print(f'\nSkipping {genre}/{file}: {ex!r}')

t2 = perf_counter()
print(int(t2 - t1), 'seconds')

In [15]:
# Re-display the wall-clock duration of the extraction pass (whole seconds).
elapsed_seconds = int(t2 - t1)
print(f'{elapsed_seconds} seconds')

2696 seconds


In [None]:
# Build the feature table with its column names in a single step. Passing
# `columns` here (rather than relying only on a later assignment) keeps this
# cell idempotent and still yields a correctly shaped, empty table when no
# window could be extracted.
df = pd.DataFrame(features, columns=columns)

In [None]:
# Label the table: one name per value in each row (filename, length, the
# feature statistics, label). pandas raises if the counts do not match.
df.columns = columns

In [16]:
# Show the resulting feature table (pandas truncates long frames to head/tail).
df

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66150,0.335434,0.091088,0.130405,0.003521,1773.285877,168244.728448,1972.723622,117298.851326,...,39.725563,-3.241225,36.486435,0.721986,38.096756,-5.043307,33.608326,-0.237658,43.827770,blues
1,blues.00000.1.wav,66150,0.343020,0.086142,0.112699,0.001450,1816.195860,90703.325185,2009.201575,65548.531475,...,65.312973,-6.100084,40.738815,0.219240,50.587799,-2.875456,96.975441,5.800725,60.006119,blues
2,blues.00000.2.wav,66150,0.346838,0.092210,0.132002,0.004620,1788.642783,111322.537051,2085.045996,74755.260660,...,67.582199,-1.801321,28.132215,2.308315,48.102486,-1.931482,53.116814,2.522432,33.136238,blues
3,blues.00000.3.wav,66150,0.363671,0.086856,0.132562,0.002447,1654.902168,112316.264385,1959.202709,83672.222326,...,47.059677,-3.859234,27.984928,1.255384,35.093281,-3.619866,51.420628,3.639336,31.930040,blues
4,blues.00000.4.wav,66150,0.335927,0.088291,0.143289,0.001701,1630.737017,79648.228297,1948.459295,60221.595017,...,30.307617,0.629567,44.883640,1.709781,51.706692,-3.402106,26.686306,0.546950,29.212286,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9975,rock.00099.5.wav,66150,0.349242,0.080523,0.050019,0.000097,1499.033452,164296.530035,1718.741487,85910.800698,...,42.411770,-9.072773,38.522133,-4.239303,31.064833,-5.625192,48.812859,1.794132,38.993576,rock
9976,rock.00099.6.wav,66150,0.372667,0.082638,0.057897,0.000088,1847.993567,281007.290736,1906.381942,99815.431686,...,32.480297,-12.389524,65.873482,-3.085007,54.283775,-11.963290,63.390671,0.404298,18.759731,rock
9977,rock.00099.7.wav,66150,0.347207,0.088840,0.052402,0.000701,1346.166434,662977.657576,1562.051940,139002.057676,...,78.050781,-2.524271,21.777952,4.799636,25.962271,1.797535,48.307682,-0.320112,41.750011,rock
9978,rock.00099.8.wav,66150,0.387354,0.084762,0.066430,0.000320,2084.876439,203148.324534,2019.066228,22200.073744,...,28.323931,-5.370702,17.280422,6.469846,21.370140,2.359090,24.827066,0.688447,12.747364,rock


In [None]:
# Persist the feature table without the positional index column. The window
# length is taken from `window_size` so the file name always reflects the
# configuration that produced the data (3 -> 'features_3_sec_own.csv').
df.to_csv(f'../data/features_{window_size}_sec_own.csv', index=False)