In [1]:
import os, librosa, librosa.display
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import sys

from IPython.display import Audio

# Put the parent directory on the import path so modules located there can be imported.
sys.path.append('..')
# Root folder of the dataset; the CSV path below is built from it.
audio_root_folder = '../archive/data'

In [2]:
# Feature table that ships with the dataset; the cells below read its
# `filename` and `label` columns to locate and label the audio clips.
labels_csv = os.path.join(audio_root_folder, 'features_30_sec.csv')
df = pd.read_csv(labels_csv, header=0)

# Exclude jazz.00054.wav from everything that follows
# (presumably its audio file cannot be decoded -- TODO confirm).
is_excluded = df.filename == 'jazz.00054.wav'
df = df.drop(index=df.index[is_excluded])
df.head(5)

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [3]:
# Show the column labels of the loaded table.
df.columns

Index(['filename', 'length', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean',
       'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var',
       'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean',
       'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var',
       'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var', 'tempo',
       'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean',
       'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var',
       'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
       'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var',
       'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean',
       'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var',
       'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean',
       'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var',
  

In [7]:
import os
import librosa
from librosa import feature, effects
import numpy as np
from multiprocessing import Pool
from tqdm import tqdm

def load_audios(root_folder, filename, label):
    """Load one clip from ``<root_folder>/genres_original/<label>/<filename>``.

    Args:
        root_folder: Dataset root that contains the ``genres_original`` folder.
        filename: File name of the clip inside its label folder.
        label: Name of the sub-folder of ``genres_original`` holding the clip.

    Returns:
        tuple: ``(samples, sample_rate)`` exactly as produced by
        ``librosa.load`` called with its default arguments.
    """
    clip_path = os.path.join(root_folder, 'genres_original', label, filename)
    return librosa.load(clip_path)

filenames = df['filename'].tolist()
labels = df['label'].tolist()
root_folder = '../archive/data'
# Usage: load every clip listed in the table; each entry of `audios` is the
# (samples, sample_rate) tuple returned by load_audios, kept in table order.
audios = [
    load_audios(root_folder, clip_name, clip_label)
    for clip_name, clip_label in tqdm(zip(filenames, labels), total=len(labels))
]

  0%|          | 0/999 [00:00<?, ?it/s]

100%|██████████| 999/999 [00:03<00:00, 286.69it/s]


In [42]:
def generate_features(x, sample_rate):
    """Summarise one audio clip as a flat list of descriptor statistics.

    Args:
        x: Audio samples, handed to librosa as ``y``.
        sample_rate: Sampling rate of ``x``, handed to librosa as ``sr``.

    Returns:
        list: Values in this fixed order -- tempo; (mean, var) pairs for rms,
        spectral centroid, spectral bandwidth, spectral rolloff,
        zero-crossing rate and the output of ``effects.harmonic``; the
        per-coefficient means then variances of 20 MFCCs; the per-bin means
        then variances of 12 chroma bins.
    """
    tempo_estimate = feature.tempo(y=x, sr=sample_rate)

    # Descriptors reduced to a single global (mean, var) pair each; the order
    # of this tuple fixes the order of the corresponding output entries.
    pooled_descriptors = (
        feature.rms(y=x),
        feature.spectral_centroid(y=x, sr=sample_rate),
        feature.spectral_bandwidth(y=x, sr=sample_rate),
        feature.spectral_rolloff(y=x, sr=sample_rate),
        feature.zero_crossing_rate(y=x),
        effects.harmonic(y=x),
    )

    mfcc_matrix = feature.mfcc(y=x, sr=sample_rate, n_mfcc=20)
    chroma_matrix = feature.chroma_stft(y=x, sr=sample_rate, n_chroma=12)

    row = [tempo_estimate[0]]
    for descriptor in pooled_descriptors:
        row.append(descriptor.mean())
        row.append(descriptor.var())
    for matrix in (mfcc_matrix, chroma_matrix):
        # Reduce along axis 1: all means of a matrix first, then all variances.
        row += matrix.mean(axis=1).tolist()
        row += matrix.var(axis=1).tolist()

    return row

In [43]:
# One feature row per loaded clip, in the same order as `audios`.
# This is the expensive step: the recorded run took about 28 minutes for 999 clips.
features = []
clip_progress = tqdm(audios, total=len(audios))
for audio, sample_rate in clip_progress:
    features += [generate_features(audio, sample_rate)]

100%|██████████| 999/999 [28:25<00:00,  1.71s/it]


In [44]:
from sklearn.preprocessing import LabelEncoder

# Map the label strings to integer ids. `labels` is rebound from the list of
# strings to the encoded array, so re-running this cell without re-running the
# loading cell would fit the encoder on the integer ids instead.
encoder = LabelEncoder()
label_strings = np.array(labels)
labels = encoder.fit_transform(label_strings)
num_classes = labels.max() + 1

In [45]:
# Convert the list of per-clip feature rows into one array (rebinds `features`),
# then display the shapes of the feature array and the encoded labels.
features = np.array(features)
features.shape, labels.shape

((999, 77), (999,))

In [80]:
# Append the encoded labels as the last column of the feature matrix.
data = np.column_stack((features, labels))
data.shape

(999, 78)

In [83]:
# Column names for the tabular export. The order mirrors the list returned by
# generate_features(), followed by the label column.
feature_names = [
    'tempo',
    'rms_mean', 'rms_var',
    'spectral_centroid_mean', 'spectral_centroid_var',
    'spectral_bandwidth_mean', 'spectral_bandwidth_var',
    'spectral_rolloff_mean', 'spectral_rolloff_var',
    'zero_crossing_rate_mean', 'zero_crossing_rate_var',
    'harmony_mean', 'harmony_var',
    *(f'mfcc_mean_{i}' for i in range(20)),
    *(f'mfcc_var_{i}' for i in range(20)),
    *(f'chroma_mean_{i}' for i in range(12)),
    *(f'chroma_var_{i}' for i in range(12)),
    'label',
]

len(feature_names)

78

In [84]:
# Create the output folder before writing so this cell also works on a fresh
# kernel run in file order (the folder was previously only created further down).
os.makedirs('../processed_data/tabular_data', exist_ok=True)

# NOTE: this rebinds `df`, replacing the table loaded from features_30_sec.csv.
df = pd.DataFrame(data, columns=feature_names)
# `data` is a single float array, so the encoded label ids arrive as floats;
# store them as integers so the CSV holds class ids such as 3 rather than 3.0.
df['label'] = df['label'].astype(int)
df.to_csv('../processed_data/tabular_data/features.csv', index=False)

In [47]:
import os

# Create the output folder if it does not exist yet, then store the feature
# array and the encoded labels as two separate .npy files.
os.makedirs('../processed_data/tabular_data', exist_ok=True)
np.save('../processed_data/tabular_data/data.npy', features)
np.save('../processed_data/tabular_data/label.npy', labels)