In [2]:
import os
import pandas as pd
import numpy as np
import sklearn
import librosa
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras

In [3]:
# Root folder of the dataset. Each file's label is the part of its name before
# the first "." (presumably "<genre>.<id>.wav" -- verify against the data).
data_path = "../artifacts/data/genres_original/"

# Gather every .wav file found anywhere below data_path.
audio_paths = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(data_path, topdown=False)
    for file in files
    if file.endswith(".wav")
]

# os.walk returns directory entries in arbitrary, filesystem-dependent order.
# Sorting makes the row index of every file (and therefore bad_index and the
# saved arrays) reproducible from one machine / run to the next.
audio_paths.sort()

# Derive the labels from the sorted paths so both arrays stay aligned.
audio_labels = [os.path.basename(path).split(".", 1)[0] for path in audio_paths]

audio_labels = np.array(audio_labels)
audio_paths = np.array(audio_paths)

In [5]:
# Preallocate one row per discovered audio file for every feature.
# The row count follows the number of files instead of a hard-coded 1000, so
# no uninitialised np.empty rows are left behind when the dataset is smaller
# and no out-of-range index is produced when it is larger.
n_tracks = len(audio_labels)
# Frames per clip; presumably a ~30 s clip at librosa's default sample rate
# and hop length -- TODO confirm.
n_frames = 1293

# float32 is used from the start: a later cell casts every array to float32
# anyway, so the stored values are the same while the peak memory is halved.
spec = np.empty([n_tracks, 1025, n_frames], dtype=np.float32)     # STFT magnitude in dB
mel_spec = np.empty([n_tracks, 128, n_frames], dtype=np.float32)  # mel spectrogram in dB
mfcc = np.empty([n_tracks, 10, n_frames], dtype=np.float32)       # 10 MFCCs
zcr = np.empty([n_tracks, n_frames], dtype=np.float32)            # zero-crossing rate
spec_c = np.empty([n_tracks, n_frames], dtype=np.float32)         # spectral centroid
# NOTE: `chr` shadows the builtin chr(); the name is kept because later cells use it.
chr = np.empty([n_tracks, 12, n_frames], dtype=np.float32)        # 12-bin chromagram

# Indices of files whose extraction failed, plus the reason for each failure.
bad_index = []
bad_reasons = {}
for i in tqdm(range(n_tracks)):
    try:
        audio = audio_paths[i]
        # y: waveform, r: sample rate chosen by librosa.load
        y, r = librosa.load(audio)

        stft = librosa.stft(y)
        stft_db = librosa.amplitude_to_db(abs(stft))
        spec[i] = stft_db

        # sr is passed explicitly for consistency with the other feature calls.
        mel = librosa.feature.melspectrogram(y=y, sr=r)
        mel_db = librosa.power_to_db(mel)
        mel_spec[i] = mel_db

        mfc = librosa.feature.mfcc(y=y, sr=r, n_mfcc=10)
        mfcc[i] = mfc

        zero = librosa.feature.zero_crossing_rate(y)[0]
        zcr[i] = zero

        spec_cent = librosa.feature.spectral_centroid(y=y, sr=r)[0]
        spec_c[i] = spec_cent

        chroma = librosa.feature.chroma_stft(y=y, sr=r, n_chroma=12, n_fft=4096)
        chr[i] = chroma

    except Exception as exc:
        # Only ordinary errors are caught, so Ctrl-C still interrupts the loop.
        # The row is flagged for removal (a later cell deletes it) and the
        # reason is kept for inspection. Presumably most failures are clips
        # whose frame count differs from n_frames -- check bad_reasons.
        bad_index.append(i)
        bad_reasons[i] = repr(exc)

  0%|          | 0/1000 [00:00<?, ?it/s]

  y, r = librosa.load(audio)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
100%|██████████| 1000/1000 [06:57<00:00,  2.40it/s]


In [6]:
# Sanity check: dimensions of the spectrogram tensor after the extraction loop.
spec.shape

(1000, 1025, 1293)

In [7]:
# Number of files whose feature extraction raised and were recorded in bad_index.
len(bad_index)

56

In [11]:
# Row indices of the failed files; these rows are removed from the labels and
# from every feature array in the following cells.
bad_index

[142,
 145,
 148,
 149,
 151,
 153,
 154,
 156,
 200,
 202,
 203,
 204,
 205,
 206,
 207,
 208,
 209,
 300,
 301,
 303,
 305,
 308,
 312,
 314,
 316,
 426,
 427,
 429,
 431,
 432,
 433,
 434,
 435,
 436,
 437,
 438,
 439,
 440,
 441,
 442,
 443,
 444,
 445,
 550,
 552,
 554,
 556,
 559,
 561,
 563,
 566,
 927,
 932,
 934,
 936,
 938]

In [8]:
# Remove the labels of the files that failed feature extraction.
# NOTE: audio_labels is rebuilt from itself, so running this cell a second
# time removes further rows.
audio_labels = np.delete(audio_labels, bad_index, axis=0)

In [9]:
# Number of labels left after dropping the failed files.
audio_labels.shape

(944,)

In [10]:
# Remove the spectrogram rows of the files that failed feature extraction.
# NOTE: spec is rebuilt from itself, so re-running this cell removes further rows.
spec = np.delete(spec, obj=bad_index, axis=0)

In [11]:
# Drop the rows of the failed files from the remaining feature arrays.
# One boolean mask over axis 0 is built and applied to each array; all of
# them were allocated with the same number of rows.
rows_to_keep = np.ones(len(mel_spec), dtype=bool)
rows_to_keep[bad_index] = False

mel_spec = mel_spec[rows_to_keep]
mfcc = mfcc[rows_to_keep]
zcr = zcr[rows_to_keep]
spec_c = spec_c[rows_to_keep]
chr = chr[rows_to_keep]

In [12]:
# Store every feature array as single precision.
# Each array is converted and rebound one at a time so the previous copy can
# be released before the next conversion starts.
feature_dtype = np.float32

spec = spec.astype(feature_dtype)
mel_spec = mel_spec.astype(feature_dtype)
mfcc = mfcc.astype(feature_dtype)
zcr = zcr.astype(feature_dtype)
spec_c = spec_c.astype(feature_dtype)
chr = chr.astype(feature_dtype)

In [13]:
# Integer class id assigned to each genre name.
genre_ids = {
    'blues': 0,
    'classical': 1,
    'country': 2,
    'disco': 3,
    'hiphop': 4,
    'jazz': 5,
    'metal': 6,
    'pop': 7,
    'reggae': 8,
    'rock': 9,
}

# A known genre name is replaced by its id; any other value is handed to int()
# unchanged, so an unexpected non-numeric label raises ValueError.
audio_labels = np.array([int(genre_ids.get(name, name)) for name in audio_labels])

In [14]:
# audio_labels

In [15]:
# One-hot encode the integer genre ids into 10 int32 columns.
# NOTE: `y` was the waveform variable inside the extraction loop; from this
# cell on it holds the target matrix.
y = tf.keras.utils.to_categorical(
    audio_labels,
    num_classes=10,
    dtype="int32",
)

In [16]:
# Display the one-hot encoded target matrix.
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [18]:
# Persist every feature array and the one-hot targets in one compressed archive.
# "cen" stores spec_c, the per-file spectral-centroid matrix. The previous code
# passed spec_cent, which is only the centroid vector of the last file
# processed in the extraction loop.
np.savez_compressed(
    os.path.join("../artifacts/data/", "MusicFeatures.npz"),
    spec=spec,
    mel=mel_spec,
    mfcc=mfcc,
    zcr=zcr,
    cen=spec_c,
    chroma=chr,
    target=y,
)