In [8]:
# feature extractoring and preprocessing data
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

In [6]:
# extracting the spectrogram for every audio file
# and save by genre into a directory called img_data
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'./genres/{g}'):
        songname = f'./genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()


<Figure size 720x720 with 0 Axes>

In [11]:
# extracting features from each spectrogram
# We will extract: 
#   Mel-frequency cepstral coefficients (MFCC)(20 in number)
#   Spectral Centroid
#   Zero Crossing Rate
#   Chroma Frequencies
#   Spectral Roll-off

header = 'filename chroma_stft spectra_centroid spectral_bandwidth rolloff zero_crossing_rate'

for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

# writing data to a csv file
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    for filename in os.listdir(f'./genres/{g}'):
        songname = f'./genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rms = librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [12]:
# analyze the data using pandas
data = pd.read_csv('data.csv')

data.head()

Unnamed: 0,filename,chroma_stft,spectra_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
blues.00093.wav,0.37769,0.065906,569.930721,995.407125,927.427725,0.021701,-350.436188,169.545746,31.82037,16.682835,...,1.82169,-5.970891,-5.259567,-0.229211,-1.77685,-3.713751,0.181591,2.07239,-2.896225,blues
blues.00087.wav,0.336773,0.158098,1442.190271,1870.534155,3083.414688,0.050889,-155.504929,125.638863,1.596553,45.80452,...,-0.792893,-7.748057,0.413548,-7.030262,3.997679,-6.256611,0.958227,2.019821,-5.742188,blues
blues.00050.wav,0.40086,0.18238,1945.848425,2082.246626,4175.874749,0.085806,-82.979019,107.052124,-25.320452,57.124989,...,12.539581,-9.762303,2.562253,-6.300853,2.996785,-8.718455,-0.326581,-2.980347,0.7126,blues
blues.00044.wav,0.390212,0.136276,2279.124558,2375.10212,5198.360233,0.09257,-109.509285,86.922409,-8.607987,64.49456,...,11.087481,-5.085794,3.97636,-12.859742,12.343859,0.026216,-0.741568,-5.12662,3.303442,blues
blues.00078.wav,0.414188,0.258052,2333.685108,2227.425609,4942.811778,0.123863,-2.524339,101.252716,-33.924385,41.516888,...,12.506608,-13.368823,6.112817,-9.06589,5.033774,-11.330277,3.166534,-4.567592,-4.033623,blues


In [13]:
data.shape

(1000, 27)

In [16]:
data.shape
data.head()

Unnamed: 0,chroma_stft,spectra_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
blues.00093.wav,0.065906,569.930721,995.407125,927.427725,0.021701,-350.436188,169.545746,31.82037,16.682835,28.710146,...,1.82169,-5.970891,-5.259567,-0.229211,-1.77685,-3.713751,0.181591,2.07239,-2.896225,blues
blues.00087.wav,0.158098,1442.190271,1870.534155,3083.414688,0.050889,-155.504929,125.638863,1.596553,45.80452,0.900778,...,-0.792893,-7.748057,0.413548,-7.030262,3.997679,-6.256611,0.958227,2.019821,-5.742188,blues
blues.00050.wav,0.18238,1945.848425,2082.246626,4175.874749,0.085806,-82.979019,107.052124,-25.320452,57.124989,0.085439,...,12.539581,-9.762303,2.562253,-6.300853,2.996785,-8.718455,-0.326581,-2.980347,0.7126,blues
blues.00044.wav,0.136276,2279.124558,2375.10212,5198.360233,0.09257,-109.509285,86.922409,-8.607987,64.49456,-6.304127,...,11.087481,-5.085794,3.97636,-12.859742,12.343859,0.026216,-0.741568,-5.12662,3.303442,blues
blues.00078.wav,0.258052,2333.685108,2227.425609,4942.811778,0.123863,-2.524339,101.252716,-33.924385,41.516888,-13.804996,...,12.506608,-13.368823,6.112817,-9.06589,5.033774,-11.330277,3.166534,-4.567592,-4.033623,blues
