# Music Genre Classification

## Feature Extraction

- In this file, we will do feature extraction for Machine Learning Models
- Features :
  - Amplitude Envelope
  - Root Mean Square Energy
  - Zero Crossing Rate
  - Spectral Centroid
  - Spectral Rolloff
  - Spectral Bandwidth
  - Spectral Contrast
  - Tempo
  - Chroma
  - Tonnetz
  - MFCCs
  - Delta MFCCs
  - Delta Delta MFCCs

### Importing required packages

In [1]:
import os
from pathlib import Path
import librosa
import numpy as np
from scipy.stats import skew 
from scipy.stats import kurtosis 
import pandas as pd

#### Getting Amplitude Envelope
- It is not available in librosa library

In [2]:
def get_amplitude_envelope(y, frame_length, hop_length):
    amplitude_envelope = []
    
    for i in range(0, len(y), hop_length):
        curr_max_amplitude = max(y[i:i+frame_length])
        amplitude_envelope.append(curr_max_amplitude)
    
    return np.array(amplitude_envelope)

#### Getting features 
- We are using 7 statistics for each feature
  - min
  - max
  - mean
  - median
  - std
  - skewness
  - kurtosis

For some reason, we were getting error when we used librosa.feature.rhythm.tempo. We were not getting that error if we used librosa.beat.tempo once

In [3]:
y,sr = librosa.load('raw_data/genres/blues/blues.00000.au')
temp_value = librosa.beat.tempo(y=y)

	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  temp_value = librosa.beat.tempo(y=y)


In [4]:
def get_features(y, label, sr=22050, frame_length=1024, hop_length=512):
    features = {
        'amplitude_envelope' : get_amplitude_envelope(y, frame_length, hop_length),
        'rmse' : librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length),
        'zcr' : librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length),
        'spectral_centroid' : librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=frame_length, hop_length = hop_length),
        'spectral_rolloff' : librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=frame_length, hop_length=hop_length),
        'spectral_bandwidth' : librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=frame_length, hop_length = hop_length)
    }
    spectral_contrast =  librosa.feature.spectral_contrast(y=y, sr=sr, n_fft=frame_length, hop_length=hop_length)
    for i, array in enumerate(spectral_contrast):
        features[f'spectral_contrast_{i}'] = array 
    
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=frame_length, hop_length=hop_length)
    for i, array in enumerate(chroma):
        features[f'chroma_{i}'] = array 
        
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
    for i, array in enumerate(tonnetz):
        features[f'tonnetz_{i}'] = array 
    
    mfcc = librosa.feature.mfcc(y=y, n_mfcc=13, sr = sr, n_fft = frame_length, hop_length = hop_length)
    for i, array in enumerate(mfcc):
        features[f'mfcc_{i}'] = array
    
    delta_mfcc = librosa.feature.delta(mfcc)
    for i, array in enumerate(delta_mfcc):
        features[f'delta_{i}'] = array
    
    delta_delta_mfcc = librosa.feature.delta(mfcc, order = 2)
    for i, array in enumerate(delta_delta_mfcc):
        features[f'delta2_{i}'] = array
    
    final_features = {}
    for feature_name, feature_array in features.items():
        final_features[f'{feature_name}_min'] = [np.min(feature_array.squeeze())]
        final_features[f'{feature_name}_max'] = [np.max(feature_array.squeeze())]
        final_features[f'{feature_name}_mean'] = [np.mean(feature_array.squeeze())]
        final_features[f'{feature_name}_median'] = [np.median(feature_array.squeeze())]
        final_features[f'{feature_name}_std'] = [np.std(feature_array.squeeze())]
        final_features[f'{feature_name}_skewness'] = [skew(feature_array.squeeze())]
        final_features[f'{feature_name}_kurtosis'] = [kurtosis(feature_array.squeeze())]
        final_features['tempo'] = librosa.feature.rhythm.tempo(y=y, sr=sr, hop_length=hop_length)
        final_features['label'] = label
        
    return final_features

#### Creating dataframe to store feature values

In [5]:
df = pd.DataFrame(columns=['amplitude_envelope_min','amplitude_envelope_max','amplitude_envelope_mean','amplitude_envelope_median','amplitude_envelope_std','amplitude_envelope_skewness','amplitude_envelope_kurtosis','tempo','rmse_min','rmse_max','rmse_mean','rmse_median','rmse_std','rmse_skewness','rmse_kurtosis','zcr_min','zcr_max','zcr_mean','zcr_median','zcr_std','zcr_skewness','zcr_kurtosis','spectral_centroid_min','spectral_centroid_max','spectral_centroid_mean','spectral_centroid_median','spectral_centroid_std','spectral_centroid_skewness','spectral_centroid_kurtosis','spectral_rolloff_min','spectral_rolloff_max','spectral_rolloff_mean','spectral_rolloff_median','spectral_rolloff_std','spectral_rolloff_skewness','spectral_rolloff_kurtosis','spectral_bandwidth_min','spectral_bandwidth_max','spectral_bandwidth_mean','spectral_bandwidth_median','spectral_bandwidth_std','spectral_bandwidth_skewness','spectral_bandwidth_kurtosis','spectral_contrast_0_min','spectral_contrast_0_max','spectral_contrast_0_mean','spectral_contrast_0_median','spectral_contrast_0_std','spectral_contrast_0_skewness','spectral_contrast_0_kurtosis','spectral_contrast_1_min','spectral_contrast_1_max','spectral_contrast_1_mean','spectral_contrast_1_median','spectral_contrast_1_std','spectral_contrast_1_skewness','spectral_contrast_1_kurtosis','spectral_contrast_2_min','spectral_contrast_2_max','spectral_contrast_2_mean','spectral_contrast_2_median','spectral_contrast_2_std','spectral_contrast_2_skewness','spectral_contrast_2_kurtosis','spectral_contrast_3_min','spectral_contrast_3_max','spectral_contrast_3_mean','spectral_contrast_3_median','spectral_contrast_3_std','spectral_contrast_3_skewness','spectral_contrast_3_kurtosis','spectral_contrast_4_min','spectral_contrast_4_max','spectral_contrast_4_mean','spectral_contrast_4_median','spectral_contrast_4_std','spectral_contrast_4_skewness','spectral_contrast_4_kurtosis','spectral_contrast_5_min','spectral_contrast_5_max','spectral_contrast_5_mean','spectral_contrast_5_median','spectral_contrast_5_std','spectral_contrast_5_skewness','spectral_contrast_5_kurtosis','spectral_contrast_6_min','spectral_contrast_6_max','spectral_contrast_6_mean','spectral_contrast_6_median','spectral_contrast_6_std','spectral_contrast_6_skewness','spectral_contrast_6_kurtosis','chroma_0_min','chroma_0_max','chroma_0_mean','chroma_0_median','chroma_0_std','chroma_0_skewness','chroma_0_kurtosis','chroma_1_min','chroma_1_max','chroma_1_mean','chroma_1_median','chroma_1_std','chroma_1_skewness','chroma_1_kurtosis','chroma_2_min','chroma_2_max','chroma_2_mean','chroma_2_median','chroma_2_std','chroma_2_skewness','chroma_2_kurtosis','chroma_3_min','chroma_3_max','chroma_3_mean','chroma_3_median','chroma_3_std','chroma_3_skewness','chroma_3_kurtosis','chroma_4_min','chroma_4_max','chroma_4_mean','chroma_4_median','chroma_4_std','chroma_4_skewness','chroma_4_kurtosis','chroma_5_min','chroma_5_max','chroma_5_mean','chroma_5_median','chroma_5_std','chroma_5_skewness','chroma_5_kurtosis','chroma_6_min','chroma_6_max','chroma_6_mean','chroma_6_median','chroma_6_std','chroma_6_skewness','chroma_6_kurtosis','chroma_7_min','chroma_7_max','chroma_7_mean','chroma_7_median','chroma_7_std','chroma_7_skewness','chroma_7_kurtosis','chroma_8_min','chroma_8_max','chroma_8_mean','chroma_8_median','chroma_8_std','chroma_8_skewness','chroma_8_kurtosis','chroma_9_min','chroma_9_max','chroma_9_mean','chroma_9_median','chroma_9_std','chroma_9_skewness','chroma_9_kurtosis','chroma_10_min','chroma_10_max','chroma_10_mean','chroma_10_median','chroma_10_std','chroma_10_skewness','chroma_10_kurtosis','chroma_11_min','chroma_11_max','chroma_11_mean','chroma_11_median','chroma_11_std','chroma_11_skewness','chroma_11_kurtosis','tonnetz_0_min','tonnetz_0_max','tonnetz_0_mean','tonnetz_0_median','tonnetz_0_std','tonnetz_0_skewness','tonnetz_0_kurtosis','tonnetz_1_min','tonnetz_1_max','tonnetz_1_mean','tonnetz_1_median','tonnetz_1_std','tonnetz_1_skewness','tonnetz_1_kurtosis','tonnetz_2_min','tonnetz_2_max','tonnetz_2_mean','tonnetz_2_median','tonnetz_2_std','tonnetz_2_skewness','tonnetz_2_kurtosis','tonnetz_3_min','tonnetz_3_max','tonnetz_3_mean','tonnetz_3_median','tonnetz_3_std','tonnetz_3_skewness','tonnetz_3_kurtosis','tonnetz_4_min','tonnetz_4_max','tonnetz_4_mean','tonnetz_4_median','tonnetz_4_std','tonnetz_4_skewness','tonnetz_4_kurtosis','tonnetz_5_min','tonnetz_5_max','tonnetz_5_mean','tonnetz_5_median','tonnetz_5_std','tonnetz_5_skewness','tonnetz_5_kurtosis','mfcc_0_min','mfcc_0_max','mfcc_0_mean','mfcc_0_median','mfcc_0_std','mfcc_0_skewness','mfcc_0_kurtosis','mfcc_1_min','mfcc_1_max','mfcc_1_mean','mfcc_1_median','mfcc_1_std','mfcc_1_skewness','mfcc_1_kurtosis','mfcc_2_min','mfcc_2_max','mfcc_2_mean','mfcc_2_median','mfcc_2_std','mfcc_2_skewness','mfcc_2_kurtosis','mfcc_3_min','mfcc_3_max','mfcc_3_mean','mfcc_3_median','mfcc_3_std','mfcc_3_skewness','mfcc_3_kurtosis','mfcc_4_min','mfcc_4_max','mfcc_4_mean','mfcc_4_median','mfcc_4_std','mfcc_4_skewness','mfcc_4_kurtosis','mfcc_5_min','mfcc_5_max','mfcc_5_mean','mfcc_5_median','mfcc_5_std','mfcc_5_skewness','mfcc_5_kurtosis','mfcc_6_min','mfcc_6_max','mfcc_6_mean','mfcc_6_median','mfcc_6_std','mfcc_6_skewness','mfcc_6_kurtosis','mfcc_7_min','mfcc_7_max','mfcc_7_mean','mfcc_7_median','mfcc_7_std','mfcc_7_skewness','mfcc_7_kurtosis','mfcc_8_min','mfcc_8_max','mfcc_8_mean','mfcc_8_median','mfcc_8_std','mfcc_8_skewness','mfcc_8_kurtosis','mfcc_9_min','mfcc_9_max','mfcc_9_mean','mfcc_9_median','mfcc_9_std','mfcc_9_skewness','mfcc_9_kurtosis','mfcc_10_min','mfcc_10_max','mfcc_10_mean','mfcc_10_median','mfcc_10_std','mfcc_10_skewness','mfcc_10_kurtosis','mfcc_11_min','mfcc_11_max','mfcc_11_mean','mfcc_11_median','mfcc_11_std','mfcc_11_skewness','mfcc_11_kurtosis','mfcc_12_min','mfcc_12_max','mfcc_12_mean','mfcc_12_median','mfcc_12_std','mfcc_12_skewness','mfcc_12_kurtosis','delta_0_min','delta_0_max','delta_0_mean','delta_0_median','delta_0_std','delta_0_skewness','delta_0_kurtosis','delta_1_min','delta_1_max','delta_1_mean','delta_1_median','delta_1_std','delta_1_skewness','delta_1_kurtosis','delta_2_min','delta_2_max','delta_2_mean','delta_2_median','delta_2_std','delta_2_skewness','delta_2_kurtosis','delta_3_min','delta_3_max','delta_3_mean','delta_3_median','delta_3_std','delta_3_skewness','delta_3_kurtosis','delta_4_min','delta_4_max','delta_4_mean','delta_4_median','delta_4_std','delta_4_skewness','delta_4_kurtosis','delta_5_min','delta_5_max','delta_5_mean','delta_5_median','delta_5_std','delta_5_skewness','delta_5_kurtosis','delta_6_min','delta_6_max','delta_6_mean','delta_6_median','delta_6_std','delta_6_skewness','delta_6_kurtosis','delta_7_min','delta_7_max','delta_7_mean','delta_7_median','delta_7_std','delta_7_skewness','delta_7_kurtosis','delta_8_min','delta_8_max','delta_8_mean','delta_8_median','delta_8_std','delta_8_skewness','delta_8_kurtosis','delta_9_min','delta_9_max','delta_9_mean','delta_9_median','delta_9_std','delta_9_skewness','delta_9_kurtosis','delta_10_min','delta_10_max','delta_10_mean','delta_10_median','delta_10_std','delta_10_skewness','delta_10_kurtosis','delta_11_min','delta_11_max','delta_11_mean','delta_11_median','delta_11_std','delta_11_skewness','delta_11_kurtosis','delta_12_min','delta_12_max','delta_12_mean','delta_12_median','delta_12_std','delta_12_skewness','delta_12_kurtosis','delta2_0_min','delta2_0_max','delta2_0_mean','delta2_0_median','delta2_0_std','delta2_0_skewness','delta2_0_kurtosis','delta2_1_min','delta2_1_max','delta2_1_mean','delta2_1_median','delta2_1_std','delta2_1_skewness','delta2_1_kurtosis','delta2_2_min','delta2_2_max','delta2_2_mean','delta2_2_median','delta2_2_std','delta2_2_skewness','delta2_2_kurtosis','delta2_3_min','delta2_3_max','delta2_3_mean','delta2_3_median','delta2_3_std','delta2_3_skewness','delta2_3_kurtosis','delta2_4_min','delta2_4_max','delta2_4_mean','delta2_4_median','delta2_4_std','delta2_4_skewness','delta2_4_kurtosis','delta2_5_min','delta2_5_max','delta2_5_mean','delta2_5_median','delta2_5_std','delta2_5_skewness','delta2_5_kurtosis','delta2_6_min','delta2_6_max','delta2_6_mean','delta2_6_median','delta2_6_std','delta2_6_skewness','delta2_6_kurtosis','delta2_7_min','delta2_7_max','delta2_7_mean','delta2_7_median','delta2_7_std','delta2_7_skewness','delta2_7_kurtosis','delta2_8_min','delta2_8_max','delta2_8_mean','delta2_8_median','delta2_8_std','delta2_8_skewness','delta2_8_kurtosis','delta2_9_min','delta2_9_max','delta2_9_mean','delta2_9_median','delta2_9_std','delta2_9_skewness','delta2_9_kurtosis','delta2_10_min','delta2_10_max','delta2_10_mean','delta2_10_median','delta2_10_std','delta2_10_skewness','delta2_10_kurtosis','delta2_11_min','delta2_11_max','delta2_11_mean','delta2_11_median','delta2_11_std','delta2_11_skewness','delta2_11_kurtosis','delta2_12_min','delta2_12_max','delta2_12_mean','delta2_12_median','delta2_12_std','delta2_12_skewness','delta2_12_kurtosis','label'])

In [6]:
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
root_folder = 'raw_data/genres/'
for genre in genres:
    print(f'Execution started for {genre} genre')
    for root, dirs, files in os.walk(root_folder + genre):
        for i, file in enumerate(files):
            file_path = Path(os.path.join(root, file))
            y, sr = librosa.load(file_path)
            label = genre
            features = get_features(y=y, label=label)
            df1 = pd.DataFrame(features)
            df = pd.concat([df, df1])  
    print(f'Execution complete for {genre} genre') 
    print('-'*30)

Execution started for blues genre


  df = pd.concat([df, df1])


Execution complete for blues genre
------------------------------
Execution started for classical genre
Execution complete for classical genre
------------------------------
Execution started for country genre
Execution complete for country genre
------------------------------
Execution started for disco genre
Execution complete for disco genre
------------------------------
Execution started for hiphop genre
Execution complete for hiphop genre
------------------------------
Execution started for jazz genre
Execution complete for jazz genre
------------------------------
Execution started for metal genre
Execution complete for metal genre
------------------------------
Execution started for pop genre
Execution complete for pop genre
------------------------------
Execution started for reggae genre
Execution complete for reggae genre
------------------------------
Execution started for rock genre
Execution complete for rock genre
------------------------------


In [8]:
df.reset_index(inplace=True)

  df.reset_index(inplace=True)


In [11]:
df.drop(columns=['index'], inplace = True)

### Storing DataFrame to csv file

In [13]:
if not os.path.exists('data'):
    os.mkdir('data')

df.to_csv('data/extracted_features.csv', index = False)