In [1]:
#The first step is to extract/create data from the music (.wav) files using librosa
import librosa
import os
import pandas as pd
import pickle
import numpy as np

In [9]:
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()

# Build `dataset`: one entry per audio file found under ./genres/<genre>/.
# Every entry is a list laid out as
#   [rms, zcr, spec_bw, spec_cent, rolloff, <one item per row of mfcc>, genre]
# where the first five items are librosa feature outputs flattened to 1-D
# with reshape(-1,) and the last item is the genre folder name.
dataset = []
for genre in genres:
    for song in os.scandir(f'./genres/{genre}'):
        # duration = 30 limits how much of each file librosa reads (seconds).
        track, sr = librosa.load(f'./genres/{genre}/{song.name}', duration = 30)

        # Same extraction order as the layout described above.
        record = [
            librosa.feature.rms(y = track).reshape(-1,),
            librosa.feature.zero_crossing_rate(track).reshape(-1,),
            librosa.feature.spectral_bandwidth(y=track, sr=sr).reshape(-1,),
            librosa.feature.spectral_centroid(y = track, sr=sr).reshape(-1,),
            librosa.feature.spectral_rolloff(y=track, sr=sr).reshape(-1,),
        ]

        # Iterating the 2-D mfcc result yields its rows; each becomes its own item.
        mfcc = librosa.feature.mfcc(y=track, sr=sr)
        record.extend(mfcc)

        # The label always sits in the last position of the record.
        record.append(genre)
        dataset.append(record)

[[-2.11484650e+02 -2.08944809e+02 -1.93908890e+02 ... -9.66379395e+01
  -1.09999146e+02 -8.60244293e+01]
 [ 9.90229645e+01  1.01246780e+02  1.02243958e+02 ...  1.53137741e+02
   1.50079346e+02  1.38842896e+02]
 [-1.06346970e+01 -9.34669876e+00  1.91543400e+00 ... -6.27915421e+01
  -5.07951355e+01 -3.68518906e+01]
 ...
 [ 8.94441223e+00  6.88274527e+00  3.65002537e+00 ... -1.29259109e+00
  -2.85715318e+00 -7.59227753e+00]
 [ 3.07485199e+00  4.56672573e+00  4.78899765e+00 ... -1.30038376e+01
  -7.57468748e+00 -9.73560810e+00]
 [ 5.04152775e+00  2.89663172e+00  9.40246463e-01 ... -1.00848560e+01
  -7.55533791e+00 -3.09786592e-02]]


TypeError: only integer scalar arrays can be converted to a scalar index

In [None]:
# Save the extracted dataset to songData.p. The context manager guarantees the
# file is flushed and closed even if pickling raises part-way through.
with open("songData.p", "wb") as song_data_file:
    pickle.dump(dataset, song_data_file)

In [None]:
# Reload the saved dataset from songData.p; the context manager closes the
# file handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load files that were
# produced by this notebook (never an untrusted songData.p).
with open("songData.p", "rb") as song_data_file:
    data = pickle.load(song_data_file)

# Work on a copy so `data` keeps the object exactly as it was loaded.
# NOTE(review): .copy() is presumably list.copy(), i.e. a shallow copy whose
# inner records are shared with `data` - confirm if deep isolation is needed.
raw_data = data.copy()

In [None]:
# Split each record in raw_data into two parallel lists:
#   features[k] -> everything except the record's last item
#   label[k]    -> the record's last item
features = [record[:-1] for record in raw_data]
label = [record[-1] for record in raw_data]

In [None]:
# Wrap the two parallel lists in DataFrames (one row per record) and preview
# the first rows of the feature frame as the cell output.
label_df = pd.DataFrame(label)
feature_df = pd.DataFrame(features)
feature_df.head()

In [None]:
# Remove the row labelled 432 from both frames so features and labels stay
# aligned row-for-row.
# NOTE(review): the reason this particular row is excluded is not recorded in
# the notebook - presumably a problematic track; TODO confirm and document.
# Re-running this cell raises KeyError because the label is already gone.
ROW_TO_DROP = 432
feature_df = feature_df.drop(index=ROW_TO_DROP)
label_df = label_df.drop(index=ROW_TO_DROP)

In [None]:
# Standardize each feature column to zero mean and unit variance.
#
# Every cell of feature_df is expected to hold a 1-D numeric array. The
# statistics of a column are pooled over every value of every sample in that
# column, so a whole feature shares one mean and one standard deviation.
#
# Results kept for later use (one entry per column, in column order):
#   feature_average[c] -> pooled mean of column c
#   feature_stdev[c]   -> pooled population standard deviation (divides by N)
#   n_population[c]    -> N, the number of values pooled for column c
#
# The scaled arrays are written back one whole column at a time. Assigning
# through feature_df.iloc[i][c] = ... is chained indexing: it can modify a
# temporary row object instead of feature_df (and never updates the frame when
# pandas Copy-on-Write is active), silently leaving the data unscaled.
feature_average = []
feature_stdev = []
n_population = []

for c in range(len(feature_df.columns)):
    column_label = feature_df.columns[c]
    # Positional access keeps this working whatever the column labels are.
    samples = list(feature_df.iloc[:, c].values)

    # Pooling all values gives the same mean as averaging per-sample means
    # when samples share a length, and stays correct when they do not.
    pooled = np.concatenate(samples)

    # Accumulate in float64 for accuracy; plain Python floats are used so the
    # subtraction/division below does not upcast the dtype of each sample.
    mean = float(pooled.mean(dtype=np.float64))
    stdev = float(pooled.std(dtype=np.float64))  # ddof=0 -> population stdev

    feature_average.append(mean)
    feature_stdev.append(stdev)
    n_population.append(pooled.size)

    # dtype=object stores one array per cell instead of expanding to 2-D;
    # reusing the frame's index keeps rows aligned after earlier row drops.
    feature_df[column_label] = pd.Series(
        [(sample - mean) / stdev for sample in samples],
        index=feature_df.index,
        dtype=object,
    )

In [None]:
# One-hot encode the genre labels, then give both frames readable column names.
label_df = pd.get_dummies(label_df)

# Five summary features followed by MFCC1 .. MFCC20 (25 names in total).
mfcc_names = [f"MFCC{k}" for k in range(1, 21)]
feature_df.columns = ["RMS", "ZCR", "Spec_Bandwidth", "Spec_Centroid", "Spectral_Rolloff"] + mfcc_names

# NOTE(review): assumes get_dummies emitted the genre columns in this
# (alphabetical) order - TODO confirm before trusting the label names.
label_df.columns = ["Blues", "Classical", "Country", "Disco", "Hiphop",
                    "Jazz", "Metal", "Pop", "Reggae", "Rock"]

In [None]:
# Save the preprocessed feature and label frames to disk. Each file is opened
# in a context manager so it is flushed and closed even if pickling fails.
with open("preprocessedFeat.p", "wb") as feature_file:
    pickle.dump(feature_df, feature_file)

with open("preprocessedLabel.p", "wb") as label_file:
    pickle.dump(label_df, label_file)