# extract_SimpleFeatures.ipynb

Simple features are extracted from the audio samples: *ZC* (***zero crossings***), *SC* (***spectral centroid***), *SV* (***spectral variance***), *ST* (***static tempo***) using librosa (BPM). The data is restructured of the data and saving to the hard drive in [SimpleFeatures](./SimpleFeatures).

Some data analysis on the features to see if they are seperable for genre classification.

In [1]:
import sys
import os
import numpy as np
import pydub
import pandas as pd
import librosa
import time

In [2]:
# Functions for (simple) feature extraction
# ZC zero crossings
# SC spectral centroid
# SV spectral variance
# ST static tempo using librosa (BPM). 1 number for entire song


def extractZC( data, windowsizeinsamples, windowhopinsamples ):
    totalnumberofwindows = np.floor((len(data)-windowsizeinsamples)/windowhopinsamples)
    numbersignchanges = []
    for ii in np.arange(totalnumberofwindows):
        windowstart = int(ii*windowhopinsamples)
        windowend = int(windowstart + windowhopinsamples)
        datasign = np.sign(data[windowstart:windowend])
        datasignderivative = datasign - np.roll(datasign,1)
        numbersignchanges.append(np.count_nonzero(datasignderivative[1:-1]))
    return np.array(numbersignchanges)

def extractSC( data, windowsizeinsamples, windowhopinsamples, samplerate ):
    totalnumberofwindows = np.floor((len(data)-windowsizeinsamples)/windowhopinsamples)
    centroid = []
    freqs = np.arange(np.ceil(windowsizeinsamples/2.0)+1)*samplerate/windowsizeinsamples
    for ii in np.arange(totalnumberofwindows):
        windowstart = int(ii*windowhopinsamples)
        windowend = int(windowstart + windowsizeinsamples)
        DATA = np.abs(np.fft.fft(data[windowstart:windowend]))
        centroid.append(np.sum(freqs*DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)])/
                        np.sum(DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)]))
    return np.array(centroid)

def extractSV( data, windowsizeinsamples, windowhopinsamples, samplerate ):
    totalnumberofwindows = np.floor((len(data)-windowsizeinsamples)/windowhopinsamples)
    variance = []
    freqs = np.arange(np.ceil(windowsizeinsamples/2.0)+1)*samplerate/windowsizeinsamples
    #freqs = np.power(freqs, trypower)
    for ii in np.arange(totalnumberofwindows):
        windowstart = int(ii*windowhopinsamples)
        windowend = int(windowstart + windowsizeinsamples)
        DATA = np.abs(np.fft.fft(data[windowstart:windowend]))  
        """ 
        centroid = np.sum(freqs*DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)])/np.sum(DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)])
            
        second_moment = np.sum(np.square(freqs)*DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)])/np.sum(DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)])
            
        variance.append( np.subtract(second_moment , centroid)
        """ 
        ind_max = int(np.ceil(windowsizeinsamples/2.0)+1) 
        centroid = np.sum(freqs*DATA[:ind_max])/np.sum(DATA[:ind_max])
        second_moment = np.sum(np.power(freqs, 2)*DATA[:ind_max])/np.sum(DATA[:ind_max])
        variance.append(second_moment - np.power(centroid,2))
    return np.array(variance)

def extractStaticTempo(data, sample_rate):
    data = np.array(data, dtype='float')
    onset_env = librosa.onset.onset_strength(y=data, sr=sample_rate)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sample_rate)
    return tempo

In [3]:
# Extraction of all features. note: tempo extraction is computationally quite heavy

def extract_features(path,
                     ZCframe_size_seconds,
                     ZCwindow_hop_seconds,
                     SCframe_size_seconds,
                     SCwindow_hop_seconds,
                     SVframe_size_seconds,
                     SVwindow_hop_seconds,
                     do_tempoExtraction = False
                    ):
    """
    For some songs with shorter duration the arrays will be filled with -1's
    """
    duration = 60.02938776 # seconds per song. Shorter songs are accounted for later
    sample_rate = 44100
    staticTempo = 0   # initializer for switch
    ZCwindowsizeinsamples = ZCframe_size_seconds * sample_rate
    ZCwindowhopinsamples =  ZCwindow_hop_seconds * sample_rate   
    SCwindowsizeinsamples = SCframe_size_seconds * sample_rate
    SCwindowhopinsamples =  SCwindow_hop_seconds * sample_rate   
    SVwindowsizeinsamples = SVframe_size_seconds * sample_rate
    SVwindowhopinsamples =  SVwindow_hop_seconds * sample_rate   
    
    N_samples = 100
    ZC_frames = int(np.floor((duration*sample_rate-ZCwindowsizeinsamples)/ZCwindowhopinsamples))
    SC_frames = int(np.floor((duration*sample_rate-SCwindowsizeinsamples)/SCwindowhopinsamples))
    SV_frames = int(np.floor((duration*sample_rate-SVwindowsizeinsamples)/SVwindowhopinsamples))

    ZC_array = np.full((N_samples, ZC_frames), np.nan)
    SC_array = np.full((N_samples, SC_frames), np.nan)
    SV_array = np.full((N_samples, SV_frames), np.nan)
    staticTempo_array = np.full((N_samples), np.nan)
    

    for file in os.scandir(path):
            
        if file.path.endswith(".mp3"):
            print("analyzing ", file.name, file.path)

            int_track = int(file.name[:-4])
            
            start = time.time()
            sound = pydub.AudioSegment.from_file(path + "/" + file.name, format = "mp3")  
            samples = sound.get_array_of_samples()
            end = time.time() 
            print(f'import takes {end-start}')
            
            start = time.time()
            ZC = extractZC( samples, ZCwindowsizeinsamples, ZCwindowhopinsamples )  
            end = time.time()
            print(f'extracting ZC takes {end-start} per song')


            start = time.time()
            SC = extractSC( samples, SCwindowsizeinsamples, SCwindowhopinsamples, sample_rate )
            end = time.time()
            print(f'extracting SC takes {end-start} per song')

            start = time.time()
            SV = extractSV( samples, SVwindowsizeinsamples, SVwindowhopinsamples, sample_rate )
            end = time.time()
            print(f'extracting SV takes {end-start} per song')

            if do_tempoExtraction:
                start = time.time()
                staticTempo = extractStaticTempo(samples, sample_rate)
                end = time.time()
                print(f'extracting tempo takes {end-start} per song')
            
            if np.shape(ZC)[0] < ZC_frames:
                ZC = np.append(ZC, np.full((ZC_frames-np.shape(ZC)[0]), np.nan))
                            
            elif np.shape(ZC)[0] > ZC_frames:
                ZC = ZC[:ZC_frames]
             
            if np.shape(SC)[0] < SC_frames:
                SC = np.append(SC, np.full((SC_frames-np.shape(SC)[0]), np.nan))

            elif np.shape(SC)[0] > SC_frames:
                SC = SC[:SC_frames]
                
            if np.shape(SV)[0] < SV_frames:
                SV = np.append(SV, np.full((SV_frames-np.shape(SV)[0]), np.nan))

            elif np.shape(SV)[0] > SV_frames:
                SV = SV[:SV_frames]
                
            ZC_array[int_track-1, :] = ZC
            SC_array[int_track-1, :] = SC
            SV_array[int_track-1, :] = SV
            staticTempo_array[int_track-1] = staticTempo
    
    if do_tempoExtraction:
        return ZC_array, SC_array, SV_array, staticTempo_array
    return ZC_array, SC_array, SV_array

In [4]:
# Structuring data and saving

path = "../data/emotifymusic/"
genres = ["classical", "electronic", "pop", "rock"]

ZC = []
SC = []
SV = []
staticTempo = []


for genre in genres:
    ZC_array, SC_array, SV_array = extract_features(path+genre,
                                                    ZCframe_size_seconds=0.200,
                                                    ZCwindow_hop_seconds=0.100,
                                                    SCframe_size_seconds=0.200,
                                                    SCwindow_hop_seconds=0.100,
                                                    SVframe_size_seconds=0.200,
                                                    SVwindow_hop_seconds=0.100,
                                                    do_tempoExtraction=False
                                                    )
    ZC.append(ZC_array)
    SC.append(SC_array)
    SV.append(SV_array)
    #staticTempo.append(staticTempo_array)


# np.savez_compressed('zeroCrossings_frame100ms_hop50ms.npz', classical=ZC[0], electronic=ZC[1], pop=ZC[2], rock=ZC[3])
# np.savez_compressed('spectralCentroid_frame100ms_hop50ms.npz', classical=SC[0], electronic=SC[1], pop=SC[2], rock=SC[3])
# np.savez_compressed('spectralVariance_frame100ms_hop50ms.npz', classical=SV[0], electronic=SV[1], pop=SV[2], rock=SV[3])
# np.savez_compressed('staticTempoLibrosa.npz',
#                     classical=staticTempo[0], electronic=staticTempo[1], pop=staticTempo[2], rock=staticTempo[3])


ZC_reshape = np.reshape(ZC, (np.shape(ZC)[0]*np.shape(ZC)[1], np.shape(ZC)[2]))
SC_reshape = np.reshape(SC, (np.shape(SC)[0]*np.shape(SC)[1], np.shape(SC)[2]))
SV_reshape = np.reshape(SV, (np.shape(SV)[0]*np.shape(SV)[1], np.shape(SV)[2]))
staticTempo_reshape = np.reshape(staticTempo, (np.shape(staticTempo)[0]*np.shape(staticTempo)[1]))

np.save('zeroCrossings_frame200ms_hop100ms.npy', ZC_reshape)
np.save('spectralCentroid_frame200ms_hop100ms.npy', SC_reshape)
np.save('spectralVariance_frame200ms_hop100ms.npy', SV_reshape)

#np.save('zeroCrossings_frame100ms_hop50ms.npy', ZC_reshape)
#np.save('spectralCentroid_frame100ms_hop50ms.npy', SC_reshape)
#np.save('spectralVariance_frame5000ms_hop2500ms.npy', SV_reshape)
#np.save('staticTempoLibrosa.npy', staticTempo_reshape)

analyzing  1.mp3 ../data/emotifymusic/classical\1.mp3
import takes 0.4676327705383301
extracting ZC takes 0.09474802017211914 per song
extracting SC takes 0.5026557445526123 per song
extracting SV takes 0.8716669082641602 per song
analyzing  10.mp3 ../data/emotifymusic/classical\10.mp3
import takes 0.4546220302581787
extracting ZC takes 0.08776473999023438 per song
extracting SC takes 0.5056846141815186 per song
extracting SV takes 0.8686776161193848 per song
analyzing  100.mp3 ../data/emotifymusic/classical\100.mp3
import takes 0.44479823112487793
extracting ZC takes 0.0917518138885498 per song
extracting SC takes 0.5156126022338867 per song
extracting SV takes 0.8696739673614502 per song
analyzing  11.mp3 ../data/emotifymusic/classical\11.mp3
import takes 0.47154831886291504
extracting ZC takes 0.12566328048706055 per song
extracting SC takes 0.548569917678833 per song
extracting SV takes 0.8766543865203857 per song
analyzing  12.mp3 ../data/emotifymusic/classical\12.mp3
import takes

  centroid.append(np.sum(freqs*DATA[:int(np.ceil(windowsizeinsamples/2.0)+1)])/


extracting SC takes 0.5206453800201416 per song


  centroid = np.sum(freqs*DATA[:ind_max])/np.sum(DATA[:ind_max])
  second_moment = np.sum(np.power(freqs, 2)*DATA[:ind_max])/np.sum(DATA[:ind_max])


extracting SV takes 0.8796103000640869 per song
analyzing  53.mp3 ../data/emotifymusic/classical\53.mp3
import takes 0.44800686836242676
extracting ZC takes 0.10077786445617676 per song
extracting SC takes 0.513582706451416 per song
extracting SV takes 0.8727023601531982 per song
analyzing  54.mp3 ../data/emotifymusic/classical\54.mp3
import takes 0.4514131546020508
extracting ZC takes 0.0927882194519043 per song
extracting SC takes 0.5056502819061279 per song
extracting SV takes 0.8656830787658691 per song
analyzing  55.mp3 ../data/emotifymusic/classical\55.mp3
import takes 0.44335436820983887
extracting ZC takes 0.08776354789733887 per song
extracting SC takes 0.5195753574371338 per song
extracting SV takes 0.878650426864624 per song
analyzing  56.mp3 ../data/emotifymusic/classical\56.mp3
import takes 0.4463498592376709
extracting ZC takes 0.08775520324707031 per song
extracting SC takes 0.5036530494689941 per song
extracting SV takes 0.8649888038635254 per song
analyzing  57.mp3 ../

IndexError: tuple index out of range

In [5]:
np.save('zeroCrossings_frame200ms_hop100ms.npy', ZC_reshape)
np.save('spectralCentroid_frame200ms_hop100ms.npy', SC_reshape)
np.save('spectralVariance_frame200ms_hop100ms.npy', SV_reshape)

In [None]:
# Loading previous data
# Some data is saved as npz file, containing 4 arrays for every genre. Then use the loop to unpack
# Later data was saved as 400 x ... as npy file

genres = ["classical", "electronic", "pop", "rock"]

ZCnpz = np.load('zeroCrossings_frame100ms_hop50ms.npz')
SCnpz = np.load('spectralCentroid_frame100ms_hop50ms.npz')
SVnpz = np.load('spectralVariance_frame100ms_hop50ms.npz')
STnpz = np.load('staticTempoLibrosa.npz')

ZC = []
SC = []
SV = []
staticTempo = []

for genre in genres:
    ZC.append(ZCnpz[genre])
    SC.append(SCnpz[genre])
    SV.append(SVnpz[genre])
    staticTempo.append(STnpz[genre])

In [None]:
# Reshaping lists containing one array per genre to a single array of dimension 400 x . 

ZC_reshape = np.reshape(ZC, (np.shape(ZC)[0]*np.shape(ZC)[1], np.shape(ZC)[2]))
SC_reshape = np.reshape(SC, (np.shape(SC)[0]*np.shape(SC)[1], np.shape(SC)[2]))
SV_reshape = np.reshape(SV, (np.shape(SV)[0]*np.shape(SV)[1], np.shape(SV)[2]))
staticTempo_reshape = np.reshape(staticTempo, (np.shape(staticTempo)[0]*np.shape(staticTempo)[1]))

----------------------------

In [None]:
# Scatter plots for feature exploration

import matplotlib.pyplot as plt

fig,ax = plt.subplots()
ax.scatter(ZC[0].flatten(), SV[0].flatten(), marker='x', alpha=0.1)
# ax.scatter(ZC[1].flatten(), SC[1].flatten(), marker='o', alpha=0.1)
# ax.scatter(ZC[2].flatten(), SC[2].flatten(), marker='^', alpha=0.1)
ax.scatter(ZC[3].flatten(), SV[3].flatten(), marker='*', alpha=0.1)


# ax.set_xlim(0,10000)
# ax.set_ylim(0,20000)

In [None]:
fig,ax = plt.subplots()
ax.scatter(np.mean(ZC[0], axis=1), staticTempo[0], marker='x', alpha=0.5)
ax.scatter(np.mean(ZC[1], axis=1), staticTempo[1], marker='o', alpha=0.5)
ax.scatter(np.mean(ZC[2], axis=1), staticTempo[2], marker='^', alpha=0.5)
ax.scatter(np.mean(ZC[3], axis=1), staticTempo[3], marker='*', alpha=0.5)


# ax.set_xlim(0,10000)
# ax.set_ylim(0,20000)

In [None]:
# From top to bottom: genre
# From left to right: zero crossings, spectral centroid, spectral variance, static tempo extimation by librosa
# For the first three the mean is taken over all windows (can be changed!)


fig, [[ax1, ax2, ax3, ax4], [ax5, ax6, ax7, ax8], [ax9, ax10, ax11, ax12], [ax13, ax14, ax15, ax16]] = plt.subplots(4,4, figsize=(14,14))

# fig, [[ax1, ax2, ax3], [ax4, ax5, ax6]] = plt.subplots(2,3)


ax1.hist(np.mean(ZC[0], axis=1),  bins=50, density=True, color='blue')
ax2.hist(np.mean(SC[0], axis=1),  bins=50, density=True, color='blue')
ax3.hist(np.mean(SV[0], axis=1),  bins=50, density=True, color='blue')
ax4.hist(staticTempo[0] ,  bins=50, density=True, color='blue')


ax5.hist(np.mean(ZC[1], axis=1), bins=50, density=True, color='orange')
ax6.hist(np.mean(SC[1], axis=1), bins=50, density=True, color='orange')
ax7.hist(np.mean(SV[1], axis=1), bins=50, density=True, color='orange')
ax8.hist(staticTempo[1],  bins=50, density=True, color='orange')


ax9.hist(np.mean(ZC[2], axis=1),  bins=50, density=True, color='red')
ax10.hist(np.mean(SC[2], axis=1), bins=50, density=True, color='red')
ax11.hist(np.mean(SV[2], axis=1), bins=50, density=True, color='red')
ax12.hist(staticTempo[2],  bins=50, density=True, color='red')


ax13.hist(np.mean(ZC[3], axis=1), bins=50, density=True, color='green')
ax14.hist(np.mean(SC[3], axis=1), bins=50, density=True, color='green')
ax15.hist(np.mean(SV[3], axis=1), bins=50, density=True, color='green')
ax16.hist(staticTempo[3],  bins=50, density=True, color='green')


ax1.set_xlim((0,2000))
ax5.set_xlim((0,2000))
ax9.set_xlim((0,2000))
ax13.set_xlim((0,2000))

ax2.set_xlim((0,15000))
ax6.set_xlim((0,15000))
ax10.set_xlim((0,15000))
ax14.set_xlim((0,15000))

ax3.set_xlim((0,1.5e8))
ax7.set_xlim((0,1.5e8))
ax11.set_xlim((0,1.5e8))
ax15.set_xlim((0,1.5e8))

ax4.set_xlim((80,175))
ax8.set_xlim((80,175))
ax12.set_xlim((80,175))
ax16.set_xlim((80,175))

In [None]:
# From top to bottom: genre
# From left to right: zero crossings, spectral centroid, spectral variance, static tempo extimation by librosa
# No mean taken


fig, [[ax1, ax2, ax3, ax4], [ax5, ax6, ax7, ax8], [ax9, ax10, ax11, ax12], [ax13, ax14, ax15, ax16]] = plt.subplots(4,4, figsize=(14,14))

# fig, [[ax1, ax2, ax3], [ax4, ax5, ax6]] = plt.subplots(2,3)


ax1.hist(ZC[0].flatten(),  bins=50, density=True, color='blue')
ax2.hist(SC[0].flatten(),  bins=50, density=True, color='blue')
ax3.hist(SV[0].flatten(),  bins=50, density=True, color='blue')
ax4.hist(staticTempo[0] ,  bins=50, density=True, color='blue')


ax5.hist(ZC[1].flatten(), bins=50, density=True, color='orange')
ax6.hist(SC[1].flatten(), bins=50, density=True, color='orange')
ax7.hist(SV[1].flatten(), bins=50, density=True, color='orange')
ax8.hist(staticTempo[1],  bins=50, density=True, color='orange')


ax9.hist(ZC[2].flatten(),  bins=50, density=True, color='red')
ax10.hist(SC[2].flatten(), bins=50, density=True, color='red')
ax11.hist(SV[2].flatten(), bins=50, density=True, color='red')
ax12.hist(staticTempo[2],  bins=50, density=True, color='red')


ax13.hist(ZC[3].flatten(), bins=50, density=True, color='green')
ax14.hist(SC[3].flatten(), bins=50, density=True, color='green')
ax15.hist(SV[3].flatten(), bins=50, density=True, color='green')
ax16.hist(staticTempo[3],  bins=50, density=True, color='green')


ax1.set_xlim((0,2000))
ax5.set_xlim((0,2000))
ax9.set_xlim((0,2000))
ax13.set_xlim((0,2000))

ax2.set_xlim((0,15000))
ax6.set_xlim((0,15000))
ax10.set_xlim((0,15000))
ax14.set_xlim((0,15000))

ax3.set_xlim((0,1.5e8))
ax7.set_xlim((0,1.5e8))
ax11.set_xlim((0,1.5e8))
ax15.set_xlim((0,1.5e8))

ax4.set_xlim((80,175))
ax8.set_xlim((80,175))
ax12.set_xlim((80,175))
ax16.set_xlim((80,175))

In [None]:
# fig, [[ax1, ax2, ax3], [ax4, ax5, ax6], [ax7, ax8, ax9], [ax10, ax11, ax12]] = plt.subplots(4,3, figsize=(10,10))

fig, [[ax1, ax2, ax3], [ax4, ax5, ax6]] = plt.subplots(2,3,  figsize=(10,10))


ax1.hist(ZC_5_2[0].flatten(),  bins=50, density=True, color='blue')
ax2.hist(SC_5_2[0].flatten(),  bins=50, density=True, color='blue')
ax3.hist(SV_5_2[0].flatten(),  bins=50, density=True, color='blue')

ax4.hist(ZC_5_2[1].flatten(), bins=50, density=True, color='orange')
ax5.hist(SC_5_2[1].flatten(), bins=50, density=True, color='orange')
ax6.hist(SV_5_2[1].flatten(), bins=50, density=True, color='orange')

# ax7.hist(ZC_5_2[2].flatten(),  bins=50, density=True, color='red')
# ax8.hist(SC_5_2[2].flatten(),  bins=50, density=True, color='red')
# ax9.hist(SV_5_2[2].flatten(),  bins=50, density=True, color='red')

# ax10.hist(ZC_5_2[3].flatten(), bins=50, density=True, color='green')
# ax11.hist(SC_5_2[3].flatten(), bins=50, density=True, color='green')
# ax12.hist(SV_5_2[3].flatten(), bins=50, density=True, color='green')

ax1.set_xlim((0,80000))
ax4.set_xlim((0,80000))
ax2.set_xlim((0,15000))
ax5.set_xlim((0,15000))
ax3.set_xlim((0,1.5e8))
ax6.set_xlim((0,1.5e8))

In [None]:
fig, [[ax1, ax2, ax3], [ax4, ax5, ax6]] = plt.subplots(2,3,  figsize=(10,10))


ZC_means0 = np.mean(ZC[0], axis=1)
ZC_means1 = np.mean(ZC[1], axis=1)

SC_means0 = np.mean(SC[0], axis=1)
SC_means1 = np.mean(SC[1], axis=1)

SV_means0 = np.mean(SV[0], axis=1)
SV_means1 = np.mean(SV[1], axis=1)


ax1.hist(ZC_means0,  bins=50, density=True, color='blue')
ax2.hist(SC_means0,  bins=50, density=True, color='blue')
ax3.hist(SV_means0,  bins=50, density=True, color='blue')

ax4.hist(ZC_means1, bins=50, density=True, color='orange')
ax5.hist(SC_means1, bins=50, density=True, color='orange')
ax6.hist(SV_means1, bins=50, density=True, color='orange')


# ax2.hist(SC[0][song,:],  bins=50, density=True, color='blue')
# ax3.hist(SV[0][song,:],  bins=50, density=True, color='blue')

# ax4.hist(ZC_means1, bins=50, density=True, color='orange')
# ax5.hist(SC[1][song,:], bins=50, density=True, color='orange')
# ax6.hist(SV[1][song,:], bins=50, density=True, color='orange')

ax1.set_xlim((0,2000))
ax4.set_xlim((0,2000))
ax2.set_xlim((0,15000))
ax5.set_xlim((0,15000))
ax3.set_xlim((0,1.5e8))
ax6.set_xlim((0,1.5e8))

--------------------------

In [None]:
# Dataframes for further feature exploration

d = {'ZC': ZC_reshape, 'SC': SC_reshape, 'SV': SV_reshape, 'StaticTempo': staticTempo_reshape}
features_dict_means = {'ZC': np.nanmean(ZC_reshape, axis=1),
           'SC': np.nanmean(SC_reshape, axis=1),
           'SV': np.nanmean(SV_reshape, axis=1),
           'StaticTempo': staticTempo_reshape}
features_df = pd.DataFrame(data=features_dict_means)
features_df

In [None]:
ones = np.ones(100, dtype='int')
labels_dict = {'label': np.concatenate((ones, ones*2, ones*3, ones*4))}
labels_df = pd.DataFrame(labels_dict)
labels_df

In [None]:
# All features in a scatter matrix

from matplotlib import colors
cmap = colors.ListedColormap(['k', 'g', 'b', 'orange'], 4)

F = features_df
C = labels_df.to_numpy()

pd.plotting.scatter_matrix(F, c=C, cmap=cmap, alpha=1, figsize=(15,15), diagonal = 'hist')
plt.legend('classical')

In [None]:
# All features in a scatter matrix for classical and rock. They seem seperable to some extent!!

cmap = colors.ListedColormap(['k', 'g', 'b', 'orange'], 4)

F = features_df[(labels_df['label'] == 1) | (labels_df['label'] == 4)]
C = labels_df[(labels_df['label'] == 1) | (labels_df['label'] == 4)].to_numpy()

pd.plotting.scatter_matrix(F, c=C, cmap=cmap, alpha=1, figsize=(15,15), diagonal = 'hist')
plt.legend('classical')