<a href="https://colab.research.google.com/github/ltphy/MusicalNotesClassifier/blob/master/musical_notes_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Data From Drive
from google.colab import drive 
drive.mount('/content/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


Import Libraries

In [0]:
import librosa
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

Extracting music notes

Dataset:https://freesound.org/people/Jaz_the_MAN_2/

The dataset consists of 7 musical sound notes:

*   Do
*   Re
*   Mi
*   Fa
*   Son
*   La
*   Si




In [5]:
cmap = plt.get_cmap('inferno')
plt.figure(figsize=(10,10))

musical_notes = 'do re mi fa sol la si'.split(' ')
folder_path = 'gdrive/My Drive/music_notes/'
for note in musical_notes:
  count = 0
  output_folder = os.path.join(folder_path,'img_data',note )
  #create a folder if not exist
  pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)  
  folder_note = os.path.join(folder_path,note)
  print(folder_note)
  files = os.listdir(folder_note)
  print(files)
  for file in files:
    file_path = os.path.join(folder_note,file)
    count+=1
    y, sr = librosa.load(file_path, mono=True, duration=5)
    plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
    plt.axis('off');
    output_file_path =os.path.join(output_folder,note + str(count) + '.png')
    print(output_file_path)
    plt.savefig(output_file_path)
    plt.clf()

gdrive/My Drive/music_notes/do
['do5.wav', 'do4.wav', 'do2.wav', 'do1.wav', 'do3.wav', 'do6.wav']
gdrive/My Drive/music_notes/img_data/do/do1.png
gdrive/My Drive/music_notes/img_data/do/do2.png
gdrive/My Drive/music_notes/img_data/do/do3.png
gdrive/My Drive/music_notes/img_data/do/do4.png
gdrive/My Drive/music_notes/img_data/do/do5.png
gdrive/My Drive/music_notes/img_data/do/do6.png
gdrive/My Drive/music_notes/re
['re3.wav', 're1.wav', 're4.wav', 're2.wav']
gdrive/My Drive/music_notes/img_data/re/re1.png
gdrive/My Drive/music_notes/img_data/re/re2.png
gdrive/My Drive/music_notes/img_data/re/re3.png
gdrive/My Drive/music_notes/img_data/re/re4.png
gdrive/My Drive/music_notes/mi
['mi3.wav', 'mi4.wav', 'mi1.wav', 'mi2.wav']
gdrive/My Drive/music_notes/img_data/mi/mi1.png
gdrive/My Drive/music_notes/img_data/mi/mi2.png
gdrive/My Drive/music_notes/img_data/mi/mi3.png
gdrive/My Drive/music_notes/img_data/mi/mi4.png
gdrive/My Drive/music_notes/fa
['fa2.wav', 'fa1.wav', 'fa3.wav', 'fa4.wav']
gd

<Figure size 720x720 with 0 Axes>

Extracting features from spectrogram

We will extract

Mel-frequency cepstral coefficients (MFCC)(20 in number)

Spectral Centroid,

Zero Crossing Rate

Chroma Frequencies

Spectral Roll-of


In [6]:
header = 'filename chroma_stft spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()
print(header)

['filename', 'chroma_stft', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18', 'mfcc19', 'mfcc20', 'label']


Writing data to a CSV file


In [0]:
def write_to_csv(folder_path, file_name):
  csv_path = os.path.join(folder_path,file_name)
  file = open(csv_path,'w',newline ='')
  with file:
    writer = csv.writer(file)
    writer.writerow(header)
  for note in musical_notes:
    image_folder = os.path.join(folder_path,note )
    image_files = os.listdir(image_folder)
    for file in image_files:
      image_path = os.path.join(image_folder,file)
      print(image_path)
      #load audio file
      y, sr = librosa.load(image_path, mono=True, duration= 3)
      #chroma feature
      chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
      #spectral centroid 
      spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
      #spectral bandwidth    
      spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
      #spectral roll off frequency
      rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
      #zero crossing rate
      zcr = librosa.feature.zero_crossing_rate(y)
      #Mel-frequency cepstral coefficients
      mfcc = librosa.feature.mfcc(y=y, sr=sr)
      #values to add to csv
      values = f'{file} {np.mean(chroma_stft)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
      for e in mfcc:
        values += f' {np.mean(e)}'
      values += f' {note}'
      file = open(csv_path, 'a', newline='')
      with file:
         writer = csv.writer(file)
         writer.writerow(values.split())

In [8]:
input_file = 'data.csv'
write_to_csv(folder_path,input_file)

gdrive/My Drive/music_notes/do/do5.wav
gdrive/My Drive/music_notes/do/do4.wav
gdrive/My Drive/music_notes/do/do2.wav
gdrive/My Drive/music_notes/do/do1.wav
gdrive/My Drive/music_notes/do/do3.wav
gdrive/My Drive/music_notes/do/do6.wav
gdrive/My Drive/music_notes/re/re3.wav
gdrive/My Drive/music_notes/re/re1.wav
gdrive/My Drive/music_notes/re/re4.wav
gdrive/My Drive/music_notes/re/re2.wav
gdrive/My Drive/music_notes/mi/mi3.wav
gdrive/My Drive/music_notes/mi/mi4.wav
gdrive/My Drive/music_notes/mi/mi1.wav
gdrive/My Drive/music_notes/mi/mi2.wav
gdrive/My Drive/music_notes/fa/fa2.wav
gdrive/My Drive/music_notes/fa/fa1.wav
gdrive/My Drive/music_notes/fa/fa3.wav
gdrive/My Drive/music_notes/fa/fa4.wav
gdrive/My Drive/music_notes/sol/sol3.wav
gdrive/My Drive/music_notes/sol/sol4.wav
gdrive/My Drive/music_notes/sol/sol2.wav
gdrive/My Drive/music_notes/sol/sol1.wav
gdrive/My Drive/music_notes/la/la3.wav
gdrive/My Drive/music_notes/la/la4.wav
gdrive/My Drive/music_notes/la/la1.wav
gdrive/My Drive/m

In [9]:
test_file = 'test_data.csv'
test_folder = os.path.join(folder_path,'test')
write_to_csv(test_folder,test_file)

gdrive/My Drive/music_notes/test/do/do7.wav
gdrive/My Drive/music_notes/test/re/re5.wav
gdrive/My Drive/music_notes/test/mi/mi5.wav
gdrive/My Drive/music_notes/test/fa/fa5.wav
gdrive/My Drive/music_notes/test/sol/sol5.wav
gdrive/My Drive/music_notes/test/la/la5.wav
gdrive/My Drive/music_notes/test/si/si5.wav


Analyzing the Data in Pandas

In [17]:
train_path = os.path.join(folder_path,input_file)
data = pd.read_csv(train_path)
data.head()


Unnamed: 0,filename,chroma_stft,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,do5.wav,0.20341,1034.635601,1297.364918,2406.640164,0.052375,-365.895984,105.270306,-22.050027,42.427789,7.646846,-8.252503,0.136139,1.110385,-17.094169,-9.910869,5.205509,-6.569991,-12.231633,-2.03738,0.284443,-3.978355,-4.509466,-7.951682,-12.868644,-8.02921,do
1,do4.wav,0.124815,1171.407928,1234.627843,2420.947266,0.052246,-538.015907,114.644277,-4.811778,47.230737,9.735454,0.914577,7.893248,-8.788845,-0.991606,1.545233,-0.648563,17.650642,19.362541,34.929754,36.930925,3.011775,-12.488895,-28.257724,-9.758384,15.131855,do
2,do2.wav,0.158347,851.982849,955.621954,1359.133911,0.046556,-592.865659,91.286681,-16.027655,25.789301,6.501369,2.493707,7.785337,-7.30397,-9.840526,-5.472376,-15.956434,-14.952687,3.796558,3.034354,-5.393283,-18.194425,-16.037208,-4.045646,-8.29044,-7.369336,do
3,do1.wav,0.164177,1005.318429,1101.728964,1880.240146,0.055131,-490.52342,124.392996,-25.547577,50.144015,2.20161,13.484972,9.559972,-8.839733,-5.876965,-2.808042,-12.787375,-12.090681,7.177546,-4.58706,-2.461571,-12.816951,-14.962033,3.788169,-11.241454,-11.252067,do
4,do3.wav,0.12115,1217.114429,1231.303741,2358.74707,0.054844,-608.612714,71.691655,-8.057984,29.701671,10.348799,-0.546233,5.570629,-8.479808,-4.517708,-1.639596,-0.960709,16.880998,26.791451,39.2982,36.861435,6.888442,-12.916025,-25.914141,-8.168141,14.075324,do


In [18]:
test_path =  os.path.join(test_folder,test_file)
test_data = pd.read_csv(test_path)
test_data.head()

Unnamed: 0,filename,chroma_stft,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,do7.wav,0.13035,1415.558633,1368.636751,2692.500385,0.081659,-439.110736,62.49334,-36.268261,12.290038,-16.274,-20.29897,-14.05299,-19.220514,-19.924624,-11.047416,-6.344697,3.638363,19.070914,40.655711,33.698963,5.705769,-23.083674,-25.868845,-0.351497,11.563337,do
1,re5.wav,0.154451,3022.299619,2581.094047,6020.683594,0.116309,-309.210821,39.380132,-35.65954,-1.323781,-7.599455,9.275709,-4.745408,2.282483,-23.059859,-20.473776,-16.879001,2.795682,-8.139689,-14.272832,-25.350978,-4.097391,-0.458924,7.588152,0.418308,2.998008,re
2,mi5.wav,0.20847,3178.464054,2559.35483,6119.53125,0.147205,-262.226028,37.904758,-35.814984,11.601056,1.426909,10.82175,-7.205181,-7.342268,-20.317858,-11.60565,-8.199233,0.036526,-19.755362,-13.842036,-6.569777,13.872139,-1.987575,3.482113,2.363323,8.815178,mi
3,fa5.wav,0.168808,2555.683871,2523.227248,5259.630358,0.09082,-347.601135,62.185298,-20.006457,3.341738,0.990008,12.03478,-11.40557,-17.974685,-20.802441,-5.365228,-9.429528,-13.407004,-23.697949,-7.132253,1.380908,11.446775,0.986097,0.516837,4.144257,16.99083,fa
4,sol5.wav,0.200155,3138.853092,2542.261821,6073.439941,0.113135,-195.322925,31.873032,-40.595593,14.761221,0.215234,8.662658,-16.754729,-20.283837,-28.373099,-13.644941,-2.834085,-0.149431,-12.441797,12.104109,10.777366,9.269321,-0.702366,23.14438,11.542901,4.865485,sol


In [0]:
data.shape

(35, 27)

In [0]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)
test_data = test_data.drop(['filename'],axis=1)


Encoding the Labels

In [25]:
#encoding input training set
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)
#encoding test set
genre_list = test_data.iloc[:, -1]
encoder = LabelEncoder()
test_y = encoder.fit_transform(genre_list)
print(test_y)

[0 0 0 0 0 0 4 4 4 4 3 3 3 3 1 1 1 1 6 6 6 6 2 2 2 2 5 5 5 5]
[0 4 3 1 6 2 5]


Dividing data into training and Testing set

In [0]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [54]:
scaler = StandardScaler()
test_X = scaler.fit_transform(np.array(test_data.iloc[:, :-1], dtype = float))
print(test_X)
print(test_y)

[[-1.42408376 -2.31454078 -2.41842693 -2.35868454 -1.35734699 -1.85482035
   1.39439546 -0.29318181  0.56293247 -1.91049982 -1.79968447  0.29830166
  -0.65587074  0.05336461 -0.28396977  0.45790099  0.99338531  1.86213386
   1.82558742  1.82470215 -0.48594519 -2.03936126 -2.04904076 -0.09467532
   0.86726959]
 [-0.49094517  0.48192549  0.59391944  0.62056783  0.0679929  -0.02280969
  -0.31518701 -0.20814203 -1.9119383  -0.46365154  0.61067944  1.13517694
   1.78470579 -0.31397883 -1.64288459 -1.45462521  0.83279769 -0.07156659
  -1.13318346 -1.68761744 -2.15840449 -0.06740026  0.21454766  0.02166334
  -0.66689175]
 [ 1.60047152  0.75372324  0.53990842  0.70905217  1.3389352   0.63982848
  -0.42431394 -0.22985786  0.43768165  1.04187737  0.73668324  0.91401069
   0.69230257  0.00729092 -0.36444541  0.12120594  0.3069925  -0.89702544
  -1.10997831 -0.57050237  0.9072708  -0.20063657 -0.06325318  0.31560855
   0.37503879]
 [ 0.0648802  -0.33019993  0.45014956 -0.06069564 -0.9804771  -0.56

In [55]:
print("Len train: ", len(y_train))
print("Len test: ", len(y_test))
print("Train shape: ", len(X_train.shape))
print("Test shape: ", len(y_train.shape))
print(X_train.shape[:])

Len train:  28
Len test:  2
Train shape:  2
Test shape:  1
(28, 25)



Classification with Keras

Building our Network

In [0]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))
model.add(layers.Dropout(0.4))

model.add(layers.Dense(256, activation='relu'))
model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))
model.add(layers.Dropout(0.3))

model.add(layers.Dense(128,  activation='relu'))
model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))

model.add(layers.Dense(64, activation='relu'))
model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))


model.add(layers.Dense(7, activation='softmax'))

In [0]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [64]:
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [89]:

test_loss, test_acc = model.evaluate(X_test,y_test)
train_loss, train_acc = model.evaluate(X_train, y_train)



In [90]:
print('test_acc: ',test_acc)
print('train_acc: ',train_acc)

test_acc:  1.0
train_acc:  1.0


Save output model

In [67]:
print(X_test)
print(y_test)

[[-0.59289949 -1.18190026 -1.37984163 -1.07102958 -0.62478956 -1.48788065
   0.43084119  1.22660828 -0.2964794   1.03834578  0.29023486  0.11440022
  -0.25853722 -0.11408732  0.06263165 -0.08669817 -0.66438796 -0.5114786
  -0.41536783  0.31777111  1.56613945  2.04607497  2.20085887  1.60436346
  -0.5868729 ]
 [-0.32592053  1.1326553   1.32495247  1.28438347 -0.17741183  0.6328692
  -1.04207671 -0.53094342  0.13373931 -0.31876051 -1.53103808 -1.49099711
  -0.92618413 -0.68574809 -0.42764829 -0.54175368 -0.66703569 -0.70235445
  -0.4467981  -0.55703471  0.33928051  1.04571056  1.00459405  1.57639135
   1.03083212]]
[5 6]


In [69]:
predictions = model.predict(X_test)
predictions[1].shape

(7,)

In [71]:
np.sum(predictions[0])

1.0000001

In [73]:

np.argmax(predictions[0])

5

Validation set


In [74]:
print(test_y)

[0 4 3 1 6 2 5]


In [91]:
predictions = model.predict(test_X)
predictions[1].shape
print(predictions)

[[0.3405611  0.31056055 0.04287396 0.12494399 0.14642347 0.01919767
  0.01543934]
 [0.03004034 0.12944601 0.01771532 0.19330592 0.3117096  0.01360088
  0.30418196]
 [0.16756076 0.05909    0.01470815 0.05672877 0.02697715 0.2496405
  0.4252947 ]
 [0.05908111 0.05165888 0.03610993 0.24784349 0.07625093 0.05085375
  0.478202  ]
 [0.289681   0.1247766  0.03845653 0.02057672 0.08344382 0.09136975
  0.35169557]
 [0.07310047 0.05109132 0.27315766 0.13901642 0.23355308 0.1430211
  0.08705993]
 [0.09410299 0.06364977 0.642087   0.01478247 0.02982165 0.11103052
  0.04452555]]


In [93]:
for i in range(0,7):
  print(np.argmax(predictions[i]))

0
4
6
6
6
2
2
