<a href="https://colab.research.google.com/github/ltphy/MusicalNotesClassifier/blob/master/musical_notes_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [176]:
# Mount Data From Drive
from google.colab import drive 
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


Import Libraries

In [0]:
import librosa
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

Extracting music notes

Dataset:https://freesound.org/people/Jaz_the_MAN_2/

The dataset consists of 7 musical sound notes:

*   Do
*   Re
*   Mi
*   Fa
*   Son
*   La
*   Si




In [178]:
cmap = plt.get_cmap('inferno')
plt.figure(figsize=(10,10))

musical_notes = 'do re mi fa sol la si'.split(' ')
folder_path = 'gdrive/My Drive/music_notes/'
for note in musical_notes:
  count = 0
  output_folder = os.path.join(folder_path,'img_data',note )
  pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)  
  folder_note = os.path.join(folder_path,note)
  print(folder_note)
  files = os.listdir(folder_note)
  print(files)
  for file in files:
    file_path = os.path.join(folder_note,file)
    count+=1
    y, sr = librosa.load(file_path, mono=True, duration=5)
    plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
    plt.axis('off');
    output_file_path =os.path.join(output_folder,note + str(count) + '.png')
    print(output_file_path)
    plt.savefig(output_file_path)
    plt.clf()

gdrive/My Drive/music_notes/do
['do2.wav', 'do4.wav', 'do1.wav', 'do3.wav', 'do6.wav', 'do5.wav']
gdrive/My Drive/music_notes/img_data/do/do1.png
gdrive/My Drive/music_notes/img_data/do/do2.png
gdrive/My Drive/music_notes/img_data/do/do3.png
gdrive/My Drive/music_notes/img_data/do/do4.png
gdrive/My Drive/music_notes/img_data/do/do5.png
gdrive/My Drive/music_notes/img_data/do/do6.png
gdrive/My Drive/music_notes/re
['re5.wav', 're2.wav', 're1.wav', 're4.wav', 're3.wav']
gdrive/My Drive/music_notes/img_data/re/re1.png
gdrive/My Drive/music_notes/img_data/re/re2.png
gdrive/My Drive/music_notes/img_data/re/re3.png
gdrive/My Drive/music_notes/img_data/re/re4.png
gdrive/My Drive/music_notes/img_data/re/re5.png
gdrive/My Drive/music_notes/mi
['mi5.wav', 'mi1.wav', 'mi4.wav', 'mi3.wav', 'mi2.wav']
gdrive/My Drive/music_notes/img_data/mi/mi1.png
gdrive/My Drive/music_notes/img_data/mi/mi2.png
gdrive/My Drive/music_notes/img_data/mi/mi3.png
gdrive/My Drive/music_notes/img_data/mi/mi4.png
gdrive/M

<Figure size 720x720 with 0 Axes>

Extracting features from spectrogram

We will extract

Mel-frequency cepstral coefficients (MFCC)(20 in number)

Spectral Centroid,

Zero Crossing Rate

Chroma Frequencies

Spectral Roll-of


In [179]:
header = 'filename chroma_stft spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()
print(header)

['filename', 'chroma_stft', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18', 'mfcc19', 'mfcc20', 'label']


Writing data to a CSV file


In [0]:
def write_to_csv(folder_path, file_name):
  csv_path = os.path.join(folder_path,file_name)
  file = open(csv_path,'w',newline ='')
  with file:
    writer = csv.writer(file)
    writer.writerow(header)
  for note in musical_notes:
    image_folder = os.path.join(folder_path,note )
    image_files = os.listdir(image_folder)
    for file in image_files:
      image_path = os.path.join(image_folder,file)
      print(image_path)
      #load audio file
      y, sr = librosa.load(image_path, mono=True, duration= 3)
      #chroma feature
      chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
      #spectral centroid 
      spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
      #spectral bandwidth    
      spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
      #spectral roll off frequency
      rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
      #zero crossing rate
      zcr = librosa.feature.zero_crossing_rate(y)
      #Mel-frequency cepstral coefficients
      mfcc = librosa.feature.mfcc(y=y, sr=sr)
      #values to add to csv
      values = f'{file} {np.mean(chroma_stft)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
      for e in mfcc:
        values += f' {np.mean(e)}'
      values += f' {note}'
      file = open(csv_path, 'a', newline='')
      with file:
         writer = csv.writer(file)
         writer.writerow(values.split())

In [220]:
input_file = 'data.csv'
write_to_csv(folder_path,input_file)

gdrive/My Drive/music_notes/do/do2.wav
gdrive/My Drive/music_notes/do/do4.wav
gdrive/My Drive/music_notes/do/do1.wav
gdrive/My Drive/music_notes/do/do3.wav
gdrive/My Drive/music_notes/do/do6.wav
gdrive/My Drive/music_notes/do/do5.wav
gdrive/My Drive/music_notes/re/re2.wav
gdrive/My Drive/music_notes/re/re1.wav
gdrive/My Drive/music_notes/re/re4.wav
gdrive/My Drive/music_notes/re/re3.wav
gdrive/My Drive/music_notes/mi/mi1.wav
gdrive/My Drive/music_notes/mi/mi4.wav
gdrive/My Drive/music_notes/mi/mi3.wav
gdrive/My Drive/music_notes/mi/mi2.wav
gdrive/My Drive/music_notes/fa/fa4.wav
gdrive/My Drive/music_notes/fa/fa3.wav
gdrive/My Drive/music_notes/fa/fa1.wav
gdrive/My Drive/music_notes/fa/fa2.wav
gdrive/My Drive/music_notes/sol/sol4.wav
gdrive/My Drive/music_notes/sol/sol3.wav
gdrive/My Drive/music_notes/sol/sol1.wav
gdrive/My Drive/music_notes/sol/sol2.wav
gdrive/My Drive/music_notes/la/la4.wav
gdrive/My Drive/music_notes/la/la3.wav
gdrive/My Drive/music_notes/la/la2.wav
gdrive/My Drive/m

In [0]:
input_file = 'data.csv'
write_to_csv(test_file)

Analyzing the Data in Pandas

In [181]:

data = pd.read_csv(csv_path)
data.head()


Unnamed: 0,filename,chroma_stft,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,do2.wav,0.158347,851.982849,955.621954,1359.133911,0.046556,-592.865659,91.286681,-16.027655,25.789301,6.501369,2.493707,7.785337,-7.30397,-9.840526,-5.472376,-15.956434,-14.952687,3.796558,3.034354,-5.393283,-18.194425,-16.037208,-4.045646,-8.29044,-7.369336,do
1,do4.wav,0.124815,1171.407928,1234.627843,2420.947266,0.052246,-538.015907,114.644277,-4.811778,47.230737,9.735454,0.914577,7.893248,-8.788845,-0.991606,1.545233,-0.648563,17.650642,19.362541,34.929754,36.930925,3.011775,-12.488895,-28.257724,-9.758384,15.131855,do
2,do1.wav,0.164177,1005.318429,1101.728964,1880.240146,0.055131,-490.52342,124.392996,-25.547577,50.144015,2.20161,13.484972,9.559972,-8.839733,-5.876965,-2.808042,-12.787375,-12.090681,7.177546,-4.58706,-2.461571,-12.816951,-14.962033,3.788169,-11.241454,-11.252067,do
3,do3.wav,0.12115,1217.114429,1231.303741,2358.74707,0.054844,-608.612714,71.691655,-8.057984,29.701671,10.348799,-0.546233,5.570629,-8.479808,-4.517708,-1.639596,-0.960709,16.880998,26.791451,39.2982,36.861435,6.888442,-12.916025,-25.914141,-8.168141,14.075324,do
4,do6.wav,0.14719,1365.176287,1508.502313,2624.949981,0.054842,-344.571627,77.201061,-33.96367,49.871048,-17.938416,-1.279743,-13.977491,-4.673605,-13.061176,-17.105556,-13.280224,-5.41982,-6.945437,-8.847891,-6.090019,-3.845276,-16.745094,-14.588734,-5.743793,-6.042477,do


In [182]:
data.shape

(35, 27)

In [0]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)


Encoding the Labels

In [184]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)

[0 0 0 0 0 0 4 4 4 4 4 3 3 3 3 3 1 1 1 1 1 6 6 6 6 6 2 2 2 2 2 5 5 5 5]


Dividing data into training and Testing set

In [0]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [213]:
print("Len train: ", len(y_train))
print("Len test: ", len(y_test))
print("Train shape: ", len(X_train.shape))
print("Test shape: ", len(y_train.shape))
print(X_train.shape[:])

Len train:  33
Len test:  2
Train shape:  2
Test shape:  1
(33, 25)



Classification with Keras

Building our Network

In [0]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))
model.add(layers.Dense(128, activation='relu'))

model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None))
model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(7, activation='softmax'))

In [0]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [190]:
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [191]:

test_loss, test_acc = model.evaluate(X_test,y_test)



In [192]:
print('test_acc: ',test_acc)

test_acc:  0.75


In [193]:
print(X_test)
print(y_test)

[[ 0.9914346  -0.97791049 -0.827574   -1.02326925 -1.36080456 -0.40144225
   1.46224304  1.15876609  1.17231786  0.33860678  0.85298198  1.57648646
   0.22276903  0.58077806 -0.4369171  -0.46578322  0.11922405 -0.81650834
  -0.58388426  0.21807402 -0.66611866 -0.55414709 -0.91628367 -1.12015889
  -0.75648964]
 [-0.71172214 -0.79519922 -0.90267948 -0.78107068 -0.60530529 -1.4154607
   0.40672352  1.10901957  0.00576998  1.02637169  0.03592012  0.36280124
   0.21433272  0.51039075  0.26560611 -0.46686518 -0.9152425  -0.19034323
   0.1897985   1.52344437  2.38812767  2.16928724  1.29597575 -0.41450025
  -1.77804882]
 [ 1.11528109  1.57243209  2.13008793  1.74889098  0.89968845  0.97960166
  -0.49723022  0.29404589 -0.97419139  0.37783109  1.36929363 -0.54046708
  -1.07999757 -1.03031729 -0.1806248  -0.24502815 -0.57923952 -1.22537156
  -0.40804943  0.11669589  0.55599174 -0.1262622  -0.2769749   0.09456628
   0.62482062]
 [-0.69293332 -0.4540543  -0.33757789 -0.22038814 -0.55110063 -0.732

In [199]:
predictions = model.predict(X_test)
predictions[2].shape

(7,)

In [195]:
np.sum(predictions[0])

1.0

In [204]:

np.argmax(predictions[3])

0

Validation set
