In [42]:
import numpy as np
import pandas as pd

import IPython.display as ipd

import librosa
import librosa.display

import matplotlib.pyplot as plt

import cv2
%matplotlib inline
import numpy as np
import matplotlib
# prikaz vecih slika 
matplotlib.rcParams['figure.figsize'] = 16,12

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.svm import SVC
from sklearn import metrics

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, Conv3D, BatchNormalization, MaxPool2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import Adam
from keras.utils import to_categorical

from keras.callbacks import ModelCheckpoint
from datetime import datetime

In [43]:
def extract_spectogram(file_name):
    try:
        audio, sample_rate = librosa.load(file_name,res_type="kaiser_fast" ,sr=22050)
        a, index = librosa.effects.trim(audio, top_db=30, frame_length=2048, hop_length=512)
        y_out = a[:44100]
        spectrogram = librosa.feature.melspectrogram(y=y_out, sr=sample_rate, n_fft=2048, hop_length=1024)
        spec_shape = spectrogram.shape
        if(spec_shape[1] < 44):
            print(spec_shape)
            print(file_name)
        spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
        return spectrogram
    
    except Exception as e:
        print(e)
        print("Error encountered while parsing file: ", file_name)
        return None
    
    return mfccscaled    

def my_rgb2gray(img_rgb):
    img_gray = np.ndarray((img_rgb.shape[0], img_rgb.shape[1]))  # zauzimanje memorije za sliku (nema trece dimenzije)
    img_gray = 0.21*img_rgb[:, :] + 0.72*img_rgb[:, :] #+ 0.07*img_rgb[:, :, 2]
    return img_gray

def normalize_gray(array):
    return (array - array.min())/(array.max() - array.min())

In [44]:
folder_path = "dataset/"
folder_list = ["Accordion","Clarinet_Bb","Contrabass","Horn","Viola","Violin","Violoncello"]
features = []
onlyfiles = []

from os import listdir
from os.path import isfile, join

for folder_inst in folder_list:
    onlyfiles = onlyfiles + [f for f in listdir("dataset/" + folder_inst) if isfile(join("dataset/" + folder_inst + "/", f))]
    


for file in onlyfiles: 
    dodatak = ''
    if (file.split("-")[0] == "Acc"):
        dodatak = "Accordion"
    elif (file.split("-")[0] == "ClBb"):
        dodatak = "Clarinet_Bb"
    elif (file.split("-")[0] == "Cb"):
        dodatak = "Contrabass"
    elif (file.split("-")[0] == "Hn"):
        dodatak = "Horn"
    elif (file.split("-")[0] == "Va"):
        dodatak = "Viola"
    elif (file.split("-")[0] == "Vn"):
        dodatak = "Violin"
    elif (file.split("-")[0] == "Vc"):
        dodatak = "Violoncello"
    
    file_name = folder_path + dodatak + "/" + file
    a = file.split("-")
    instrument = a[0]
    pitch = a[2]
    if(len(pitch) == 3):
        pitch = pitch[:2]
    else:
        pitch = pitch[:1]
    data = extract_spectogram(file_name)
    features.append([file_name, data, instrument, pitch])    

In [45]:
#menjanje boje slike
features_gray = []
for list in features:
    imggray = my_rgb2gray(list[1])
    features_gray.append([list[0],normalize_gray(imggray), list[2], list[3]])
    
features_df = pd.DataFrame(features_gray, columns=["file", "feature", "instrument", "pitch"])
features_df.head()

Unnamed: 0,file,feature,instrument,pitch
0,dataset/Accordion/Acc-ord-A#3-ff-alt1-N.wav,"[[0.29073387, 0.33191147, 0.2776536, 0.2996274...",Acc,A#
1,dataset/Accordion/Acc-ord-A#3-ff-alt2-N.wav,"[[0.3070023, 0.3617015, 0.27213687, 0.36119562...",Acc,A#
2,dataset/Accordion/Acc-ord-A#3-ff-N-N.wav,"[[0.47353348, 0.42426324, 0.3613015, 0.4068834...",Acc,A#
3,dataset/Accordion/Acc-ord-A#3-mf-alt1-N.wav,"[[0.42782307, 0.46233648, 0.38736117, 0.385382...",Acc,A#
4,dataset/Accordion/Acc-ord-A#3-mf-alt2-N.wav,"[[0.6518973, 0.62134737, 0.5658642, 0.48233718...",Acc,A#


In [60]:
a_ = 0
a = 0 
b = 0 
c_ = 0
c = 0
d_ = 0
d = 0
e = 0
f_ = 0
f = 0
g_ = 0
g = 0
for x in features_gray:
    if(x[3] == "A#"):
        a_+=1
    if(x[3] == "A"):
        a+=1
    if(x[3] == "B"):
        b+=1
    if(x[3] == "C#"):
        c_+=1
    if(x[3] == "C"):
        c+=1
    if(x[3] == "D#"):
        d_+=1
    if(x[3] == "D"):
        d+=1
    if(x[3] == "E"):
        e+=1
    if(x[3] == "F"):
        f+=1
    if(x[3] == "F#"):
        f_+=1
    if(x[3] == "G"):
        g+=1
    if(x[3] == "G#"):
        g_+=1

In [61]:
print("A: ", a_)
print("A#: ", a)
print("B: ", b)
print("C: ", c)
print("C#: ", c_)
print("D: ", d)
print("D#: ", d_)
print("E: ", e)
print("F: ", f)
print("F#: ", f_)
print("G: ", g)
print("G#: ", g_)

A:  149
A#:  150
B:  154
C:  165
C#:  145
D:  160
D#:  158
E:  163
F:  151
F#:  142
G:  161
G#:  154


In [39]:
X = np.array(features_df.feature.tolist())
yinst = np.array(features_df.pitch.tolist())

leinst = LabelEncoder()
yyinst = leinst.fit_transform(yinst)

x_train, x_test, y_train, y_test = train_test_split(X, yyinst, test_size=0.2, shuffle=True, random_state=42)

x_test, x_validation, y_test, y_validation = train_test_split(x_test, y_test, test_size=0.5, shuffle=True, random_state=42)

x_train = x_train.reshape(len(x_train),128,44,1)
x_test = x_test.reshape(len(x_test),128,44,1)
x_validation = x_validation.reshape(len(x_validation),128,44,1)
print(x_train.shape)
print(x_test.shape)
print(x_validation.shape)


num_classes = len(leinst.classes_)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

(1161, 128, 44, 1)
(145, 128, 44, 1)
(146, 128, 44, 1)


In [6]:
num_classes

12

In [40]:
model1 = Sequential()
model1.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(128,44,1)))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Dropout(0.25))
model1.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Dropout(0.25))
model1.add(Flatten())
#model.add(Dense(128, activation='relu'))
#model.add(Dropout(0.5))
model1.add(Dense(num_classes, activation='softmax'))
#Compile
model1.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
print(model1.summary())

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 126, 42, 32)       320       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 63, 21, 32)        0         
_________________________________________________________________
dropout_22 (Dropout)         (None, 63, 21, 32)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 61, 19, 64)        18496     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 30, 9, 64)         0         
_________________________________________________________________
dropout_23 (Dropout)         (None, 30, 9, 64)         0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 17280)           

In [31]:
model2 = Sequential()
model2.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(128,44,1)))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Dropout(0.25))
model2.add(Flatten())
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.5))
model2.add(Dense(num_classes, activation='softmax'))
model2.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
print(model2.summary())

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 126, 42, 32)       320       
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 63, 21, 32)        0         
_________________________________________________________________
dropout_18 (Dropout)         (None, 63, 21, 32)        0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 42336)             0         
_________________________________________________________________
dense_11 (Dense)             (None, 128)               5419136   
_________________________________________________________________
dropout_19 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 12)              

In [41]:
num_epochs = 60
batch_size = 32

checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=1, save_best_only=True)

start = datetime.now()
model1.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_data=(x_test, y_test), 
           callbacks=[checkpointer], verbose=1)

duration = datetime.now() - start
print("Training completed in: ", duration)

Train on 1161 samples, validate on 145 samples
Epoch 1/60

Epoch 00001: val_loss improved from inf to 0.73617, saving model to best_weights.hdf5
Epoch 2/60

Epoch 00002: val_loss improved from 0.73617 to 0.36471, saving model to best_weights.hdf5
Epoch 3/60

Epoch 00003: val_loss improved from 0.36471 to 0.28150, saving model to best_weights.hdf5
Epoch 4/60

Epoch 00004: val_loss improved from 0.28150 to 0.22231, saving model to best_weights.hdf5
Epoch 5/60

Epoch 00005: val_loss improved from 0.22231 to 0.16533, saving model to best_weights.hdf5
Epoch 6/60

Epoch 00006: val_loss improved from 0.16533 to 0.14376, saving model to best_weights.hdf5
Epoch 7/60

Epoch 00007: val_loss improved from 0.14376 to 0.10724, saving model to best_weights.hdf5
Epoch 8/60

Epoch 00008: val_loss improved from 0.10724 to 0.07360, saving model to best_weights.hdf5
Epoch 9/60

Epoch 00009: val_loss improved from 0.07360 to 0.05614, saving model to best_weights.hdf5
Epoch 10/60

Epoch 00010: val_loss did 


Epoch 00038: val_loss improved from 0.00816 to 0.00551, saving model to best_weights.hdf5
Epoch 39/60

Epoch 00039: val_loss did not improve from 0.00551
Epoch 40/60

Epoch 00040: val_loss did not improve from 0.00551
Epoch 41/60

Epoch 00041: val_loss did not improve from 0.00551
Epoch 42/60

Epoch 00042: val_loss did not improve from 0.00551
Epoch 43/60

Epoch 00043: val_loss did not improve from 0.00551
Epoch 44/60

Epoch 00044: val_loss did not improve from 0.00551
Epoch 45/60

Epoch 00045: val_loss did not improve from 0.00551
Epoch 46/60

Epoch 00046: val_loss did not improve from 0.00551
Epoch 47/60

Epoch 00047: val_loss did not improve from 0.00551
Epoch 48/60

Epoch 00048: val_loss did not improve from 0.00551
Epoch 49/60

Epoch 00049: val_loss did not improve from 0.00551
Epoch 50/60

Epoch 00050: val_loss did not improve from 0.00551
Epoch 51/60

Epoch 00051: val_loss did not improve from 0.00551
Epoch 52/60

Epoch 00052: val_loss did not improve from 0.00551
Epoch 53/60



In [25]:
train_score = model.evaluate(x_train, y_train, verbose=1)
print("Training Accuracy: ", train_score[1])

(1161, 128, 44, 1)
Training Accuracy:  1.0
