In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

In [2]:
# Load the training manifest; later cells read its NAME and LABEL columns.
data = pd.read_csv("SOUND_CLASSIFICATION_DATASET/training1.csv")

In [11]:
# Peek at the first rows of the training manifest.
data.head()

Unnamed: 0,NAME,LABEL
0,1,1
1,2,1
2,3,1
3,4,0
4,5,0


In [4]:
# Column dtypes and non-null counts for the training manifest.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1584 entries, 0 to 1583
Data columns (total 2 columns):
NAME     1584 non-null int64
LABEL    1584 non-null int64
dtypes: int64(2)
memory usage: 24.8 KB


In [5]:
from librosa import display
import librosa

In [10]:
# PREPROCESSING
# For every clip listed in `data`, compute five 40-value descriptors (each one
# averaged over frames) and pack them into a single (40, 5) array.
x_train=[]
y_train=[]
path="SOUND_CLASSIFICATION_DATASET/TRAIN_SET/"
# Iterate the two columns directly instead of `data.iloc[i][...]`: the row-wise
# form builds a Series per row and would upcast NAME (e.g. 1 -> 1.0) if the
# frame ever held mixed dtypes, producing a wrong file name.
for name,label in tqdm(zip(data["NAME"].values,data["LABEL"].values),total=len(data)):
    filename=path+str(name)+".wav"
    y,sr=librosa.load(filename)
    # Audio and sample rate are passed by keyword: recent librosa releases make
    # these arguments keyword-only, and the keyword form also works on older ones.
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T,axis=0)
    melspectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40,fmax=8000).T,axis=0)
    chroma_stft=np.mean(librosa.feature.chroma_stft(y=y, sr=sr,n_chroma=40).T,axis=0)
    # NOTE(review): newer librosa versions may require bins_per_octave to be a
    # multiple of n_chroma for the CQT-based chroma features -- confirm against
    # the installed version.
    chroma_cq = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr,n_chroma=40).T,axis=0)
    chroma_cens = np.mean(librosa.feature.chroma_cens(y=y, sr=sr,n_chroma=40).T,axis=0)
    # vstack yields a (5, 40) array; the reshape re-reads that buffer row-major
    # as (40, 5) -- it is NOT a transpose. Left unchanged on purpose so the
    # layout matches the test-set cell and any model trained on these features.
    features=np.reshape(np.vstack((mfccs,melspectrogram,chroma_stft,chroma_cq,chroma_cens)),(40,5))
    x_train.append(features)
    y_train.append(label)

100%|██████████████████████████████████████████████████████████████████████████████| 1584/1584 [15:51<00:00,  1.66it/s]


In [12]:
# Load the test manifest; the test preprocessing cell reads its NAME and LABEL columns.
dataset = pd.read_csv("SOUND_CLASSIFICATION_DATASET/testing1.csv")

In [13]:
# Column dtypes and non-null counts for the test manifest.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 2 columns):
NAME     56 non-null int64
LABEL    56 non-null int64
dtypes: int64(2)
memory usage: 976.0 bytes


In [14]:
# PREPROCESSING TEST SET
# Same feature recipe as the training cell: five 40-value descriptors per clip
# (each averaged over frames), packed into one (40, 5) array.
x_test=[]
y_test=[]
path="SOUND_CLASSIFICATION_DATASET/TEST_SET/"
# Iterate the two columns directly instead of `dataset.iloc[i][...]`: the
# row-wise form builds a Series per row and would upcast NAME (e.g. 1 -> 1.0)
# if the frame ever held mixed dtypes, producing a wrong file name.
for name,label in tqdm(zip(dataset["NAME"].values,dataset["LABEL"].values),total=len(dataset)):
    filename=path+str(name)+".wav"
    y,sr=librosa.load(filename)
    # Audio and sample rate are passed by keyword: recent librosa releases make
    # these arguments keyword-only, and the keyword form also works on older ones.
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T,axis=0)
    melspectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40,fmax=8000).T,axis=0)
    chroma_stft=np.mean(librosa.feature.chroma_stft(y=y, sr=sr,n_chroma=40).T,axis=0)
    # NOTE(review): newer librosa versions may require bins_per_octave to be a
    # multiple of n_chroma for the CQT-based chroma features -- confirm against
    # the installed version.
    chroma_cq = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr,n_chroma=40).T,axis=0)
    chroma_cens = np.mean(librosa.feature.chroma_cens(y=y, sr=sr,n_chroma=40).T,axis=0)
    # vstack yields a (5, 40) array; the reshape re-reads that buffer row-major
    # as (40, 5) -- it is NOT a transpose. Left unchanged on purpose so the
    # layout matches the training-set cell and any model trained on it.
    features=np.reshape(np.vstack((mfccs,melspectrogram,chroma_stft,chroma_cq,chroma_cens)),(40,5))
    x_test.append(features)
    y_test.append(label)

100%|██████████████████████████████████████████████████████████████████████████████████| 56/56 [00:35<00:00,  1.59it/s]


In [15]:
# CONVERTING TO ARRAY
# Turn the four accumulated Python lists into NumPy arrays in a single pass,
# keeping the same variable names for the cells that follow.
x_train, y_train, x_test, y_test = (
    np.asarray(split) for split in (x_train, y_train, x_test, y_test)
)

In [16]:
# Sanity check: feature and label array shapes for both splits.
x_train.shape , y_train.shape , x_test.shape , y_test.shape

((1584, 40, 5), (1584,), (56, 40, 5), (56,))

In [17]:
# Make the labels column vectors. -1 lets NumPy infer the sample count, so the
# cell keeps working if the manifests grow or shrink (it was hard-coded to
# 1584 and 56 before).
y_train = y_train.reshape((-1,1))
y_test = y_test.reshape((-1,1))

In [18]:
# Add a trailing channel axis for Conv2D. -1 infers the number of test clips
# instead of hard-coding 56; re-running the cell leaves the shape unchanged.
x_test = x_test.reshape((-1,40,5,1))

In [19]:
# Confirm the test features now carry the trailing channel axis.
x_test.shape

(56, 40, 5, 1)

In [20]:
#converting to one hot
# Import from the public `keras.utils` namespace: the `keras.utils.np_utils`
# module path is an internal location that newer Keras releases no longer ship.
from keras.utils import to_categorical
# Two classes (labels 0 and 1). Note this cell overwrites y_train / y_test, so
# running it a second time would one-hot encode the already encoded labels.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)
y_train.shape,y_test.shape

Using TensorFlow backend.


((1584, 2), (56, 2))

In [21]:
# Spot-check the one-hot encoding on the last training label; -1 avoids
# hard-coding the dataset size (previously index 1583).
y_train[-1]

array([0., 1.], dtype=float32)

In [22]:
# Add a trailing channel axis for Conv2D. -1 infers the number of training
# clips instead of hard-coding 1584; re-running the cell leaves the shape unchanged.
x_train = x_train.reshape((-1,40,5,1))

In [23]:
# Confirm the training features now carry the trailing channel axis.
x_train.shape

(1584, 40, 5, 1)

In [24]:
from keras import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout

In [25]:
# Start from an empty Sequential container; the layers are added in the next cell.
model = Sequential()

In [26]:
#adding layers and forming the model
# NOTE: `model.add` appends, so re-running this cell without first re-creating
# `model` stacks a second copy of every layer.

# Convolutional feature extractor over the (40, 5, 1) feature "image".
# Padding is spelled "same" everywhere for consistency (it was "Same" here).
model.add(Conv2D(64,kernel_size=5,strides=1,padding="same",activation="relu",input_shape=(40,5,1)))
model.add(MaxPooling2D(padding="same"))

model.add(Conv2D(128,kernel_size=5,strides=1,padding="same",activation="relu"))
model.add(MaxPooling2D(padding="same"))
model.add(Dropout(0.3))

model.add(Flatten())

# Fully connected classifier head with dropout after each hidden layer.
model.add(Dense(256,activation="relu"))
model.add(Dropout(0.3))

model.add(Dense(512,activation="relu"))
model.add(Dropout(0.3))

# Two-way softmax to match the one-hot encoded labels.
model.add(Dense(2,activation="softmax"))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [27]:
#compiling
# Categorical cross-entropy pairs with the softmax output and one-hot labels;
# "accuracy" is the only extra metric tracked.
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])

In [28]:
# Training the model
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics and the later test score come from the same samples.
model.fit(x_train,y_train,batch_size=32,epochs=50,validation_data=(x_test,y_test))

Instructions for updating:
Use tf.cast instead.
Train on 1584 samples, validate on 56 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50


Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50


Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50


Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50


Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50


Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50


Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50


Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50


Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50


Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1fad094eb38>

In [88]:
# Inspect the trained parameters. The original cell referenced `mode`, a name
# that is never defined in this notebook, so it fails on a fresh kernel.
model.get_weights()

[array([[[[ 0.01125067, -0.03761831,  0.00506061, ...,  0.01135404,
           -0.01163099,  0.01054627]],
 
         [[ 0.000873  , -0.01929228, -0.00669033, ...,  0.02965062,
            0.00422322, -0.02420651]],
 
         [[-0.04785929, -0.04232047, -0.01231945, ...,  0.01878584,
            0.09917814, -0.0047835 ]],
 
         [[-0.12049931, -0.03172345,  0.00635783, ..., -0.07323691,
           -0.06069348,  0.01804657]],
 
         [[-0.01670641, -0.03143491,  0.01803452, ..., -0.01929908,
           -0.19950385,  0.08126143]]],
 
 
        [[[ 0.02750266,  0.06977442, -0.0290975 , ...,  0.03676389,
            0.00761043, -0.0290525 ]],
 
         [[ 0.00993804, -0.00144175, -0.14186163, ..., -0.09806665,
            0.01136503, -0.06294262]],
 
         [[ 0.0057756 , -0.01529799, -0.04213442, ..., -0.04734529,
            0.00867265, -0.05124036]],
 
         [[ 0.00039044,  0.08845065,  0.0944014 , ..., -0.14531694,
           -0.03648807, -0.05597885]],
 
         [[ 0.04

In [29]:
# Evaluate on both splits; each result is indexed below as [1] for accuracy.
score_train = model.evaluate(x_train,y_train)
score_test = model.evaluate(x_test,y_test)



In [30]:
# Index 1 is the accuracy metric requested in model.compile (index 0 is the loss).
print("Accuracy on training set : ", score_train[1])
print("Accuracy on testing set : ", score_test[1])

Accuracy on training set :  0.9924242424242424
Accuracy on testing set :  0.9642857142857143


In [31]:
# Persist the trained model to an .h5 file in the working directory.
model.save("speech_model_correct.h5")