In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, Conv1D, MaxPooling1D

# Data preprocessing

In [3]:
#import the data from npz file
import numpy as np

baby_cry = np.load('output/baby_cry_617061.npz')
dog_bark = np.load('output/dog_bark_142963.npz')
glassbreak = np.load("output/glassbreak_436662.npz")
people_taling = np.load("output/people_talking_590036.npz")



In [34]:
#merge the data and create labels
X = np.concatenate((baby_cry['mfccs'], dog_bark['mfccs'], glassbreak['mfccs'], people_taling['mfccs']))
y = np.concatenate((["baby_cry"] * baby_cry['mfccs'].shape[0], ["dog_bark"] * dog_bark['mfccs'].shape[0],
                    ["glassbreak"] * glassbreak['mfccs'].shape[0], ["people_taling"] * people_taling['mfccs'].shape[0]))
X.shape

(200, 26, 625)

# Data Labeling

In [35]:
#labels to numbers
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y = le.fit_transform(y)



In [36]:
#get only 375 of axis 3
first = X[:, :, :375]
second = X[:, :, -375:]
X = np.concatenate((first, second), axis=0)
y = np.concatenate((y, y), axis=0)

In [37]:
#reshape the data


In [38]:
num_mfcc = X.shape[1]
num_frames = X.shape[2]
num_classes = len(np.unique(y))

In [39]:
X.shape

(400, 26, 375)

# Machine learning model

In [63]:
#Machine learning model.
model = tf.keras.Sequential()
model.add(Conv2D(16, (2, 2), activation='relu', input_shape=(num_mfcc, num_frames, 1)))
model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))


In [64]:

model.compile(optimizer="adam", loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 25, 374, 16)       80        
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 13, 187, 16)      0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 11, 185, 8)        1160      
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 6, 93, 8)         0         
 g2D)                                                            
                                                                 
 flatten_5 (Flatten)         (None, 4464)              0         
                                                                 
 dense_10 (Dense)            (None, 64)               

# Training

In [65]:
#split the data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [67]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=64)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1f9001677d0>

# Evaluation

In [54]:
model.predict(X_test)



array([[9.99996662e-01, 8.92137109e-07, 2.02067463e-06, 4.28682398e-07],
       [2.09332095e-03, 9.90646005e-01, 1.04115007e-03, 6.21955376e-03],
       [1.00000000e+00, 1.74588294e-19, 6.23201950e-15, 1.04728361e-15],
       [6.48318892e-05, 9.99935150e-01, 9.80710196e-13, 4.58637251e-13],
       [1.61338206e-02, 9.81923163e-01, 1.94207474e-03, 9.85413635e-07],
       [2.30030697e-02, 9.76996720e-01, 2.38886628e-07, 2.08962161e-13],
       [3.60177128e-22, 2.15783794e-17, 1.00000000e+00, 5.80926489e-20],
       [2.67745078e-01, 7.32146382e-01, 1.08546112e-04, 4.54045290e-09],
       [9.82115507e-01, 1.73202474e-02, 5.61185065e-04, 3.12380348e-06],
       [2.65420973e-22, 3.27710728e-17, 1.00000000e+00, 3.13590216e-24],
       [9.99779046e-01, 2.16585083e-09, 3.90060904e-06, 2.17036199e-04],
       [8.59597435e-07, 3.64199693e-09, 6.14797528e-08, 9.99999046e-01],
       [9.72217858e-01, 2.77565122e-02, 2.56449439e-05, 1.38696921e-09],
       [1.18879252e-03, 9.98782337e-01, 2.88718638e

In [55]:
model.evaluate(X_test, y_test)



[0.5506342649459839, 0.887499988079071]

# Save the model

In [27]:
model.save('model.h5')

In [42]:
#convert the model to tflite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
open("model.tflite", "wb").write(tflite_model)



INFO:tensorflow:Assets written to: C:\Users\ciwoe\AppData\Local\Temp\tmp77xknnou\assets


INFO:tensorflow:Assets written to: C:\Users\ciwoe\AppData\Local\Temp\tmp77xknnou\assets


3823684

# Quantization

In [68]:
# post training quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
open("model_quant.tflite", "wb").write(tflite_quant_model)



INFO:tensorflow:Assets written to: C:\Users\ciwoe\AppData\Local\Temp\tmp7htdyq_t\assets


INFO:tensorflow:Assets written to: C:\Users\ciwoe\AppData\Local\Temp\tmp7htdyq_t\assets


292200