In [1]:
import numpy as np 
import os
import pandas as pd 
from scipy.io import wavfile

import librosa
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plt
from tensorflow.keras import regularizers, activations
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from datetime import datetime 

from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
import cv2

In [2]:
us8k_df = pd.read_pickle("us8k_df.pkl")

In [3]:
df = us8k_df.drop(['fold'],axis=1)
X = np.stack(df.melspectrogram.to_numpy())
X_dim = (128,128,1)
X = X.reshape(X.shape[0], *X_dim)
Y = np.array(df['label'])
Y = to_categorical(Y)

In [4]:
Y.shape, X.shape

((8732, 10), (8732, 128, 128, 1))

In [5]:
X_new = np.zeros((8732,128,128,3))


In [6]:
X_new.shape

(8732, 128, 128, 3)

In [7]:
for i in range(len(X)):
    X_new[i]=cv2.cvtColor(X[i], cv2.COLOR_GRAY2RGB)


In [8]:
X=X_new

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.3,shuffle=True,stratify = Y)
X_val, X_test, Y_val,Y_test = train_test_split(X_test,Y_test,test_size=0.5,shuffle=True,stratify = Y_test)

MOdèle utilisant mobile net 

In [10]:
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./127.5, offset= -1)

In [11]:
IMG_SHAPE = (128,128,3)

base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

In [12]:
prediction_layer = tf.keras.layers.Dense(10)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

In [13]:
# model : MobileNet puis du dropout et une couche dense pour la prédiction
inputs = tf.keras.Input(shape=IMG_SHAPE)
x = preprocess_input(inputs)
x = rescale(x)
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)

In [14]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
tf.math.truediv (TFOpLambda) (None, 128, 128, 3)       0         
_________________________________________________________________
tf.math.subtract (TFOpLambda (None, 128, 128, 3)       0         
_________________________________________________________________
rescaling (Rescaling)        (None, 128, 128, 3)       0         
_________________________________________________________________
mobilenetv2_1.00_128 (Functi (None, 4, 4, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1280)              0     

In [15]:
base_learning_rate = 0.001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [16]:
initial_epochs = 10
num_batch_size = 32
loss0, accuracy0 = model.evaluate(X_val,Y_val)



In [17]:
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_dir, histogram_freq = 1)
save_best = tf.keras.callbacks.ModelCheckpoint(filepath = "logs/checkpoints/", save_weights_only = True,
                                               monitor = "val_accuracy", mode = "max", save_best_only = True)

In [None]:
model_fit = model.fit(X_train[:2000],Y_train[0:2000], epochs=initial_epochs,validation_data=(X_val[0:300],Y_val[0:300]),batch_size=num_batch_size,callbacks = [tensorboard_callback, save_best])


Epoch 1/10
Epoch 2/10
 4/63 [>.............................] - ETA: 39s - loss: 0.3885 - accuracy: 0.0859