#Convolutional neural network

###import library

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from glob import glob
#Le module glob recherche tous les chemins correspondant à un motif 
#particulier selon les règles utilisées par le shell Unix, les résultats sont renvoyés dans un ordre arbitraire.

import librosa
import librosa.display
import IPython.display as ipd

from tqdm import tqdm
import os

###Création mel spectrogramme pour chaque fichier audio

In [3]:
feature_list = []
label_list = []
dictMel = {}
badsound=0
badlabel_list=[]
# Iterate over all files in given source path
print('Preparing feature dataset and labels.')
for file in tqdm(os.listdir('.')):
    # Skip if it's not a wav file
    if not file.endswith('.wav'):
        continue
    # Load audio and stretch it to length 1s
    audio_path = os.path.join('.', file)
    audio, sr = librosa.load(path=audio_path, sr=None)
    audio = librosa.effects.time_stretch(y=audio, rate=len(audio)/sr)
    # Calculate features and get the label from the filename
    mels = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048, hop_length=512)
    mels_db = librosa.power_to_db(S=mels, ref=1.0)
    if mels_db.shape == (128,87):
      feature_list.append(mels_db.reshape((128, 87, 1)))
      filename = int(str(file)[:-4])
      label_list.append(filename)
      dictMel[filename] = mels_db
    else:
      badsound+=1
      filename = int(str(file)[:-4])
      badlabel_list.append(filename)

features = np.array(feature_list)
labels = np.array(label_list)
badlabels=np.array(badlabel_list)


Preparing feature dataset and labels.


  return f(*args, **kwargs)
  return f(*args, **kwargs)
100%|██████████| 1008/1008 [00:18<00:00, 55.10it/s]


In [4]:
badlabels

array([43750,  8750, 26861, 36468, 16782, 31403, 21177, 15610, 21945,
       32904, 30478,  6273, 25308, 38760, 32886,   700,  3194, 20174,
        7119,  8202, 34702,  2702])

In [5]:
features.shape

(979, 128, 87, 1)

In [6]:
labels.shape

(979,)

###Tri des élements par ordre de nom de fichier

In [7]:
dataMel = pd.Series(dictMel)
dataMel=dataMel.sort_index()
listMel =dataMel.to_list()
liste_son=dataMel.index.to_list()

###On converti en matrice 3D

In [8]:
def list_to_matrix(X):
  matrix = np.zeros(shape=(len(X),128, 87))
  for i in range(len(X)):
    if X[i].shape == (128, 87):
      #print(X[i].shape, ' son num : ',i)
      matrix[i]=X[i]
  return matrix

matrix = list_to_matrix(listMel)


In [9]:
matrix

array([[[-35.94540405, -28.56772614, -23.93755913, ..., -21.5626297 ,
         -30.52719307, -41.92855453],
        [-38.76150131, -34.90169907, -29.99400902, ..., -29.52453423,
         -32.86561584, -47.08643341],
        [-42.36565399, -32.02842331, -26.66487122, ..., -35.22727966,
         -39.07947922, -47.81403351],
        ...,
        [-57.5069046 , -57.5069046 , -57.5069046 , ..., -57.5069046 ,
         -57.5069046 , -57.5069046 ],
        [-57.5069046 , -57.5069046 , -57.5069046 , ..., -57.5069046 ,
         -57.5069046 , -57.5069046 ],
        [-57.5069046 , -57.5069046 , -57.5069046 , ..., -57.5069046 ,
         -57.5069046 , -57.5069046 ]],

       [[-39.32492828, -32.55230713, -33.73304367, ..., -32.1493721 ,
         -42.28428268, -48.30501175],
        [-18.75129509, -13.87823105, -10.6269207 , ..., -19.85523033,
         -27.72628975, -41.2025528 ],
        [-11.56927872,  -2.02730489,   3.23200607, ...,  -8.74022198,
         -19.87855148, -32.91221237],
        ...,


###Récupération des labels

In [10]:
datacomp=pd.read_csv("Sample9_label_mediane_.csv",sep=",")

In [11]:
datacomp

Unnamed: 0,filename,mediane,bool_audible
0,000000.wav,3,1
1,000057.wav,2,0
2,000094.wav,4,1
3,000110.wav,3,1
4,000135.wav,4,1
...,...,...,...
974,047979.wav,3,1
975,048018.wav,3,1
976,048048.wav,0,0
977,048069.wav,0,0


In [12]:
label_list


[22966,
 47898,
 4023,
 286,
 1198,
 34645,
 29172,
 3943,
 5103,
 26080,
 41769,
 34309,
 23113,
 24654,
 41966,
 20340,
 32022,
 18026,
 7112,
 40271,
 23926,
 40503,
 29238,
 7847,
 28668,
 10491,
 15714,
 42664,
 42670,
 22184,
 41379,
 21314,
 35361,
 29760,
 39215,
 29774,
 45877,
 31648,
 26334,
 4803,
 39599,
 42659,
 32157,
 2080,
 26136,
 31304,
 43786,
 19305,
 2043,
 14147,
 42329,
 46629,
 4368,
 32802,
 7098,
 22230,
 32751,
 45297,
 43619,
 24696,
 15477,
 7073,
 17937,
 26532,
 4977,
 31066,
 27162,
 46588,
 16354,
 1628,
 14225,
 10043,
 6179,
 12130,
 19065,
 20196,
 27612,
 28521,
 8829,
 23474,
 36723,
 29603,
 32551,
 2109,
 14970,
 35766,
 14958,
 30636,
 29367,
 39160,
 9882,
 20592,
 4342,
 44772,
 7139,
 36333,
 11954,
 33539,
 24723,
 41352,
 24290,
 440,
 2525,
 26493,
 33075,
 35376,
 39216,
 37548,
 3835,
 33048,
 15919,
 42841,
 43578,
 12051,
 25825,
 17129,
 19110,
 43550,
 17101,
 858,
 39767,
 28656,
 24087,
 16753,
 21699,
 46616,
 46158,
 47520,
 117

In [13]:
datacomp['filename'].head()

0    000000.wav
1    000057.wav
2    000094.wav
3    000110.wav
4    000135.wav
Name: filename, dtype: object

In [14]:
datacomp['filename']=datacomp['filename'].str.replace('.wav', '',regex =True).astype(int)
datacomp.head()

Unnamed: 0,filename,mediane,bool_audible
0,0,3,1
1,57,2,0
2,94,4,1
3,110,3,1
4,135,4,1


In [15]:
datacomp=datacomp[datacomp['filename'].isin(liste_son)]

In [16]:
datacomp

Unnamed: 0,filename,mediane,bool_audible
0,0,3,1
1,57,2,0
2,94,4,1
3,110,3,1
4,135,4,1
...,...,...,...
974,47979,3,1
975,48018,3,1
976,48048,0,0
977,48069,0,0


In [17]:
# datacomp['Moyenne ']=datacomp['Moyenne '].str.replace(',','.')
# datacomp['Moyenne ']=datacomp['Moyenne '].astype(float)
# datacomp.rename(columns = {'Moyenne ':'Moyenne'}, inplace = True)

On crée la target 

In [18]:
#datacomp.Moyenne = [0 if i<-1.2 else 1 if i<-0.4 else 2 if i<0.4 else 3 if i<1.2 else 4 for i in datacomp.Moyenne]
datacomp['MedianeBin'] = [0 if i<3 else 1 for i in datacomp.mediane]
#datacomp.mediane = [0 if i==-2 else 1 if i==-1 else 2 if i==0 else 3 if i==1 else 4 for i in datacomp.mediane]
#on adapte nos labels pour notre classification

In [19]:
datacomp['mediane'].value_counts()

3    362
0    226
4    217
1     92
2     82
Name: mediane, dtype: int64

In [20]:
datacomp['mediane'].value_counts()

3    362
0    226
4    217
1     92
2     82
Name: mediane, dtype: int64

In [21]:
datacomp['MedianeBin'].value_counts()
#on observe déja un déséquilibre dans les classes

1    579
0    400
Name: MedianeBin, dtype: int64

### Préparation des données pour le réseau de neuronnes

In [22]:
X_train_full = matrix
y_train_full = datacomp[['MedianeBin']]
#on peut changer la métrique de notre label en choisissant La moyenne ou la médiane
#y_train_full = datacomp[['Moyenne']]

In [23]:
# X_train, y_train = X_train_full[:240],y_train_full[:240]
# X_test, y_test = X_train_full[240:],y_train_full[240:]

In [24]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_train_full, y_train_full, test_size=0.2,random_state=123)

In [25]:
print(X_test.shape)
print(y_test.shape)

(196, 128, 87)
(196, 1)


###Constuire un model avec tf.keras.sequential

In [26]:
import tensorflow as tf
from tensorflow import keras

: 

: 

In [None]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

: 

In [None]:
model = keras.Sequential(layers=[
        keras.layers.InputLayer(input_shape=features[0].shape),
        keras.layers.Conv2D(16, 3, padding='same', activation=keras.activations.relu),
        keras.layers.MaxPooling2D(),
        keras.layers.Conv2D(32, 3, padding='same', activation=keras.activations.relu),
        keras.layers.MaxPooling2D(),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(64, activation=keras.activations.relu),
        keras.layers.Dense(1, activation=keras.activations.sigmoid)
    ])
model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.binary_crossentropy, metrics=['accuracy'])
print(model.summary())

: 

In [None]:
# Set parameters for data splitting and training
TEST_SIZE = 0.2
BATCH_SIZE = 25
EPOCHS = 20

# Encode Labels
#encoded_labels = tf.one_hot(indices=labels, depth=10)
# Split dataset to train and test data
#X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels.numpy(), test_size=TEST_SIZE)

# Train the model
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(x=X_train, y=y_train, validation_split=TEST_SIZE, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping])

# Plot the training history
fig, axs = plt.subplots(2)
fig.set_size_inches(12, 8)
fig.suptitle('Training History', fontsize=16)
axs[0].plot(history.epoch, history.history['loss'], history.history['val_loss'])
axs[0].set(title='Loss', xlabel='Epoch', ylabel='Loss')
axs[0].legend(['loss', 'val_loss'])
axs[1].plot(history.epoch, history.history['accuracy'], history.history['val_accuracy'])
axs[1].set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')
axs[1].legend(['accuracy', 'val_accuracy'])
plt.show()

: 