# <center> **Audio Classification using Deep Learning**


# **Introduction**


## Importing and installing the required modules


In [None]:
import importlib

def install_and_import(package, pip_name=None):
    """Import ``package``, installing it with pip first when it is missing.

    The imported module is bound to the name ``package`` in this module's
    globals.

    Args:
        package: Importable module name (e.g. ``"sklearn"``).
        pip_name: Distribution name to install when it differs from the
            import name (e.g. ``"scikit-learn"``). Defaults to ``package``.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with an error.
        ImportError: If the module still cannot be imported after installing.
    """
    # Imported locally so the helper does not depend on another cell.
    import subprocess
    import sys

    try:
        module = importlib.import_module(package)
        print(f"{package} est déjà installé.")
    except ImportError:
        print(f"{package} n'est pas installé. Installation en cours...")
        # Run pip through the kernel's own interpreter so the package is
        # installed into the environment this notebook is running in.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", pip_name or package]
        )
        # Let the import system notice the newly installed package.
        importlib.invalidate_caches()
        module = importlib.import_module(package)

    globals()[package] = module
    print(f"{package} est maintenant importé.")

# Check that every library the notebook needs can be imported and install
# the ones that are missing.
import subprocess
import sys

libraries_to_check = ['IPython', 'librosa', 'shutil', 'pandas', 'os', 'time', 'warnings', 'random',
                      'seaborn', 'numpy', 'tqdm', 'matplotlib', 'sklearn', 'tensorflow' , 'resampy' ]

# Import names whose PyPI distribution has a different name.
pip_names = {'IPython': 'ipython', 'sklearn': 'scikit-learn'}

all_libraries_found = True

for library in libraries_to_check:
    try:
        importlib.import_module(library)
        print(f"{library} est déjà installé.")
    except ImportError:
        print(f"{library} n'est pas installé. Installation en cours...")
        all_libraries_found = False
        # Use the kernel's interpreter so pip installs into the environment
        # the notebook is actually running in.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", pip_names.get(library, library)]
        )
        if result.returncode != 0:
            # Report the failure instead of letting it pass silently.
            print(f"Échec de l'installation de {library}.")
        else:
            importlib.invalidate_caches()

# Print a confirmation message.
if all_libraries_found:
    print("Toutes les bibliothèques nécessaires sont installées et importées.")
else:
    print("Certaines bibliothèques ont été installées et importées.")


In [None]:
import IPython.display as ipd
import librosa
import joblib
import librosa.display
import shutil
import pandas as pd
import os, time, warnings
import random
import pandas as pd
import seaborn as sns
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    Conv1D,
    MaxPooling1D,
    BatchNormalization,
    Dropout,
    Flatten,
    Conv2D,
    MaxPool2D,
)

# Silence every Python warning for the rest of the session. This also hides
# deprecation warnings, so remove it when debugging library upgrades.
warnings.filterwarnings("ignore")


## Creating a DataFrame for the Model Results

In [None]:
# Model-comparison table. It starts empty; the results cell of each model
# section below appends one row to it.
log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log = pd.DataFrame(columns=log_cols)



# **Data**


## Convert Audio File

In [None]:
def convert_ogg_to_wav(input_file, output_file):
    """Re-encode one audio file as 16-bit PCM WAV.

    Args:
        input_file: Path of the audio file to read (an ``.ogg`` file in this
            notebook).
        output_file: Path of the WAV file to write.
    """
    # The notebook's import cell does not import ``soundfile``; without this
    # local import the first call fails with a NameError.
    import soundfile

    data, samplerate = soundfile.read(input_file)
    soundfile.write(output_file, data, samplerate, format='WAV', subtype='PCM_16')

def convert_folder_ogg_to_wav(folder_path):
    """Convert every ``.ogg`` file under ``folder_path`` to WAV.

    The directory tree is walked recursively. Each ``.ogg`` file is converted
    to a ``.wav`` file with the same base name in the same directory, and the
    ``.ogg`` file is deleted afterwards.

    Args:
        folder_path: Root directory to scan.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        ogg_names = (name for name in filenames if name.endswith(".ogg"))
        for name in ogg_names:
            ogg_path = os.path.join(current_dir, name)
            stem, _extension = os.path.splitext(ogg_path)
            wav_path = stem + ".wav"
            convert_ogg_to_wav(ogg_path, wav_path)
            print(f"Conversion: {ogg_path} -> {wav_path}")
            # The original is removed only after the conversion call returned.
            os.remove(ogg_path)


# Root folder of the dataset; every .ogg file found under it is converted.
data_folder = 'data'

convert_folder_ogg_to_wav(data_folder)

In [None]:
import os
import shutil

# Folder whose contents are wiped before it is rebuilt.
dossier_a_nettoyer = 'data/all_data'

if os.path.exists(dossier_a_nettoyer):
    # Number of entries that could not be deleted.
    echecs = 0
    for fichier in os.listdir(dossier_a_nettoyer):
        chemin_fichier = os.path.join(dossier_a_nettoyer, fichier)
        try:
            # Symlinks are unlinked, never followed: shutil.rmtree refuses to
            # operate on a symlinked directory.
            if os.path.isfile(chemin_fichier) or os.path.islink(chemin_fichier):
                os.unlink(chemin_fichier)
            elif os.path.isdir(chemin_fichier):
                # Remove the sub-folder and everything below it.
                shutil.rmtree(chemin_fichier)
        except Exception as e:
            echecs += 1
            print(f"Erreur lors de la suppression du fichier {chemin_fichier}: {e}")

    # Only claim success when nothing failed.
    if echecs == 0:
        print(f"Tous les fichiers dans {dossier_a_nettoyer} ont été supprimés.")
    else:
        print(f"{echecs} élément(s) de {dossier_a_nettoyer} n'ont pas pu être supprimés.")
else:
    print(f"Le dossier {dossier_a_nettoyer} n'existe pas.")


## Create Audio Folder and Labels

In [None]:
def create_all_data_folder(root_folder, output_folder="all_data", csv_filename="data.csv"):
    """Flatten the per-class audio folders into one folder plus a metadata CSV.

    Every immediate sub-directory of ``root_folder`` other than
    ``output_folder`` is treated as one class. Its ``.wav`` files are copied
    into ``root_folder/output_folder`` and a CSV with the columns ``file`` and
    ``class`` is written into that folder. Nothing is done when the output
    folder already contains files.

    Args:
        root_folder: Directory that holds one sub-directory per class.
        output_folder: Name of the flattened folder created inside
            ``root_folder``.
        csv_filename: Name of the metadata CSV written inside the output
            folder.
    """
    all_data_path = os.path.join(root_folder, output_folder)

    # Create the output folder on first use.
    if not os.path.exists(all_data_path):
        os.makedirs(all_data_path)
        print(f"Le dossier {output_folder} a été créé avec succès.")

    # An already populated folder is left untouched.
    if os.listdir(all_data_path):
        print(f"Le dossier {output_folder} n'est pas vide. Aucune action nécessaire.")
        return

    # Sorted so the class order does not depend on the order in which
    # os.listdir happens to return the directory entries.
    classes = sorted(
        d for d in os.listdir(root_folder)
        if d != output_folder and os.path.isdir(os.path.join(root_folder, d))
    )

    metadata = []
    copied_names = set()

    for audio_class in classes:
        class_path = os.path.join(root_folder, audio_class)
        audio_files = [f for f in os.listdir(class_path) if f.endswith(".wav")]

        # Shuffle the files of this class before copying them.
        random.shuffle(audio_files)

        for audio_file in audio_files:
            source_path = os.path.join(class_path, audio_file)
            if audio_file in copied_names:
                # The same file name in two classes would overwrite the first
                # copy and leave two conflicting labels for one file.
                print(f"Fichier ignoré (nom déjà utilisé) : {source_path}")
                continue
            copied_names.add(audio_file)
            shutil.copyfile(source_path, os.path.join(all_data_path, audio_file))
            metadata.append({"file": audio_file, "class": audio_class})

    # Explicit columns keep the CSV header even when no audio file was found,
    # so the file can still be read back with pandas.
    metadata_df = pd.DataFrame(metadata, columns=["file", "class"])

    csv_path = os.path.join(all_data_path, csv_filename)
    metadata_df.to_csv(csv_path, index=False)

    print(f"Le fichier CSV {csv_filename} a été créé dans le dossier {output_folder}.")


# Build data/all_data and its data.csv from the class folders under "data".
root_folder = "data"
create_all_data_folder(root_folder)


## Reading the data


In [None]:
# Folder that holds the flattened audio files and their metadata CSV.
audio_dataset_path = "data/all_data/"

# Load the metadata: one row per audio file with its file name and class.
meta_data = pd.read_csv("data/all_data/data.csv")

# Folder names are used as class names. Only names that need a different
# display form are listed; every other class keeps its folder name.
meta_data["class"] = meta_data["class"].replace({"Clocktick": "Clock Tick"})

# Integer id per class.
meta_data["classID"] = pd.factorize(meta_data["class"])[0]

# Last expression of the cell, so the preview is rendered once.
meta_data.head()


In [None]:
# Show which class name each classID stands for.
meta_data.groupby("classID")["class"].unique()


In [None]:
# Number of samples per class, smallest first.
y = meta_data["class"].value_counts(ascending=True)
# Tick labels must come from the same Series as the bar lengths.
# meta_data["class"].unique() is ordered by first appearance, not by count,
# so using it here would put the wrong class name next to each bar.
x = y.index
ind = np.arange(len(y))

fig, ax = plt.subplots(figsize=(15, 5))
ax.barh(ind, y)
ax.set_yticks(ind)
ax.set_yticklabels(x)
# Print the count at the end of each bar.
ax.bar_label(ax.containers[0])
fig.set_dpi(500)
ax.set_title("Number of Audio Samples per Category")
ax.set_xlabel("Number of Samples")
ax.set_ylabel("Category")
plt.show()


# **MFCC Visualization**


In [None]:
# Default figure size and resolution for the plots that follow.
plt.rcParams["figure.figsize"] = (6, 4)
plt.rcParams["figure.dpi"] = 80






In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "1-187207-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Baby Crying")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "2-50667-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Chainsaw")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "4-198965-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of ClockTick")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "1-77241-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Cow")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "3-144028-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Dog Barking")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "4-182368-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Fire Crackling")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "5-189795-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Frog sound")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "5-177957-C.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Helicopter sound")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "3-257858-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Pig")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "2-117625-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Rain")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "3-137152-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Rooster")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "5-219379-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Sea Waves")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


In [None]:
# MFCC heat map (40 coefficients over time) of one sample clip.
audio_path = audio_dataset_path + "2-93030-A.wav"
(xf, sr) = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(y=xf, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs, x_axis="time")
plt.colorbar()
# Title is set before tight_layout so the layout can account for it.
plt.title("MFCC Of Person Sneeze")
plt.tight_layout()
# plt.show must be called; the bare name only echoes the function object.
plt.show()


# **Feature Extraction and Database Building**

## Method

1. We use Librosa to preprocess the audio files.
2. To do so, we go through each file listed in the metadata and extract its features using librosa's `mfcc` function.
3. The extracted features are appended to a list and then stored in a DataFrame.

### The loop below extracts the MFCC features

In [None]:
# One [feature_vector, class_name] pair per audio file listed in meta_data.
extracted = []

# iterrows() is a generator without a length, so the total is passed
# explicitly to let tqdm show a percentage and an ETA.
for index_num, row in tqdm(meta_data.iterrows(), total=len(meta_data)):
    # Absolute path of the audio file.
    file_name = os.path.join(
        os.path.abspath(audio_dataset_path),
        row["file"]
    )
    # Class name of the file.
    final_class_labels = row["class"]
    # Load the audio samples and their sample rate.
    audio, sample_rate = librosa.load(file_name, res_type="kaiser_fast")
    # 128 MFCC coefficients per frame.
    feature = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=128)
    # Average each coefficient over time so every clip yields one vector of
    # 128 values. Despite the variable name, this is an average, not a scaling.
    scaled_feature = np.mean(feature.T, axis=0)
    extracted.append([scaled_feature, final_class_labels])

### Using a dataframe and pickle to save the extracted features array


In [None]:
import pandas as pd

# Turn the extracted list into a DataFrame with one row per audio file.
columns = ["features", "class"]
extracted_df = pd.DataFrame(extracted, columns=columns)

# CSV copy of the table.
# NOTE(review): each feature array is written to the CSV as text; the pickle
# below is what the next section loads.
output_csv_path = "data/feature.csv"
extracted_df.to_csv(output_csv_path, index=False)
extracted_df.to_pickle("extracted_df.pkl")
print(f"Les caractéristiques ont été enregistrées dans {output_csv_path}.")
# Kept as the last expression so the preview is actually rendered; in the
# middle of the cell it had no effect.
extracted_df.head()


# **Data Preprocessing**


## Distribute the data to X and Y


In [None]:
# Reload the feature table saved above and split it into inputs and labels.
# NOTE(review): unpickling can execute code; only load files this notebook wrote.
final = pd.read_pickle("extracted_df.pkl")
X = np.array(final["features"].tolist())  # one feature vector per row
y = np.array(final["class"].tolist())  # class name per row
print(X.shape)


## Using LabelEncoder() to encode the string labels to an integer


In [None]:
# Encode the class names as integers.
le = LabelEncoder()

# Map each class name to its integer id, then one-hot encode the ids.
Y = to_categorical(le.fit_transform(y))

# Save the fitted LabelEncoder to disk.
joblib.dump(le, 'label_encoder.joblib')

## Split the data into train and test sets


In [None]:
# Split the data into a training set (80%) and a test set (20%).
# NOTE(review): this split is not stratified, unlike the split used for the
# CNN models further down — confirm that this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

print("Number of training samples = ", X_train.shape[0])
print("Number of testing samples = ", X_test.shape[0])


# **Model 1 - ANN**


## Building the model


In [None]:
# Fully connected classifier: six ReLU hidden layers narrowing from 1000 to
# 50 units, followed by a softmax layer with one unit per class.

num_labels = Y.shape[1]
ANN_Model = Sequential(
    [
        Dense(1000, activation="relu", input_shape=(128,)),
        Dense(750, activation="relu"),
        Dense(500, activation="relu"),
        Dense(250, activation="relu"),
        Dense(100, activation="relu"),
        Dense(50, activation="relu"),
        Dense(num_labels, activation="softmax"),
    ]
)
ANN_Model.summary()


## Compiling the model


In [None]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot
# encoded); accuracy is tracked during training.
ANN_Model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)


## Fitting the model


In [None]:
num_epochs = 250
num_batch_size = 64

print("Shape of X_train:", X_train.shape)

t0 = time.time()

# NOTE(review): the test set doubles as validation data here, so the
# validation curves are not independent of the final test score.
ANN_Results = ANN_Model.fit(
    X_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
)

# Stop the clock right after fit() so the reported training time does not
# include saving the model or converting the history.
train_m1 = round(time.time() - t0, 3)

ANN_Model.save("Audio_Classifier_ANN.h5")
print("ANN Model Saved")
# Per-epoch metrics as a DataFrame for the plots below.
train_hist_m1 = pd.DataFrame(ANN_Results.history)


## Results


In [None]:
# Training and validation loss of the ANN, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.plot(train_hist_m1[["loss", "val_loss"]])
ax.legend(["Loss", "Validation Loss"])
ax.set_title("Loss Per Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
plt.show()


In [None]:
# Training and validation accuracy of the ANN, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.plot(train_hist_m1[["accuracy", "val_accuracy"]])
ax.legend(["Accuracy", "Validation Accuracy"])
ax.set_title("Accuracy Per Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
plt.show()


In [None]:
# Score the ANN on the test set, time one prediction pass over it, and add
# the figures to the model-comparison table.
acc_m1 = ANN_Model.evaluate(X_test, y_test, verbose=0)
t0 = time.time()
y_pred_m1 = ANN_Model.predict(X_test, verbose=0)
pred_m1 = round(time.time() - t0, 3)

# Values of the new row; the accuracy is stored as a percentage.
model_name, accuracy = "ANN", acc_m1[1] * 100
training_time, prediction_time = train_m1, pred_m1

log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log_data = {
    column: [value]
    for column, value in zip(
        log_cols, (model_name, accuracy, training_time, prediction_time)
    )
}
log_entry = pd.DataFrame(log_data, columns=log_cols)

# Append the row to the comparison table.
log = pd.concat([log, log_entry], ignore_index=True)

## ANN Prediction Function


In [None]:
# Prediction helper for the ANN model.
def ANN_Prediction(file_name):
    """Print the class the ANN model predicts for one audio file.

    Relies on the notebook-level ``ANN_Model`` and the label encoder ``le``.

    Args:
        file_name: Path of the audio file to classify.
    """
    waveform, rate = librosa.load(file_name, res_type="kaiser_fast")
    mfcc = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=128)
    # Same time-averaged MFCC vector as used for training, wrapped in a
    # batch of one.
    batch = np.array([np.mean(mfcc.T, axis=0)])
    # Index of the highest-probability class for each row of the batch.
    class_ids = np.argmax(ANN_Model.predict(batch), axis=-1)
    # Translate the class id back to its class name.
    label = le.inverse_transform(class_ids)[0]
    print("ANN has predicted the class as  --> ", label)





## Testing the Model on Sample audio


In [None]:
# Path of the sample clip to classify.
file_name = audio_dataset_path + "2-93030-A.wav"
# Print the ANN's prediction for the clip.
ANN_Prediction(file_name)
# Audio player so the clip can be listened to.
ipd.Audio(file_name)


# **Model 2 - CNN1D**


## Preprocessing


In [None]:
# Hold out 10% of the data as the test set, then take 20% of the remainder
# as the validation set. Both splits are stratified.
xTrainval, xTest, yTrainval, yTest = train_test_split(
    X, Y, test_size=0.1, stratify=y, random_state=387
)
xTrain, xvalid, yTrain, yvalid = train_test_split(
    xTrainval, yTrainval, test_size=0.2, stratify=yTrainval, random_state=387
)
print("\nNumber of samples for Train set :", xTrain.shape[0])
print("Number of samples for Validation set :", xvalid.shape[0])
print("Number of samples for Test set :", xTest.shape[0])

# Conv1D expects a trailing channel axis; xTest is left without one here.
xTrain = xTrain[:, :, np.newaxis]
xvalid = xvalid[:, :, np.newaxis]

print("Shape of X Train", xTrain.shape)
print("Shape of X Test", xTest.shape)


## Building the CNN1D Model


In [None]:
# 1-D CNN over the feature vector: four Conv1D + max-pooling stages
# (256, 256, 128, 64 filters), then a 1024-unit dense layer and a softmax.
# The output layer is sized from the one-hot labels rather than a hard-coded
# 13, in the same way the ANN model uses num_labels.
CNN1D_Model = Sequential(
    [
        Conv1D(
            256,
            5,
            strides=1,
            padding="same",
            activation="relu",
            input_shape=(xTrain.shape[1], 1),
        ),
        BatchNormalization(),
        MaxPooling1D(3, strides=2, padding="same"),
        Conv1D(256, 5, strides=1, padding="same", activation="relu"),
        Dropout(0.3),
        MaxPooling1D(3, strides=2, padding="same"),
        Conv1D(128, 5, strides=1, padding="same", activation="relu"),
        Dropout(0.3),
        MaxPooling1D(3, strides=2, padding="same"),
        Conv1D(64, 5, strides=1, padding="same", activation="relu"),
        Dropout(0.3),
        MaxPooling1D(3, strides=2, padding="same"),
        Flatten(),
        Dense(units=1024, activation="relu"),
        Dropout(0.3),
        Dense(units=yTrain.shape[1], activation="softmax"),
    ]
)
CNN1D_Model.summary()


## Compiling the Model


In [None]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot
# encoded); accuracy is tracked during training.
CNN1D_Model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)


## Fitting the Model


In [None]:
t0 = time.time()

CNN1D_Results = CNN1D_Model.fit(
    xTrain, yTrain, batch_size=64, epochs=250, validation_data=(xvalid, yvalid)
)

# Stop the clock right after fit() so the reported training time does not
# include saving the model or converting the history.
train_m2 = round(time.time() - t0, 3)

CNN1D_Model.save("Audio_Classifier_CNN1D.h5")
print("CNN1D Model Saved")
# Per-epoch metrics as a DataFrame for the plots below.
train_hist_m2 = pd.DataFrame(CNN1D_Results.history)


## Results


In [None]:
# Training and validation loss of the CNN1D model, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.plot(train_hist_m2[["loss", "val_loss"]])
ax.legend(["Loss", "Validation Loss"])
ax.set_title("Loss Per Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
plt.show()


In [None]:
# Training and validation accuracy of the CNN1D model, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.plot(train_hist_m2[["accuracy", "val_accuracy"]])
ax.legend(["Accuracy", "Validation Accuracy"])
ax.set_title("Accuracy Per Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
plt.show()


In [None]:
# Score the model on the held-out test set. The validation set was already
# used to monitor training, and the ANN and CNN2D sections also report
# test-set figures. reshape (instead of expand_dims) gives the
# (samples, features, 1) layout whether or not xTest already has a channel axis.
xTest_cnn1d = xTest.reshape(xTest.shape[0], -1, 1)

acc_m2 = CNN1D_Model.evaluate(xTest_cnn1d, yTest, verbose=0)
t0 = time.time()
y_pred_m2 = CNN1D_Model.predict(xTest_cnn1d, verbose=0)
pred_m2 = round(time.time() - t0, 3)

# Values of the new row; the accuracy is stored as a percentage.
model_name = "CNN_1D"
accuracy = acc_m2[1] * 100
training_time = train_m2
prediction_time = pred_m2

log_data = {"Model": [model_name], "Accuracy": [accuracy], "Training Time": [training_time], "Prediction Time": [prediction_time]}

# Build the one-row DataFrame from log_data.
log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log_entry = pd.DataFrame(log_data, columns=log_cols)

# Append the row to the comparison table.
log = pd.concat([log, log_entry], ignore_index=True)

## CNN1D Prediction Function


In [None]:
# Prediction helper for the CNN1D model.
def CNN1D_Prediction(file_name):
    """Print the class the CNN1D model predicts for one audio file.

    Relies on the notebook-level ``CNN1D_Model`` and the label encoder ``le``.

    Args:
        file_name: Path of the audio file to classify.
    """
    waveform, rate = librosa.load(file_name, res_type="kaiser_fast")
    mfcc = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=128)
    # Time-averaged MFCC vector wrapped in a batch of one.
    batch = np.array([np.mean(mfcc.T, axis=0)])
    # Add the trailing channel axis the Conv1D input expects.
    batch_with_channel = np.expand_dims(batch, axis=2)
    # Index of the highest-probability class for each row of the batch.
    class_ids = np.argmax(CNN1D_Model.predict(batch_with_channel), axis=-1)
    # Translate the class id back to its class name.
    label = le.inverse_transform(class_ids)[0]
    print("CNN1D has predicted the class as  --> ", label)


## Testing the Model on Sample audio


In [None]:
# Path of the sample clip to classify.
file_name = audio_dataset_path + "2-93030-A.wav"
# Print the CNN1D model's prediction for the clip.
CNN1D_Prediction(file_name)
# Audio player so the clip can be listened to.
ipd.Audio(file_name)


# **Model 3 - CNN2D**


## Preprocessing


In [None]:
# Reshape every feature vector into a 16 x 8 single-channel grid for Conv2D
# (this requires 128 values per sample, since 16 * 8 = 128).
# NOTE(review): xtrain/xtest differ from xTrain/xTest only by letter case,
# which is easy to mix up.
xtrain = xTrain.reshape(xTrain.shape[0], 16, 8, 1)
xtest = xTest.reshape(xTest.shape[0], 16, 8, 1)

print("The Shape of X Train", xtrain.shape)
print("The Shape of Y Train", yTrain.shape)
print("The Shape of X Test", xtest.shape)
print("The Shape of Y Test", yTest.shape)


## Building the CNN2D Model


In [None]:
# 2-D CNN over the 16 x 8 feature grid: two Conv2D + max-pooling stages
# (64 and 128 filters), then a 1024-unit dense layer and a softmax.
# The output layer is sized from the one-hot labels rather than a hard-coded
# 13, in the same way the ANN model uses num_labels.
CNN2D_Model = Sequential(
    [
        Conv2D(64, (3, 3), padding="same", activation="tanh", input_shape=(16, 8, 1)),
        MaxPool2D(pool_size=(2, 2)),
        Conv2D(128, (3, 3), padding="same", activation="tanh"),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.1),
        Flatten(),
        Dense(1024, activation="tanh"),
        Dense(yTrain.shape[1], activation="softmax"),
    ]
)
CNN2D_Model.summary()


## Compiling the Model


In [None]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot
# encoded); accuracy is tracked during training.
CNN2D_Model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)


## Fitting the Model


In [None]:
t0 = time.time()

# NOTE(review): the test set doubles as validation data here, so the
# validation curves are not independent of the final test score.
CNN2D_Results = CNN2D_Model.fit(
    xtrain, yTrain, epochs=250, batch_size=64, validation_data=(xtest, yTest)
)

# Stop the clock right after fit() so the reported training time does not
# include saving the model or converting the history.
train_m3 = round(time.time() - t0, 3)

CNN2D_Model.save("Audio_Classifier_CNN2D.h5")
print("CNN2D Model Saved")
# Per-epoch metrics as a DataFrame for the plots below.
train_hist_m3 = pd.DataFrame(CNN2D_Results.history)


## Results


In [None]:
# Training and validation loss of the CNN2D model, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.plot(train_hist_m3[["loss", "val_loss"]])
ax.legend(["Loss", "Validation Loss"])
ax.set_title("Loss Per Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
plt.show()


In [None]:
# Training and validation accuracy of the CNN2D model, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)
ax.plot(train_hist_m3[["accuracy", "val_accuracy"]])
ax.legend(["Accuracy", "Validation Accuracy"])
ax.set_title("Accuracy Per Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
plt.show()


In [None]:
# Score the CNN2D model on the test set, time one prediction pass over it,
# and add the figures to the model-comparison table.
acc_m3 = CNN2D_Model.evaluate(xtest, yTest, verbose=0)
t0 = time.time()
y_pred_m3 = CNN2D_Model.predict(xtest, verbose=0)
pred_m3 = round(time.time() - t0, 3)

# Values of the new row; the accuracy is stored as a percentage.
model_name, accuracy = "CNN_2D", acc_m3[1] * 100
training_time, prediction_time = train_m3, pred_m3

log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log_data = {
    column: [value]
    for column, value in zip(
        log_cols, (model_name, accuracy, training_time, prediction_time)
    )
}
log_entry = pd.DataFrame(log_data, columns=log_cols)

# Append the row to the comparison table.
log = pd.concat([log, log_entry], ignore_index=True)


## CNN2D Prediction Function


In [None]:
# Prediction helper for the CNN2D model.
def CNN2D_Prediction(file_name):
    """Print the class the CNN2D model predicts for one audio file.

    Relies on the notebook-level ``CNN2D_Model`` and the label encoder ``le``.

    Args:
        file_name: Path of the audio file to classify.
    """
    waveform, rate = librosa.load(file_name, res_type="kaiser_fast")
    mfcc = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=128)
    # Time-averaged MFCC vector wrapped in a batch of one.
    batch = np.array([np.mean(mfcc.T, axis=0)])
    # Lay the 128 values out as the 16 x 8 single-channel grid the model takes.
    grid_batch = batch.reshape(batch.shape[0], 16, 8, 1)
    # Index of the highest-probability class for each row of the batch.
    class_ids = np.argmax(CNN2D_Model.predict(grid_batch), axis=-1)
    # Translate the class id back to its class name.
    label = le.inverse_transform(class_ids)[0]
    print("CNN2D has predicted the class as  --> ", label)


## Testing the Model on Sample audio


In [None]:
# Path of the sample clip to classify.
file_name = audio_dataset_path + "2-93030-A.wav"
# Print the CNN2D model's prediction for the clip.
CNN2D_Prediction(file_name)
# Audio player so the clip can be listened to.
ipd.Audio(file_name)


# <center> **Audio Classification using Machine Learning**


## Pre Process Data


In [None]:
import os
import librosa
import pandas as pd
import numpy as np

def extract_features(file_path, feature_types):
    """Return one time-averaged feature vector for an audio file.

    Args:
        file_path: Path of the audio file to load.
        feature_types: Non-empty sequence of feature names, each one of
            ``'mfcc'``, ``'chroma'``, ``'mel'``, ``'contrast'`` or
            ``'tonnetz'``.

    Returns:
        1-D NumPy array holding, for each requested feature in the order
        given, the mean over time of every row of its feature matrix.

    Raises:
        ValueError: If ``feature_types`` is empty or contains an unknown name.
    """
    # The librosa calls sit inside lambdas, so nothing is computed until a
    # feature is actually requested.
    extractors = {
        'mfcc': lambda y, sr: librosa.feature.mfcc(y=y, sr=sr, n_mfcc=128),
        'chroma': lambda y, sr: librosa.feature.chroma_stft(y=y, sr=sr),
        'mel': lambda y, sr: librosa.feature.melspectrogram(y=y, sr=sr),
        'contrast': lambda y, sr: librosa.feature.spectral_contrast(y=y, sr=sr),
        'tonnetz': lambda y, sr: librosa.feature.tonnetz(y=y, sr=sr),
    }

    # Validate before loading any audio. An unknown name used to fall through
    # the if/elif chain and either raised UnboundLocalError or silently
    # re-used the previous feature matrix; an empty list raised IndexError.
    requested = list(feature_types)
    if not requested:
        raise ValueError("feature_types must contain at least one feature name")
    unknown = [name for name in requested if name not in extractors]
    if unknown:
        raise ValueError(
            f"Unsupported feature type(s) {unknown}; expected one of {sorted(extractors)}"
        )

    # Load the audio file.
    audio, sr = librosa.load(file_path, res_type='kaiser_fast')

    # Average every feature matrix over its time axis.
    all_features = [extractors[name](audio, sr).mean(axis=1) for name in requested]

    # Concatenating a single array returns an equal array, so one code path
    # covers both one and several feature types.
    return np.concatenate(all_features)

# Load the metadata written when the all_data folder was built.
csv_path = "data/all_data/data.csv"
df = pd.read_csv(csv_path)

# Folder that holds the audio files.
audio_dir = "data/all_data"

# Feature types to extract (for example ['mfcc', 'mel', 'tonnetz']).
# Defined once, outside the loop: it does not change per file, and the
# prediction cells further down read it as a global.
feature_types = ['mfcc']

# One feature vector and one class name per audio file.
features = []
labels = []

for index, row in df.iterrows():
    file_path = os.path.join(audio_dir, row['file'])

    extracted_features = extract_features(file_path, feature_types)

    features.append(extracted_features)
    labels.append(row['class'])

# One column per feature value, named feature_0, feature_1, ...
feature_columns = [f"feature_{i}" for i in range(len(features[0]))]
features_df = pd.DataFrame(features, columns=feature_columns)
labels_df = pd.DataFrame(labels, columns=['class'])

# Put the features and the class column side by side.
result_df = pd.concat([features_df, labels_df], axis=1)

# Save the table as CSV.
result_csv_path = "data/feature_ML.csv"
result_df.to_csv(result_csv_path, index=False)

print(f"Extracted features saved to {result_csv_path}")


## Distribute the data to X and Y


In [None]:
# Wrap the feature and label lists in a pandas DataFrame / Series.
# NOTE(review): X, y, X_train, X_test, y_train and y_test are re-assigned
# here, replacing the arrays of the same names used by the deep-learning section.
X = pd.DataFrame(features)
y = pd.Series(labels)
print(X)
# Encode the class names as integers.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into a training set (80%) and a test set (20%).
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


## Prediction Function


In [None]:
import numpy as np

def ML_prediction(file_path, model_path, feature_types, label_encoder_path='label_encoder.joblib'):
    """Print the class a saved scikit-learn model predicts for one audio file.

    Args:
        file_path: Path of the audio file to classify.
        model_path: Path of the model file saved with joblib.
        feature_types: Feature names passed on to ``extract_features``; they
            must match the features the model was trained on.
        label_encoder_path: Path of the label encoder saved with joblib. The
            default is the file written earlier in the notebook when the
            encoder ``le`` was saved.
    """
    # NOTE(review): joblib.load unpickles the files; only load files that
    # this notebook produced.
    model = joblib.load(model_path)
    label_encoder = joblib.load(label_encoder_path)

    # Feature vector of the audio file.
    extracted_features = extract_features(file_path, feature_types)

    # Batch of one sample, as a C-contiguous array.
    features_array = np.ascontiguousarray(np.array([extracted_features]))

    prediction = model.predict(features_array)

    # Translate the predicted class id back to its class name.
    predicted_class = label_encoder.inverse_transform(prediction)[0]
    print("Predicted class: ", predicted_class)


# **Model 4 - Gaussian NB**


## Fitting the model


In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report , accuracy_score

# Create the Gaussian Naive Bayes classifier.
model_GaussNB = GaussianNB()

# Train the model and measure how long training takes (in seconds).
start_time = time.time()
model_GaussNB.fit(X_train, y_train)
train_m4 = time.time() - start_time


## Results


In [None]:
# Per-class precision of the Gaussian NB model on the test set.
y_pred = model_GaussNB.predict(X_test)

class_report = classification_report(
    y_test, y_pred, target_names=label_encoder.classes_, output_dict=True
)

# Precision of each class, in the encoder's class order.
precisions = [
    class_report[class_name]['precision'] for class_name in label_encoder.classes_
]

# Bar chart with one bar per class.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=label_encoder.classes_, y=precisions, ax=ax)
ax.set_title('Précision par classe')
ax.set_xlabel('Classe')
ax.set_ylabel('Précision')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Time one prediction pass over the test set.
start_time = time.time()
y_pred = model_GaussNB.predict(X_test)
pred_m4 = time.time() - start_time

# Accuracy on the test set.
acc_m4 = accuracy_score(y_test, y_pred)

# Values of the new row; the accuracy is stored as a percentage.
model_name, accuracy = "Gaussien_NB", acc_m4 * 100
training_time, prediction_time = train_m4, pred_m4

log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log_data = {
    column: [value]
    for column, value in zip(
        log_cols, (model_name, accuracy, training_time, prediction_time)
    )
}
log_entry = pd.DataFrame(log_data, columns=log_cols)

# Append the row to the comparison table.
log = pd.concat([log, log_entry], ignore_index=True)



## Save model


In [None]:
# Save the trained Gaussian NB model to a file with joblib.
model_filename = "Audio_Classifier_GaussianNB.joblib"
joblib.dump(model_GaussNB, model_filename)

## Testing the Model on Sample audio


In [None]:
# Path of the sample clip to classify.
file_name = "data/Baby/1-187207-A.wav"
# Print the saved Gaussian NB model's prediction for the clip.
# feature_types is the global set in the ML feature-extraction cell.
ML_prediction(file_name, 'Audio_Classifier_GaussianNB.joblib',feature_types)
# Audio player so the clip can be listened to.
ipd.Audio(file_name)

# **Model 5 - KNN**


## Fitting the model


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Create the k-NN classifier (votes among the 5 nearest neighbours)
knn_model = KNeighborsClassifier(n_neighbors=5)


# Clock read taken immediately before fit() so only training is timed
start_time = time.time()

# Train the model
knn_model.fit(X_train, y_train)

# Wall-clock training duration in seconds
train_m5 = time.time() - start_time

## Results


In [None]:

# Force X_test into a NumPy array before predicting.
# This rebinds the notebook-level X_test, so later cells see the array as well.
X_test = np.array(X_test)

# Predict on the test set
y_pred = knn_model.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)

# Per-class report as a dict that can be indexed by class name
class_report = classification_report(
    y_test,
    y_pred,
    target_names=label_encoder.classes_,
    output_dict=True,
)

# Collect the precision of each class, in label-encoder order
precisions = [class_report[cls_name]['precision'] for cls_name in label_encoder.classes_]

# Bar chart with one bar per class, drawn on an explicit figure/axes pair
fig_m5, ax_m5 = plt.subplots(figsize=(10, 6))
sns.barplot(x=label_encoder.classes_, y=precisions, ax=ax_m5)
ax_m5.set_title('Précision par classe (k-NN)')
ax_m5.set_xlabel('Classe')
ax_m5.set_ylabel('Précision')
# Slant the class names so long labels do not overlap
plt.setp(ax_m5.get_xticklabels(), rotation=45, ha='right')
fig_m5.tight_layout()
plt.show()

In [None]:
# Time inference over the whole test set (wall-clock seconds)
start_time = time.time()
y_pred = knn_model.predict(X_test)
pred_m5 = time.time() - start_time

# Overall accuracy of those predictions
acc_m5 = accuracy_score(y_test, y_pred)

# Values that make up this model's row in the comparison log
model_name = "KNN"
accuracy = acc_m5 * 100  # stored as a percentage
training_time = train_m5
prediction_time = pred_m5

# Build a one-row frame that follows the log's column layout
log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log_data = dict(
    zip(log_cols, ([model_name], [accuracy], [training_time], [prediction_time]))
)
log_entry = pd.DataFrame(log_data, columns=log_cols)

# Append the row to the running log (index is renumbered)
log = pd.concat([log, log_entry], ignore_index=True)

## Save model


In [None]:
# Persist the fitted k-NN model to disk with joblib
model_filename = "Audio_Classifier_KNN.joblib"
joblib.dump(value=knn_model, filename=model_filename)

## Testing the Model on Sample audio


In [None]:
# Sample clip used for the spot check (relative path, resolved from the working directory)
file_name = "data/Baby/1-187207-A.wav"
# Classify the clip with the saved k-NN model.
# NOTE(review): ML_prediction and feature_types are defined elsewhere in the notebook;
# presumably the helper loads the joblib file and extracts the listed features - confirm.
ML_prediction(file_name, 'Audio_Classifier_KNN.joblib',feature_types)
# Last expression of the cell: renders an inline audio player for the same clip
ipd.Audio(file_name)

# **Model 6 - SVM**


## Fitting the model


In [None]:
from sklearn.svm import SVC

# Create the SVM classifier with a linear kernel
svm_model = SVC(kernel='linear')


# Clock read taken immediately before fit() so only training is timed
start_time = time.time()

# Train the model
svm_model.fit(X_train, y_train)

# Wall-clock training duration in seconds
train_m6 = time.time() - start_time

## Results


In [None]:
# Predict on the test set with the SVM
y_pred = svm_model.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)

# Per-class report as a dict that can be indexed by class name
class_report = classification_report(
    y_test,
    y_pred,
    target_names=label_encoder.classes_,
    output_dict=True,
)

# Collect the precision of each class, in label-encoder order
precisions = [class_report[cls_name]['precision'] for cls_name in label_encoder.classes_]

# Bar chart with one bar per class, drawn on an explicit figure/axes pair
fig_m6, ax_m6 = plt.subplots(figsize=(10, 6))
sns.barplot(x=label_encoder.classes_, y=precisions, ax=ax_m6)
ax_m6.set_title('Précision par classe (SVM)')
ax_m6.set_xlabel('Classe')
ax_m6.set_ylabel('Précision')
# Slant the class names so long labels do not overlap
plt.setp(ax_m6.get_xticklabels(), rotation=45, ha='right')
fig_m6.tight_layout()
plt.show()

In [None]:
# Time inference over the whole test set (wall-clock seconds).
# The prediction must come from svm_model: both the timing and the accuracy
# logged below are attributed to the "SVM" row of the comparison table.
start_time = time.time()
y_pred = svm_model.predict(X_test)
pred_m6 = time.time() - start_time

# Overall accuracy of the SVM predictions
acc_m6 = accuracy_score(y_test, y_pred)

# Values that make up this model's row in the comparison log
model_name = "SVM"
accuracy = acc_m6 * 100  # stored as a percentage
training_time = train_m6
prediction_time = pred_m6

log_data = {"Model": [model_name], "Accuracy": [accuracy], "Training Time": [training_time], "Prediction Time": [prediction_time]}

# Build a one-row frame that follows the log's column layout
log_cols = ["Model", "Accuracy", "Training Time", "Prediction Time"]
log_entry = pd.DataFrame(log_data, columns=log_cols)

# Append the row to the running log (index is renumbered)
log = pd.concat([log, log_entry], ignore_index=True)

## Save model


In [None]:
# Persist the fitted SVM model to disk with joblib
model_filename = "Audio_Classifier_SVM.joblib"
joblib.dump(value=svm_model, filename=model_filename)

## Testing the Model on Sample audio


In [None]:

# Sample clip used for the spot check (relative path, resolved from the working directory)
file_name = "data/Baby/1-187207-A.wav"
# Classify the clip with the saved SVM model.
# NOTE(review): ML_prediction and feature_types are defined elsewhere in the notebook;
# presumably the helper loads the joblib file and extracts the listed features - confirm.
ML_prediction(file_name, 'Audio_Classifier_SVM.joblib',feature_types)
# Last expression of the cell: renders an inline audio player for the same clip
ipd.Audio(file_name)

# **Comparative Analysis**


In [None]:
# Default size and resolution for the figures created after this cell:
# a wide, short canvas (17 x 2 inches) rendered at 550 dpi
plt.rcParams.update({"figure.figsize": (17, 2), "figure.dpi": 550})


In [None]:
# Accuracy of every logged model; each bar in the first container is labelled with its value
ax = sns.barplot(data=log, x="Accuracy", y="Model", color="b")
ax.bar_label(ax.containers[0])
ax.set(xlabel="Accuracy", ylabel="Model", title="Model Accuracy")
plt.show()


In [None]:
# Training time of every logged model; each bar in the first container is labelled with its value
ax = sns.barplot(data=log, x="Training Time", y="Model", color="r")
ax.bar_label(ax.containers[0])
ax.set(xlabel="Training Time", ylabel="Model", title="Model Training Time")
plt.show()


In [None]:
# Prediction time of every logged model; each bar in the first container is labelled with its value
ax = sns.barplot(data=log, x="Prediction Time", y="Model", color="g")
ax.bar_label(ax.containers[0])
ax.set(xlabel="Prediction Time", ylabel="Model", title="Model Prediction Time")
plt.show()
