In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, LSTM, Bidirectional, Reshape, Attention, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import random
import zipfile
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras import regularizers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization
from tqdm import tqdm

In [4]:
with zipfile.ZipFile('/content/drive/MyDrive/Speech_Emotion_Recogntion/audio_src/combined_dataset_spectrograms_oversampled.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

print(f"Fichiers extraits dans")

Fichiers extraits dans


In [5]:
original_spectrograms_dir = '/content/combined_dataset_spectrograms_oversampled/combined_dataset_spectrograms'
augmented_spectrograms_dir = '/content/combined_dataset_spectrograms_oversampled/augmented_dataset_spectrograms'
metrics_dir = '/content/drive/MyDrive/Speech_Emotion_Recogntion/metrics'

In [6]:
# Dictionnaire pour mapper les émotions à des indices
emotion_to_index = {
    "angry": 0,
    "happy": 1,
    "sad": 2,
    "neutral": 3,
    "fearful": 4,
    "disgust": 5,
    "surprise": 6,
}

# Fonction pour charger les fichiers et leurs labels
def load_data(spectrograms_dir, emotion_to_index):
    images = []
    labels = []
    all_files = [file for file in os.listdir(spectrograms_dir) if file.endswith(".png")]

    for file_name in all_files:
        file_path = os.path.join(spectrograms_dir, file_name)

        # Charger l'image et la redimensionner
        img = Image.open(file_path).convert("RGB")
        img = img.resize((128, 128))
        img_array = np.array(img)

        images.append(img_array)

        # Extraire l'étiquette à partir du nom du fichier
        emotion = file_name.split("_")[1]
        label = emotion_to_index.get(emotion, -1)
        if label == -1:
            raise ValueError(f"Émotion inconnue dans le fichier : {file_name}")

        labels.append(label)

    return np.array(images), np.array(labels)

# Charger les données originales
original_images, original_labels = load_data(original_spectrograms_dir, emotion_to_index)

# Charger les données augmentées
augmented_images, augmented_labels = load_data(augmented_spectrograms_dir, emotion_to_index)

# Normaliser les images
original_images = original_images / 255.0
augmented_images = augmented_images / 255.0

# Encodage One-hot des labels
num_classes = len(emotion_to_index)
original_labels_one_hot = to_categorical(original_labels, num_classes=num_classes)
augmented_labels_one_hot = to_categorical(augmented_labels, num_classes=num_classes)

# Séparer les données originales en ensemble d'entraînement, de validation et de test
X_train, X_temp, y_train, y_temp = train_test_split(
    original_images, original_labels_one_hot, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Ajouter les données augmentées uniquement à l'ensemble d'entraînement
X_train = np.concatenate((X_train, augmented_images), axis=0)
y_train = np.concatenate((y_train, augmented_labels_one_hot), axis=0)

# Mélanger les ensembles d'entraînement
shuffle_indices = np.random.permutation(len(X_train))
X_train = X_train[shuffle_indices]
y_train = y_train[shuffle_indices]

# Résumé des ensembles
print(f"Ensemble d'entraînement : {X_train.shape}, {y_train.shape}")
print(f"Ensemble de validation : {X_val.shape}, {y_val.shape}")
print(f"Ensemble de test : {X_test.shape}, {y_test.shape}")

Ensemble d'entraînement : (27089, 128, 128, 3), (27089, 7)
Ensemble de validation : (4025, 128, 128, 3), (4025, 7)
Ensemble de test : (4025, 128, 128, 3), (4025, 7)


In [7]:
def create_cnn_blstm_with_attention_model(num_classes):
    inputs = Input(shape=(128, 128, 3))

    # Block 1: CNN Layers
    x = Conv2D(32, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(128, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(256, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    # Flatten
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)

    # Reshape for BLSTM
    x = Reshape((8, 32))(x)

    # BLSTM Layers
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.3)(x)

    # Attention Mechanism (Self-Attention)
    query = Dense(128, activation='relu')(x)
    value = Dense(128, activation='relu')(x)
    attention_output = layers.Attention()([query, value])

    # Fully Connected Layers
    x = Flatten()(attention_output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model

def create_no_cnn_model(num_classes):
    inputs = Input(shape=(128, 128, 3))

    # Suppression du CNN, entrée directe pour la BLSTM
    x = Flatten()(inputs)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    x = Reshape((8, 32))(x)

    # BLSTM + Attention
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.3)(x)

    query = Dense(128, activation='relu')(x)
    value = Dense(128, activation='relu')(x)
    attention_output = layers.Attention()([query, value])

    x = Flatten()(attention_output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model

def create_no_blstm_model(num_classes):
    inputs = Input(shape=(128, 128, 3))

    # CNN uniquement
    x = Conv2D(32, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)

    outputs = Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model

def create_no_attention_model(num_classes):
    inputs = Input(shape=(128, 128, 3))

    # CNN
    x = Conv2D(32, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    # BLSTM sans Attention
    x = Reshape((8, 32))(x)
    x = Bidirectional(LSTM(128, return_sequences=False))(x)
    x = Dropout(0.3)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model

def create_no_dropout_model(num_classes):
    inputs = Input(shape=(128, 128, 3))

    # Block 1: CNN Layers sans Dropout
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # Flatten
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)

    # Reshape for BLSTM
    x = Reshape((8, 32))(x)

    # BLSTM Layers
    x = Bidirectional(LSTM(128, return_sequences=True))(x)

    # Attention Mechanism (Self-Attention)
    query = Dense(128, activation='relu')(x)
    value = Dense(128, activation='relu')(x)
    attention_output = layers.Attention()([query, value])

    # Fully Connected Layers
    x = Flatten()(attention_output)
    x = Dense(256, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model


def create_no_regularization_model(num_classes):
    inputs = Input(shape=(128, 128, 3))

    # Block 1: CNN Layers sans régularisation
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.3)(x)

    # Flatten
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)

    # Reshape for BLSTM
    x = Reshape((8, 32))(x)

    # BLSTM Layers
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.3)(x)

    # Attention Mechanism (Self-Attention)
    query = Dense(128, activation='relu')(x)
    value = Dense(128, activation='relu')(x)
    attention_output = layers.Attention()([query, value])

    # Fully Connected Layers
    x = Flatten()(attention_output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model

In [8]:
model_name = "CNN_BLSTM_with_attention_V3_ablation_oversampled_dataset"
metrics_dir = os.path.join(metrics_dir, model_name)
os.makedirs(metrics_dir, exist_ok=True)

In [11]:
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def save_training_plots(history, model_metrics_dir):
    # Loss plot
    plt.figure(figsize=(6, 5))
    plt.plot(history['loss'], label='Training Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title("Loss Evolution")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(os.path.join(model_metrics_dir, "loss_curve.png"))
    plt.close()

    # Accuracy plot
    plt.figure(figsize=(6, 5))
    plt.plot(history['accuracy'], label='Training Accuracy')
    plt.plot(history['val_accuracy'], label='Validation Accuracy')
    plt.title("Accuracy Evolution")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig(os.path.join(model_metrics_dir, "accuracy_curve.png"))
    plt.close()

def save_confusion_matrix(model, X_test, y_test, model_metrics_dir, emotion_labels):
    # Prédictions sur les données de test
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)  # Convertir les probabilités en classes
    y_true = np.argmax(y_test, axis=1)  # Convertir les labels one-hot en indices de classes

    # Générer la matrice de confusion
    conf_matrix = confusion_matrix(y_true, y_pred_classes)
    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=emotion_labels)

    # Afficher et sauvegarder la matrice de confusion
    plt.figure(figsize=(8, 8))
    disp.plot(cmap='Blues', values_format='d', ax=plt.gca())
    plt.title("Confusion Matrix")
    plt.savefig(os.path.join(model_metrics_dir, "confusion_matrix.png"))
    plt.close()

def train_and_evaluate_model(model_fn, model_name, metrics_dir):
    model = model_fn(num_classes)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping, lr_scheduler],
        verbose=0
    )

    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"{model_name} - Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

    # Créer le répertoire pour stocker les métriques
    model_metrics_dir = os.path.join(metrics_dir, model_name)
    os.makedirs(model_metrics_dir, exist_ok=True)

    # Sauvegarder les courbes et la matrice de confusion
    save_training_plots(history.history, model_metrics_dir)
    save_confusion_matrix(model, X_test, y_test, model_metrics_dir, emotion_to_index.keys())

    return history.history, test_loss, test_accuracy

results = {}

models_to_test = {
    # "No CNN": create_no_cnn_model,
    # "No BLSTM": create_no_blstm_model,
    "No Attention": create_no_attention_model,
    # "No Dropout": create_no_dropout_model,
    # "No Regularization": create_no_regularization_model
}

# Utilisation de tqdm pour afficher la progression
for name, model_fn in tqdm(models_to_test.items(), desc="Ablation Tests"):
    history, loss, accuracy = train_and_evaluate_model(model_fn, name, metrics_dir)
    results[name] = {"loss": loss, "accuracy": accuracy}


Ablation Tests:   0%|          | 0/1 [00:00<?, ?it/s]


ValueError: The total size of the tensor must be unchanged. Received: input_shape=(32, 32, 64), target_shape=(8, 32)

In [None]:
# plt.figure(figsize=(6, 5))

# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.title("Loss Evolution")
# plt.xlabel("Epochs")
# plt.ylabel("Loss")
# plt.legend()
# plt.savefig(os.path.join(model_metrics_dir, "loss_curve.png"))
# plt.close()

# plt.figure(figsize=(6, 5))
# plt.plot(history.history['accuracy'], label='Training Accuracy')
# plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
# plt.title("Accuracy Evolution")
# plt.xlabel("Epochs")
# plt.ylabel("Accuracy")
# plt.legend()
# plt.savefig(os.path.join(model_metrics_dir, "accuracy_curve.png"))
# plt.close()

In [None]:
# # Prédictions sur les données de test
# y_pred = model.predict(X_test)
# y_pred_classes = np.argmax(y_pred, axis=1)  # Convertir les probabilités en classes
# y_true = np.argmax(y_test, axis=1)  # Convertir les labels one-hot en indices de classes

# # Générer la matrice de confusion
# conf_matrix = confusion_matrix(y_true, y_pred_classes)
# disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=emotion_to_index.keys())

# # Afficher et sauvegarder la matrice de confusion
# plt.figure(figsize=(8, 8))
# disp.plot(cmap='Blues', values_format='d', ax=plt.gca())  # Utiliser les valeurs entières pour la matrice
# plt.title("Confusion Matrix")
# plt.savefig(os.path.join(model_metrics_dir, "confusion_matrix.png"))
# plt.close()


In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# emotion_counts = {emotion: 0 for emotion in emotion_to_index.keys()}
# for label in original_labels:
#     emotion_name = list(emotion_to_index.keys())[list(emotion_to_index.values()).index(label)]
#     emotion_counts[emotion_name] += 1

# emotions = list(emotion_counts.keys())
# counts = list(emotion_counts.values())

# plt.figure(figsize=(10, 6))
# plt.bar(emotions, counts, color='skyblue', edgecolor='black')
# plt.title("Répartition des émotions sélectionnées", fontsize=16)
# plt.xlabel("Émotions", fontsize=14)
# plt.ylabel("Nombre d'instances", fontsize=14)
# plt.xticks(rotation=45, fontsize=12)
# plt.yticks(fontsize=12)

# for i, count in enumerate(counts):
#     plt.text(i, count + 0.5, str(count), ha='center', fontsize=12)

# plt.tight_layout()
# plt.show()
