In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, Bidirectional, Dropout, Reshape
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import sys
import random

In [2]:
spectrograms_dir = '/content/drive/MyDrive/Speech_Emotion_Recogntion/audio_representations'
metrics_dir = '/content/drive/MyDrive/Speech_Emotion_Recogntion/metrics'

Mounted at /content/drive


In [3]:
images = []
labels = []

fraction = 1.0

emotion_to_index = {
    "angry": 0,
    "happy": 1,
    "sad": 2,
    "neutral": 3,
    "fearful": 4,
    "disgust": 5,
    "surprise": 6,
}

all_files = [file for file in os.listdir(spectrograms_dir) if file.endswith(".png")]
selected_files = random.sample(all_files, int(len(all_files) * fraction))

for file_name in selected_files:
    file_path = os.path.join(spectrograms_dir, file_name)

    img = Image.open(file_path).convert("RGB")
    img = img.resize((256, 256))
    img_array = np.array(img)

    images.append(img_array)

    emotion = file_name.split("_")[1]
    label = emotion_to_index.get(emotion, -1)
    if label == -1:
        raise ValueError(f"Émotion inconnue dans le fichier : {file_name}")

    labels.append(label)

images = np.array(images)
labels = np.array(labels)

# Normalize
images = images / 255.0

# One-hot
num_classes = len(emotion_to_index)
labels_one_hot = to_categorical(labels, num_classes=num_classes)

X_train, X_temp, y_train, y_temp = train_test_split(images, labels_one_hot, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"Ensemble d'entraînement : {X_train.shape}, {y_train.shape}")
print(f"Ensemble de validation : {X_val.shape}, {y_val.shape}")
print(f"Ensemble de test : {X_test.shape}, {y_test.shape}")

Ensemble d'entraînement : (1008, 256, 256, 3), (1008, 8)
Ensemble de validation : (216, 256, 256, 3), (216, 8)
Ensemble de test : (216, 256, 256, 3), (216, 8)


In [4]:
def create_cnn_blstm_model(input_shape, num_classes):
    model = Sequential()

    # CNN Part
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

    # Flatten the output from CNN to pass into LSTM
    model.add(Flatten())

    # Reshape for LSTM layer
    # Reshape to have a sequence dimension for LSTM
    model.add(Reshape((1, -1)))  # Here, we reshape it to (batch_size, time_steps, features)

    # BLSTM Part
    model.add(Bidirectional(LSTM(64, return_sequences=False)))  # Bidirectional LSTM

    # Fully connected layer
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))  # Dropout to prevent overfitting

    model.add(Dense(num_classes, activation='softmax'))  # Final output layer

    # Compile model
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    return model

input_shape = (256, 256, 3)  # 3 channels (RGB)
model = create_cnn_blstm_model(input_shape, num_classes)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_val, y_val)
)

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Loss on the test set : {test_loss}")
print(f"Accuracy on the test set : {test_accuracy}")

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 195ms/step - accuracy: 0.1659 - loss: 2.0727 - val_accuracy: 0.2593 - val_loss: 1.8616
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 128ms/step - accuracy: 0.3345 - loss: 1.7673 - val_accuracy: 0.3843 - val_loss: 1.5521
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 136ms/step - accuracy: 0.4850 - loss: 1.4036 - val_accuracy: 0.4815 - val_loss: 1.4323
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 129ms/step - accuracy: 0.5941 - loss: 1.1155 - val_accuracy: 0.5741 - val_loss: 1.2001
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 127ms/step - accuracy: 0.7107 - loss: 0.8607 - val_accuracy: 0.6296 - val_loss: 1.1155
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 133ms/step - accuracy: 0.7777 - loss: 0.6457 - val_accuracy: 0.6204 - val_loss: 1.1425
Epoch 7/20
[1m32/32[0m 

In [6]:
model_name = "CNN_BLSTM"
model_metrics_dir = os.path.join(metrics_dir, model_name)
os.makedirs(model_metrics_dir, exist_ok=True)



In [7]:
plt.figure(figsize=(6, 5))

plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title("Loss Evolution")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.savefig(os.path.join(model_metrics_dir, "loss_curve.png"))
plt.close()

plt.figure(figsize=(6, 5))
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title("Accuracy Evolution")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.savefig(os.path.join(model_metrics_dir, "accuracy_curve.png"))
plt.close()

In [8]:
emotion_labels = [
    "Angry", "Happy", "Sad", "Neutral", "Fearful", "Disgust", "Surprise"
]

y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

conf_matrix = confusion_matrix(y_true_labels, y_pred_labels)

disp = ConfusionMatrixDisplay(
    confusion_matrix=conf_matrix,
    display_labels=emotion_labels
)
disp.plot(cmap=plt.cm.Blues, xticks_rotation=45)
plt.title("Confusion Matrix CNN on Spectrograms")

conf_matrix_path = os.path.join(model_metrics_dir, "confusion_matrix.png")
plt.savefig(conf_matrix_path)
plt.close()

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step


In [None]:
emotion_counts = {emotion: 0 for emotion in emotion_to_index.keys()}
for label in labels:
    emotion_name = list(emotion_to_index.keys())[list(emotion_to_index.values()).index(label)]
    emotion_counts[emotion_name] += 1

emotions = list(emotion_counts.keys())
counts = list(emotion_counts.values())

plt.figure(figsize=(10, 6))
plt.bar(emotions, counts, color='skyblue', edgecolor='black')
plt.title("Répartition des émotions sélectionnées", fontsize=16)
plt.xlabel("Émotions", fontsize=14)
plt.ylabel("Nombre d'instances", fontsize=14)
plt.xticks(rotation=45, fontsize=12)
plt.yticks(fontsize=12)

for i, count in enumerate(counts):
    plt.text(i, count + 0.5, str(count), ha='center', fontsize=12)

plt.tight_layout()
plt.show()