In [13]:
import numpy as np
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import sys
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [14]:
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive')
    spectrograms_dir = "/content/drive/MyDrive/audio_representations/RAVDESS/spectrograms"
else :
    spectrograms_dir = "audio_representations/spectrograms"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
images = []
labels = []

for file_name in os.listdir(spectrograms_dir):
    if file_name.endswith(".png"):
        file_path = os.path.join(spectrograms_dir, file_name)

        # Convert to numpy
        img = Image.open(file_path).convert("RGB")
        img = img.resize((256, 256))
        img_array = np.array(img)

        images.append(img_array)

        label = int(file_name.split("-")[2]) - 1
        labels.append(label)

images = np.array(images)
labels = np.array(labels)

# Normalize
images = images / 255.0

# One hot
num_classes = len(np.unique(labels))
labels_one_hot = to_categorical(labels, num_classes=num_classes)

X_train, X_temp, y_train, y_temp = train_test_split(images, labels_one_hot, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"Ensemble d'entraînement : {X_train.shape}, {y_train.shape}")
print(f"Ensemble de validation : {X_val.shape}, {y_val.shape}")
print(f"Ensemble de test : {X_test.shape}, {y_test.shape}")

Ensemble d'entraînement : (1008, 256, 256, 3), (1008, 8)
Ensemble de validation : (216, 256, 256, 3), (216, 8)
Ensemble de test : (216, 256, 256, 3), (216, 8)


In [16]:
# Transform 2D images into 2D sequences for LSTM
# X_train.shape[1] = 256 (height), X_train.shape[2] = 256 (width), 3 (color channels)
X_train_lstm = X_train.reshape(X_train.shape[0], X_train.shape[1], -1)  # 256, 256*3
X_val_lstm = X_val.reshape(X_val.shape[0], X_val.shape[1], -1)  # 256, 256*3
X_test_lstm = X_test.reshape(X_test.shape[0], X_test.shape[1], -1)  # 256, 256*3

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

def create_lstm_model(input_shape, num_classes):
    model = Sequential()

    # Normalisation des entrées
    model.add(BatchNormalization(input_shape=input_shape))

    # Première couche LSTM avec retour de séquences
    model.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.2))

    # Deuxième couche LSTM sans retour de séquences
    model.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.2))

    # Couche dense pour la classification
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))  # Dropout pour éviter le surapprentissage

    # Couche de sortie pour la classification multiclasse
    model.add(Dense(num_classes, activation='softmax'))

    # Compilation du modèle avec l'optimiseur Adam et la perte categorical crossentropy
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Définir la forme d'entrée pour LSTM (séquence de 256, avec 256*3 caractéristiques à chaque pas de temps)
input_shape = (256, 256 * 3)  # 256 timesteps, 256*3 caractéristiques (RGB)
model = create_lstm_model(input_shape, num_classes)

model.summary()


  super().__init__(**kwargs)


In [18]:
history = model.fit(
    X_train_lstm, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_val_lstm, y_val)
)

test_loss, test_accuracy = model.evaluate(X_test_lstm, y_test)
print(f"Loss on the test set: {test_loss}")
print(f"Accuracy on the test set: {test_accuracy}")

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.1101 - loss: 2.1183 - val_accuracy: 0.1389 - val_loss: 2.0780
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - accuracy: 0.1774 - loss: 2.0397 - val_accuracy: 0.1713 - val_loss: 2.0426
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - accuracy: 0.2173 - loss: 1.9884 - val_accuracy: 0.1713 - val_loss: 2.0436
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - accuracy: 0.2360 - loss: 1.9424 - val_accuracy: 0.1713 - val_loss: 2.0215
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 1s/step - accuracy: 0.2557 - loss: 1.9034 - val_accuracy: 0.1667 - val_loss: 2.0063
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 2s/step - accuracy: 0.3022 - loss: 1.8294 - val_accuracy: 0.1713 - val_loss: 1.9929
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━

In [19]:
save_path = '/content/drive/MyDrive/models/lstm_ravdess_spectrograms'

model.save(save_path + "/weights.h5")



In [20]:
plt.figure(figsize=(6, 5))

plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title("Loss Evolution")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.savefig(os.path.join(save_path, "loss_curve.png"))
plt.close()

plt.figure(figsize=(6, 5))
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title("Accuracy Evolution")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.savefig(os.path.join(save_path, "accuracy_curve.png"))
plt.close()


In [21]:
emotion_labels = [
    "Neutral", "Calm", "Happy", "Sad", "Angry",
    "Fearful", "Disgust", "Surprised"
]

X_test_lstm = X_test.reshape(X_test.shape[0], X_test.shape[1], -1)

y_pred = model.predict(X_test_lstm)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

conf_matrix = confusion_matrix(y_true_labels, y_pred_labels)

disp = ConfusionMatrixDisplay(
    confusion_matrix=conf_matrix,
    display_labels=emotion_labels
)
disp.plot(cmap=plt.cm.Blues, xticks_rotation=45)
plt.title("Confusion Matrix LSTM on Spectrograms")
conf_matrix_path = os.path.join(save_path, "confusion_matrix.png")
plt.savefig(conf_matrix_path)
plt.close()


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 319ms/step
