## 1. Installing Libraries

In [None]:
# Installing librosa
!pip install librosa

# Installing tensorflow
!pip install "tensorflow<2.11"

!pip install pandas

!pip install seaborn

In [None]:
# Importing os for file management
import os

# Importing numpy
import numpy as np

import pandas as pd

# Importing librosa, soundfile, wiener
import librosa
import soundfile
from scipy.signal import wiener

# Importing tensorflow
import tensorflow as tf

# Importing neural network components
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D ,Bidirectional ,LSTM, Dense, Flatten, Reshape, BatchNormalization
from tensorflow.keras.regularizers import l2 
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

# Importing OneHotEncoder and train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

## 2. Preprocessing and making the labelled dataset

In [None]:
# Function for Wiener filtering with safety check
def safe_wiener(audio_signal):
    """Apply a Wiener filter to a signal without ever producing NaN/inf.

    Parameters
    ----------
    audio_signal : array-like
        Samples to denoise.

    Returns
    -------
    array-like
        The input object itself when it has one sample or fewer (nothing to
        filter); otherwise an array of the same shape holding the filtered
        samples. Wherever the filter result is not finite, the corresponding
        input sample is returned instead.
    """
    if len(audio_signal) <= 1:
        return audio_signal  # If the signal is too short, skip filtering

    # wiener() divides by the local variance, which is exactly zero on flat
    # stretches; the resulting divide warnings are expected, so silence them.
    with np.errstate(divide="ignore", invalid="ignore"):
        filtered = wiener(audio_signal)

    # A completely flat (e.g. all-zero) signal yields 0/0 = NaN for every
    # sample; fall back to the unfiltered value so no NaN reaches later stages.
    return np.where(np.isfinite(filtered), filtered, audio_signal)

In [None]:
import os
import librosa
import numpy as np
from scipy.signal import wiener

# Path to the EmoDB dataset
source_dir = "./emodb"

# Emotion mapping based on EmoDB file naming conventions
emotion_map = {
    'W': 'angry',
    'L': 'boredom',
    'E': 'disgust',
    'A': 'fearful',
    'F': 'happy',
    'T': 'sad',
    'N': 'neutral'
}

# Preprocessing parameters (loop-invariant, so they are defined once up front)
target_sr = 44100               # Every clip is resampled to 44.1 kHz
desired_length = target_sr * 3  # 3 seconds of audio (132300 samples at 44.1 kHz)

# Outputs of this cell: one log-mel spectrogram and one emotion label per file
audio_data = []
labels = []

# Bookkeeping: which files entered the dataset and which were ignored
processed_files = []
skipped_files = []

# NOTE: safe_wiener() is defined in the previous cell and is reused here
# instead of being defined a second time.

# Iterate in sorted order: os.listdir() returns entries in arbitrary order,
# which would make the sample order (and the seeded split) machine-dependent.
for filename in sorted(os.listdir(source_dir)):
    # Only .wav files whose 6th character is a known emotion code are usable;
    # anything else (other file types, short names, unknown codes) is skipped
    # instead of raising or adding a `None` label.
    emotion_label = None
    if filename.lower().endswith(".wav") and len(filename) > 5:
        emotion_label = emotion_map.get(filename[5])  # Emotion code is the 6th character
    if emotion_label is None:
        skipped_files.append(filename)
        continue

    # Load the audio file
    audio_path = os.path.join(source_dir, filename)
    audio, sr = librosa.load(audio_path, sr=None)  # Load with original sampling rate

    # Resampling to 44.1 kHz
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

    # Silence Trimming
    audio, _ = librosa.effects.trim(audio)

    # Wiener Filtering for noise reduction
    audio = safe_wiener(audio)

    # Zero Padding or truncating to 3 seconds so every spectrogram has the same width
    if len(audio) < desired_length:
        audio = np.pad(audio, (0, desired_length - len(audio)), mode="constant")
    else:
        audio = audio[:desired_length]

    # Convert to Mel-Spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(
        y=audio,
        sr=target_sr,
        n_mels=128,
        hop_length=512,
        win_length=2048,  # Hanning window length
        window="hann"  # Apply Hanning window
    )

    # Convert to log scale (decibels), relative to each clip's own maximum
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Append the processed log-mel spectrogram and label to lists
    audio_data.append(log_mel_spectrogram)
    labels.append(emotion_label)
    processed_files.append(filename)

print(f"Processed {len(processed_files)} files, skipped {len(skipped_files)} files")

In [None]:
# Stack the per-file spectrograms into one array and append a trailing
# channel axis, as expected by the Conv2D-based model
X = np.array(audio_data)[..., np.newaxis]

# Emotion label (string) for each spectrogram
y = np.array(labels)

# One-hot encode the labels as a dense array; the encoder expects a 2-D column
encode = OneHotEncoder(sparse_output=False)
y_encode = encode.fit_transform(y[:, np.newaxis])

# Stratified 80/20 train-test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encode,
    test_size=0.2,
    random_state=42,
    stratify=y_encode,
)

## 3. Building and Training the model

In [None]:
# Take the tensor shapes from the prepared data instead of hard-coding them,
# so the network always matches what it is trained on.
input_shape = X_train.shape[1:]  # (n_mels, frames, channels); (128, 259, 1) for 3 s clips at 44.1 kHz
num_classes = y_train.shape[1]   # One softmax unit per one-hot column (emotion_map lists 7 emotions)

model = Sequential()

model.add(Input(shape = input_shape))

# Convolutional feature extractor: four Conv2D -> BatchNormalization -> MaxPool2D stages.
# Each entry is (filters, kernel size, convolution strides, pooling size / strides).
conv_blocks = [
    (64,  (9, 9), (2, 2), (2, 2)),  # 1st Convolution Layer
    (64,  (7, 7), (1, 1), (4, 4)),  # 2nd Convolution layer
    (128, (5, 5), (1, 1), (4, 4)),  # 3rd Convolution Layer
    (128, (3, 3), (1, 1), (4, 4)),  # 4th Convolution layer
]
for filters, kernel_size, conv_strides, pool_size in conv_blocks:
    model.add(Conv2D(filters, kernel_size = kernel_size, strides = conv_strides, activation = "relu", padding = "same", kernel_regularizer = l2(0.01)))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size = pool_size, strides = pool_size, padding = "same"))

conv_output_shape = model.output_shape[1:]  # Output shape from Conv2D layers

# Reshape layer before BiLSTM: (height, width, channels) -> (height, width * channels)
# NOTE(review): the sequence axis given to the LSTM is the first (mel/frequency)
# axis, and for a (128, 259, 1) input the stack above reduces it to length 1,
# so the BiLSTM appears to see a single step — confirm this is intended.
model.add(Reshape((conv_output_shape[0], conv_output_shape[1] * conv_output_shape[2])))

# BiLSTM Layer
model.add(Bidirectional(LSTM(128)))

# Dense Layer: width must equal the number of one-hot label columns, otherwise
# categorical cross-entropy rejects the (batch, classes) shape mismatch
model.add(Dense(num_classes, activation = "softmax"))

model.compile(optimizer = Adam(learning_rate = 0.0001), loss = CategoricalCrossentropy(from_logits = False), metrics = ["accuracy"])

In [None]:
# Print a layer-by-layer overview of the model built in the previous cell
model.summary()

In [None]:
# Train the model on the training split
model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=10,
)

## 4. Testing the model

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Emotion names in the column order produced by the one-hot encoder, so that
# class index i in the matrix corresponds to class_names[i]
class_names = encode.categories_[0]
class_indices = np.arange(len(class_names))

# Make predictions on the test set
y_pred = model.predict(X_test)

# Convert predictions and true labels to class indices
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Calculate the confusion matrix, normalized per true class (each row sums to 1).
# Passing `labels` keeps one row/column per known class, so the matrix always
# lines up with the tick labels below.
cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_indices, normalize='true')

# Plot the confusion matrix as a heatmap, labelled with emotion names rather
# than bare class indices
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='.2f',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Normalized Confusion Matrix')
plt.show()

In [None]:
from sklearn.metrics import classification_report
import numpy as np


# Predicted / true class = index of the largest entry in each row
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_test_classes = y_test.argmax(axis=1)

# Per-class metrics, labelled with the encoder's category names
report = classification_report(
    y_test_classes,
    y_pred_classes,
    target_names=encode.categories_[0],
)
print(report)

In [None]:
# Save the trained model to an HDF5 (.h5) file under models/
model.save("models/first_model.h5")

In [None]:
# Reload the model from the same path the save cell writes to
model = load_model("models/first_model.h5")