### **DATA PREPROCESSING**

In [None]:
import gc
import pandas as pd
import csv
import librosa
import numpy as np
from glob import glob
import zipfile
import os
import tensorflow.keras as keras
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten

In [None]:
# Unpack the zipped dataset into a local working directory.
zip_file = "Emotion_zipped.zip"
target_dir = "Emotion"

# Create the extraction directory only when nothing exists at that path yet.
target_missing = not os.path.exists(target_dir)
if target_missing:
    os.makedirs(target_dir)

# NOTE(review): the cells further down read audio from "Emotions/..." while
# this cell extracts into "Emotion" -- confirm the two locations line up.
with zipfile.ZipFile(zip_file, mode='r') as zip_ref:
    zip_ref.extractall(path=target_dir)

In [None]:
# Build an index CSV holding one (Emotion, Path) row per .wav file found.
output_csv_path = "hsr_all_test.csv"
emotions = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad']

with open(output_csv_path, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)

    # Header row comes first.
    csv_writer.writerow(['Emotion', 'Path'])

    for emotion in emotions:
        # Sorted so the rows of one emotion are written in a stable order.
        emotion_paths = sorted(glob(f'Emotions/{emotion}/*.wav'))

        # Store "<emotion>/<file name>" rather than the full globbed path.
        csv_writer.writerows(
            [emotion, os.path.join(emotion, os.path.basename(path))]
            for path in emotion_paths
        )


In [None]:
def split_data(dataset, test_ratio=0.1, valid_ratio=0.1, random_state=None):
    """Shuffle the rows of a CSV file and split them into train/valid/test.

    Args:
        dataset: Path (or anything ``pd.read_csv`` accepts) of the CSV to split.
        test_ratio: Fraction of rows reserved for the test split.
        valid_ratio: Fraction of rows reserved for the validation split.
        random_state: Optional seed forwarded to ``DataFrame.sample``. Pass a
            fixed value to get the same split on every run; the default
            ``None`` keeps the shuffle unseeded.

    Returns:
        A ``(df_train, df_valid, df_test)`` tuple of DataFrames, each with a
        fresh 0-based index. The train and validation sizes are truncated to
        whole rows; the test split receives every remaining row, so it can be
        slightly larger than ``test_ratio * len``.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1 (which would otherwise yield a negative slice bound and a
            silently wrong split).
    """
    if test_ratio < 0 or valid_ratio < 0 or test_ratio + valid_ratio > 1:
        raise ValueError(
            "test_ratio and valid_ratio must be non-negative and sum to at most 1"
        )

    df = pd.read_csv(dataset)
    shuffled_df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    n_rows = len(shuffled_df)
    train_end = int(n_rows * (1 - test_ratio - valid_ratio))
    valid_end = train_end + int(n_rows * valid_ratio)

    df_train = shuffled_df[:train_end].reset_index(drop=True)
    df_valid = shuffled_df[train_end:valid_end].reset_index(drop=True)
    df_test = shuffled_df[valid_end:].reset_index(drop=True)

    # Drop the unshuffled copy before returning to keep peak memory down.
    del df
    gc.collect()
    return df_train, df_valid, df_test


In [None]:
# Split the index CSV once and persist each partition for the later cells.
df_train, df_valid, df_test = split_data("hsr_all_test.csv")
for _frame, _csv_name in (
    (df_train, 'df_train.csv'),
    (df_valid, 'df_valid.csv'),
    (df_test, 'df_test.csv'),
):
    _frame.to_csv(_csv_name)

### **FEATURE EXTRACTION**


In [None]:
def feature_extraction(file_path):
    """Turn one audio file into a flat vector of summary statistics.

    The audio is loaded from ``Emotions/<file_path>`` and three librosa
    features are computed from it: 64 MFCCs, a mel spectrogram and the
    spectral centroid. Each feature matrix is reduced along axis 1 to its
    mean, min and max, and the results are concatenated in this order:
    MFCC (mean, min, max), mel spectrogram (mean, min, max),
    spectral centroid (mean, min, max).

    Args:
        file_path: Audio file path relative to the ``Emotions`` directory.

    Returns:
        The concatenated NumPy array of all summary statistics.
    """
    signal, sample_rate = librosa.load(f"Emotions/{file_path}")

    def summarise(matrix):
        # Collapse axis 1 into three per-row statistics laid end to end.
        matrix = np.array(matrix)
        return np.concatenate(
            (matrix.mean(axis=1), matrix.min(axis=1), matrix.max(axis=1))
        )

    feature_matrices = (
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=64),
        librosa.feature.melspectrogram(y=signal, sr=sample_rate),
        librosa.feature.spectral_centroid(y=signal, sr=sample_rate),
    )
    return np.concatenate([summarise(matrix) for matrix in feature_matrices])


In [None]:
# Run the extractor over every path of each split (train, test, validation).
feature_train = df_train['Path'].apply(feature_extraction)
feature_test = df_test['Path'].apply(feature_extraction)
feature_valid = df_valid['Path'].apply(feature_extraction)

In [None]:
# Persist each split's feature vectors; the index column is left out.
for _features, _csv_name in (
    (feature_train, 'train_feature.csv'),
    (feature_test, 'test_feature.csv'),
    (feature_valid, 'valid_feature.csv'),
):
    _features.to_csv(_csv_name, index=False)

In [None]:
# Load the three saved feature CSVs back in (train, test, validation order).
train_feature_df, test_feature_df, valid_feature_df = (
    pd.read_csv(_csv_name)
    for _csv_name in ('train_feature.csv', 'test_feature.csv', 'valid_feature.csv')
)

In [None]:
def _parse_feature_vector(text):
    """Parse one stored cell: drop the first and last character, read floats."""
    return np.fromstring(text[1:-1], sep=' ')


# The 'Path' column holds each vector as text; convert every cell to an array.
feature_train = train_feature_df['Path'].apply(_parse_feature_vector)
feature_test = test_feature_df['Path'].apply(_parse_feature_vector)
feature_valid = valid_feature_df['Path'].apply(_parse_feature_vector)

In [None]:
# Stack each split's per-file vectors into a 2-D matrix, then append a
# trailing axis of length 1 so every array is (rows, features, 1).
X_tr = np.array(feature_train.tolist())
X_train = np.expand_dims(X_tr, axis=2)
print(X_train.shape)

X_val = np.array(feature_valid.tolist())
X_validation = np.expand_dims(X_val, axis=2)
print(X_validation.shape)

X_test = np.expand_dims(np.array(feature_test.tolist()), axis=2)
print(X_test.shape)

### **LABEL ENCODING AND MODEL**

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Reload the saved split index files to get the 'Emotion' label of every row.
df_train = pd.read_csv('df_train.csv')
df_valid = pd.read_csv('df_valid.csv')
df_test = pd.read_csv('df_test.csv')

encoder = OneHotEncoder()

# Fit the category -> column mapping on the TRAINING labels only.
Y_train = df_train['Emotion'].values
Y_train = encoder.fit_transform(np.array(Y_train).reshape(-1, 1)).toarray()

# Validation and test labels reuse the fitted mapping via transform().
# Re-fitting on each split would give every split its own mapping: a split
# missing one emotion would produce fewer columns, with the remaining
# columns shifted relative to the training targets.
Y_valid = df_valid['Emotion'].values
print(Y_valid.shape)
Y_valid = encoder.transform(np.array(Y_valid).reshape(-1, 1)).toarray()

Y_test = df_test['Emotion'].values
Y_test = encoder.transform(np.array(Y_test).reshape(-1, 1)).toarray()

In [None]:
# Two Conv1D + max-pooling stages feed two stacked LSTMs, followed by a dense
# layer and a 6-way softmax output.
model_LSTM = keras.Sequential()
model_LSTM.add(Conv1D(512, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
model_LSTM.add(MaxPooling1D(pool_size=2))
model_LSTM.add(Conv1D(256, kernel_size=3, activation='relu'))
model_LSTM.add(MaxPooling1D(pool_size=2))
# The first LSTM returns the full sequence so the second LSTM can consume it.
model_LSTM.add(LSTM(256, activation='tanh', return_sequences=True, dropout=0.30))
model_LSTM.add(LSTM(64, activation='tanh', dropout=0.30))
model_LSTM.add(Dense(32, activation='relu'))
model_LSTM.add(Dense(6, activation='softmax'))

# Take Adam from the same `tensorflow.keras` namespace (imported as `keras`)
# that builds the model. Mixing in an optimizer from the standalone `keras`
# package can fail at compile time when the two installations differ.
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model_LSTM.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model_LSTM.summary()

In [None]:
# Fit for 30 epochs in batches of 4, passing the validation split to fit().
hist = model_LSTM.fit(
    X_train,
    Y_train,
    validation_data=(X_validation, Y_valid),
    epochs=30,
    batch_size=4,
    verbose=1,
)

# Pull the per-epoch curves out of the training history.
_history = hist.history
train_loss, val_loss = _history['loss'], _history['val_loss']
train_accuracy, val_accuracy = _history['accuracy'], _history['val_accuracy']

# Loss graph: training and validation loss on one figure.
plt.figure(figsize=(8, 4))
for _curve, _label, _color in (
    (train_loss, 'Training Loss', 'blue'),
    (val_loss, 'Validation Loss', 'orange'),
):
    plt.plot(_curve, label=_label, color=_color)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

In [None]:
# Write the trained model to disk.
model_LSTM.save(filepath='MODEL_NAME.keras')

In [None]:
# Reload the saved model from disk and score it on the test split.
loaded_model = load_model('MODEL_NAME.keras')

_scores = loaded_model.evaluate(X_test, Y_test)
test_loss, test_accuracy = _scores
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")