In [404]:
import os
import pandas as pd  
import random
import matplotlib.pyplot as plt 
import seaborn as sns 
import librosa
import tensorflow as tf 
from keras.layers import Dense,Conv1D,LSTM,Bidirectional,MaxPooling1D,Dropout,Bidirectional,Flatten,BatchNormalization
from keras.utils import to_categorical
from keras.models import Sequential 
from keras.optimizers import Adam
from keras.metrics import AUC
from sklearn.preprocessing import LabelBinarizer,LabelEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np
import warnings
warnings.filterwarnings('ignore')
data_path = "D:/Emotions/Emotions"
filenames = os.listdir(data_path)
for name in filenames:
    print(f"Number of files in folder {name} are ",len(os.listdir((os.path.join(data_path,name)))))

Number of files in folder Angry are  2167
Number of files in folder Disgusted are  1863
Number of files in folder Fearful are  2047
Number of files in folder Happy are  2167
Number of files in folder Neutral are  1795
Number of files in folder Sad are  2167
Number of files in folder Suprised are  592


In [276]:
def training_and_testing_data(data_path, folder_name, split_ratio):
    files = os.listdir(os.path.join(data_path, folder_name))
    total_files = len(files)
    value = int(split_ratio * total_files)

    train_files = files[:value]
    test_files = files[value:]

    train_data = pd.DataFrame({
        'Filepath': [os.path.join(data_path, folder_name, f) for f in train_files],
        'Emotion': [folder_name] * len(train_files)
    })

    test_data = pd.DataFrame({
        'Filepath': [os.path.join(data_path, folder_name, f) for f in test_files],
        'Emotion': [folder_name] * len(test_files)
    })

    return train_data, test_data


train_data = pd.DataFrame(columns=['Filepath', 'Emotion'])
test_data = pd.DataFrame(columns=['Filepath', 'Emotion'])


folder_names = os.listdir(data_path)
for folder_name in folder_names:
    curr_train, curr_test = training_and_testing_data(data_path, folder_name, 0.8)
    train_data = pd.concat([train_data, curr_train], ignore_index=True)
    test_data = pd.concat([test_data, curr_test], ignore_index=True)

train_data = train_data.sample(frac=1).reset_index(drop=True)
test_data = test_data.sample(frac=1).reset_index(drop=True)

In [277]:
# def plot_audio_features(file_path):
#     y, sr = librosa.load(file_path)

#     plt.figure(figsize=(14, 12))

#     # 1. Waveform
#     plt.subplot(3, 2, 1)
#     librosa.display.waveshow(y, sr=sr)
#     plt.title('Waveform')

#     # 2. Spectrogram (log power)
#     plt.subplot(3, 2, 2)
#     D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
#     librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
#     plt.colorbar(format="%+2.0f dB")
#     plt.title('Spectrogram')

#     # 3. MFCCs
#     plt.subplot(3, 2, 3)
#     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
#     librosa.display.specshow(mfccs, sr=sr, x_axis='time')
#     plt.colorbar()
#     plt.title('MFCCs')

#     # 4. Chroma
#     plt.subplot(3, 2, 4)
#     chroma = librosa.feature.chroma_stft(y=y, sr=sr)
#     librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', sr=sr)
#     plt.colorbar()
#     plt.title('Chroma Feature')

#     # 5. Spectral Centroid
#     plt.subplot(3, 2, 5)
#     spec_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
#     frames = range(len(spec_centroid))
#     t = librosa.frames_to_time(frames)
#     librosa.display.waveshow(y, sr=sr, alpha=0.4)
#     plt.plot(t, spec_centroid, color='r')
#     plt.title('Spectral Centroid')

#     # 6. Pitch (F0)
#     plt.subplot(3, 2, 6)
#     f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=librosa.note_to_hz('C2'),
#                                                  fmax=librosa.note_to_hz('C7'))
#     times = librosa.times_like(f0)
#     plt.plot(times, f0, label='f0', color='g')
#     plt.title('Estimated Pitch (F0)')
#     plt.xlabel("Time (s)")
#     plt.ylabel("Frequency (Hz)")

#     plt.tight_layout()
#     plt.show()
#     return 
# plot_audio_features(train_data['Filepath'][0])

In [389]:
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler

def extract_features(file_path, n_mfcc=40, target_length=120, noise_factor=0.005):
    y, sr = librosa.load(file_path, sr=16000)
    noise = np.random.randn(len(y))
    y_noisy = y + noise_factor * noise
    mfcc = librosa.feature.mfcc(y=y_noisy, sr=sr, n_mfcc=n_mfcc)
    scaler = StandardScaler()
    mfcc = scaler.fit_transform(mfcc.T).T   
    if mfcc.shape[1] < target_length:
        padding = np.zeros((n_mfcc, target_length - mfcc.shape[1]))
        mfcc = np.hstack((mfcc, padding))
    elif mfcc.shape[1] > target_length:
        mfcc = mfcc[:, :target_length]
    return mfcc


In [390]:
features = extract_features(train_data['Filepath'][2])
features.shape

(40, 120)

In [391]:
train_data

Unnamed: 0,Filepath,Emotion
0,D:/Emotions/Emotions\Angry\1037_TSI_ANG_XX.wav,Angry
1,D:/Emotions/Emotions\Neutral\1079_IWW_NEU_XX.wav,Neutral
2,D:/Emotions/Emotions\Disgusted\1022_IWL_DIS_XX...,Disgusted
3,D:/Emotions/Emotions\Disgusted\1073_IWW_DIS_XX...,Disgusted
4,D:/Emotions/Emotions\Happy\1073_ITS_HAP_XX.wav,Happy
...,...,...
10230,D:/Emotions/Emotions\Angry\03-02-05-01-01-01-0...,Angry
10231,D:/Emotions/Emotions\Neutral\1019_DFA_NEU_XX.wav,Neutral
10232,D:/Emotions/Emotions\Neutral\1069_IEO_NEU_XX.wav,Neutral
10233,D:/Emotions/Emotions\Sad\1047_DFA_SAD_XX.wav,Sad


In [392]:
training_data = []
testing_data = []

train_paths = train_data['Filepath'].values 
test_paths = test_data['Filepath'].values 

for path in train_paths:
    emotion = path.split('\\')[1] 
    vector = extract_features(path)
    training_data.append([vector, emotion])

for path in test_paths:
    emotion = path.split('\\')[1]  
    vector = extract_features(path)
    testing_data.append([vector, emotion])


In [398]:
import pickle
with open("training_data.pkl", "wb") as f:
    pickle.dump(training_data, f)
with open("testing_data.pkl", "wb") as f:
    pickle.dump(testing_data, f)

In [399]:
with open("training_data.pkl", "rb") as f:
    training_data = pickle.load(f)

with open("testing_data.pkl", "rb") as f:
    testing_data = pickle.load(f)

In [400]:
train_values = []
train_labels = [] 
test_values  = [] 
test_labels  = []

for vector, emotion in training_data:
    train_values.append(vector)
    train_labels.append(emotion)

for vector, emotion in testing_data:
    test_values.append(vector)
    test_labels.append(emotion)

print(len(train_values))
print(len(train_labels))
print(len(test_values))
print(len(test_labels))


10235
10235
2563
2563


In [401]:
train_values = np.array(train_values)
test_values = np.array(test_values)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
print(train_values.shape)  # Should be (num_train_samples, 40, 180, 1)
print(train_binary.shape)  # Should be (num_train_samples, 7) if using one-hot encoding
print(test_values.shape)   # Should be (num_test_samples, 40, 180, 1)
print(test_binary.shape)   # Should be (num_test_samples, 7) if using one-hot encoding


(10235, 40, 120)
(10235, 7)
(2563, 40, 120)
(2563, 7)


In [402]:
train_values = train_values.reshape(-1,40,120,1)
test_values  = test_values.reshape(-1,40,120,1)

In [429]:
lb = LabelBinarizer()
train_binary = lb.fit_transform(train_labels)
test_binary = lb.fit_transform(test_labels)
print(lb.classes_)   
print(train_binary)     

['Angry' 'Disgusted' 'Fearful' 'Happy' 'Neutral' 'Sad' 'Suprised']
[[1 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]
 [0 1 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]]


In [441]:
train_values[0].shape

(40, 120, 1)

In [451]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC

model = Sequential()

model.add(Conv2D(8, (3, 3), activation = 'relu', padding = 'same', input_shape = (40,120,1)))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(16, (3, 3), activation = 'relu', padding = 'same'))
model.add(MaxPooling2D(pool_size = (2, 2),padding='same'))
model.add(Dropout(0.2))

model.add(Conv2D(32, (3, 3), activation = 'relu', padding = 'same'))
model.add(MaxPooling2D(pool_size = (2, 2),padding='same'))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), activation = 'relu', padding = 'same'))
model.add(MaxPooling2D(pool_size = (2, 2),padding='same'))
model.add(Dropout(0.2))

model.add(Conv2D(128, (3, 3), activation = 'relu', padding = 'same'))
model.add(MaxPooling2D(pool_size = (2, 2),padding='same'))
model.add(Dropout(0.2))

model.add(Conv2D(256, (3, 3), activation = 'relu', padding = 'same'))
model.add(MaxPooling2D(pool_size = (2, 2),padding='same'))
model.add(Dropout(0.2))


model.add(Conv2D(512, (3, 3), activation = 'relu', padding = 'same'))
model.add(MaxPooling2D(pool_size = (2, 2),padding='same'))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(128, activation = 'relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(7, activation = 'softmax'))
model.summary()

In [452]:
print(train_values.shape)  # Should be (num_train_samples, 40, 180, 1)
print(train_binary.shape)  # Should be (num_train_samples, 7) if using one-hot encoding
print(test_values.shape)   # Should be (num_test_samples, 40, 180, 1)
print(test_binary.shape)   # Should be (num_test_samples, 7) if using one-hot encoding


(10235, 40, 120, 1)
(10235, 7)
(2563, 40, 120, 1)
(2563, 7)


In [453]:
optimizer = Adam(learning_rate = 0.0005, beta_1 = 0.9, beta_2 = 0.999)
early_stop = EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True, verbose=1)
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

In [454]:
model.fit(train_values, train_binary,
          epochs=100,
          batch_size=32,
          validation_data=(test_values, test_binary),
          callbacks=[early_stop])

Epoch 1/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 94ms/step - accuracy: 0.1810 - loss: 2.0785 - val_accuracy: 0.1693 - val_loss: 1.9047
Epoch 2/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 99ms/step - accuracy: 0.2824 - loss: 1.7522 - val_accuracy: 0.1791 - val_loss: 1.9820
Epoch 3/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 101ms/step - accuracy: 0.3415 - loss: 1.6371 - val_accuracy: 0.2099 - val_loss: 1.7333
Epoch 4/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 103ms/step - accuracy: 0.3747 - loss: 1.5577 - val_accuracy: 0.2372 - val_loss: 1.6719
Epoch 5/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 102ms/step - accuracy: 0.3982 - loss: 1.5006 - val_accuracy: 0.3691 - val_loss: 1.6398
Epoch 6/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 100ms/step - accuracy: 0.4062 - loss: 1.4818 - val_accuracy: 0.3512 - val_loss: 1.6413
Epoch 

<keras.src.callbacks.history.History at 0x23986f3bd10>

In [455]:
model.save('saved_model.keras')