In [3]:
import librosa
import numpy as np

def extract_log_mel_spectrogram(file_path, n_mels=128, duration=3, sr=22050, n_frames=128):
    """Load an audio clip and return a fixed-width log-mel spectrogram.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mels: Number of mel bands requested from ``melspectrogram``.
        duration: Maximum number of seconds read from the start of the file.
        sr: Sample rate the audio is resampled to on load.
        n_frames: Number of time frames (columns) in the returned array.
            Shorter clips are padded on the right, longer ones truncated.

    Returns:
        2-D ``np.ndarray`` in dB whose second axis has exactly ``n_frames``
        columns.
    """
    audio, sr = librosa.load(file_path, sr=sr, mono=True, duration=duration)
    mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    log_mel = librosa.power_to_db(mel, ref=np.max)

    missing_frames = n_frames - log_mel.shape[1]
    if missing_frames > 0:
        # With ref=np.max the values are relative to the clip's peak, so 0 dB is
        # the loudest value in the clip. Padding with zeros would therefore
        # append full-scale energy; pad with the clip's quietest value instead
        # so the padded region reads as silence.
        log_mel = np.pad(
            log_mel,
            pad_width=((0, 0), (0, missing_frames)),
            mode='constant',
            constant_values=log_mel.min(),
        )
    else:
        log_mel = log_mel[:, :n_frames]

    return log_mel


In [5]:
import os
import glob
# Maps the emotion code found in the third dash-separated field of each
# filename (see the parsing in the loop below) to an integer class index.
emotion_map = {
    '01': 0,  # neutral
    '02': 1,  # calm
    '03': 2,  # happy
    '04': 3,  # sad
    '05': 4,  # angry
    '06': 5,  # fearful
    '07': 6,  # disgust
    '08': 7   # surprised
}

X, y = [], []

# Dataset root. The original machine-specific location is kept as the default;
# set the AUDIO_DATA_DIR environment variable to run the notebook elsewhere.
base_path = os.environ.get(
    'AUDIO_DATA_DIR',
    '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24',
)

# Files whose name does not yield a known emotion code; they are left out of
# X and y so that features and labels stay aligned and y stays purely integer.
skipped_files = []

for folder in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue  # ignore stray files sitting next to the per-actor folders

    # glob returns files in filesystem order; sort so the sample order (and
    # therefore the seeded train/validation split) is the same on every run.
    for file_path in sorted(glob.glob(os.path.join(folder_path, "*.wav"))):
        # Resolve the label first so a bad filename never leaves an
        # unmatched feature array in X.
        filename = os.path.basename(file_path)
        parts = filename.split("-")
        emotion_label = emotion_map.get(parts[2]) if len(parts) > 2 else None
        if emotion_label is None:
            skipped_files.append(file_path)
            continue

        X.append(extract_log_mel_spectrogram(file_path))
        y.append(emotion_label)

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) with an unrecognised emotion code")

if not X:
    raise FileNotFoundError(f"No labelled .wav files found under {base_path!r}")

X = np.array(X)
y = np.array(y)
# Append a trailing channel axis so each sample is (mel bands, frames, 1).
X = X[..., np.newaxis]



In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Hold out 20% of the samples for validation. Stratifying on y keeps the class
# proportions equal in both splits, and the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# One-hot encode with an explicit, shared class count. Without num_classes,
# to_categorical infers the width from the largest label present in each array,
# so the two matrices could end up with different numbers of columns.
num_classes = int(y.max()) + 1
y_train_cat = to_categorical(y_train, num_classes=num_classes)
y_val_cat = to_categorical(y_val, num_classes=num_classes)


In [7]:
# Quick sanity check: show the integer class labels of the training split.
print(y_train)

[6 5 1 ... 1 4 6]


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, AveragePooling2D, Flatten, Dense, Dropout, BatchNormalization

# Derive the network's input and output sizes from the prepared arrays so the
# model always matches the features and the one-hot targets it is trained on.
input_shape = X_train.shape[1:]      # per-sample shape, without the batch axis
num_outputs = y_train_cat.shape[1]   # width of the one-hot label matrix

# Three Conv -> BatchNorm -> AveragePooling stages followed by a small dense
# classifier head. An explicit Input layer is used instead of passing
# input_shape= to the first Conv2D, which Keras warns against for Sequential
# models.
model = Sequential([
    Input(shape=input_shape),

    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    AveragePooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    AveragePooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    AveragePooling2D((2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_outputs, activation='softmax')
])

# categorical_crossentropy pairs with the softmax output and one-hot targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
from tensorflow.keras.callbacks import EarlyStopping

# Train for a fixed number of epochs, scoring the validation split after each one.
# Early stopping is currently switched off; to turn it on, add
# callbacks=[EarlyStopping(patience=10, restore_best_weights=True)] to the options.
fit_options = dict(
    validation_data=(X_val, y_val_cat),
    epochs=70,
    batch_size=32,
)
history = model.fit(X_train, y_train_cat, **fit_options)


Epoch 1/70
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 184ms/step - accuracy: 0.2381 - loss: 4.7658 - val_accuracy: 0.0660 - val_loss: 84.1653
Epoch 2/70
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 197ms/step - accuracy: 0.2591 - loss: 1.9055 - val_accuracy: 0.0833 - val_loss: 22.5460
Epoch 3/70
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 198ms/step - accuracy: 0.2800 - loss: 1.8625 - val_accuracy: 0.1389 - val_loss: 7.3637
Epoch 4/70
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 201ms/step - accuracy: 0.2805 - loss: 1.7672 - val_accuracy: 0.1771 - val_loss: 2.2761
Epoch 5/70
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 207ms/step - accuracy: 0.3343 - loss: 1.6651 - val_accuracy: 0.2812 - val_loss: 1.8482
Epoch 6/70
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 203ms/step - accuracy: 0.3408 - loss: 1.6439 - val_accuracy: 0.3542 - val_loss: 1.8360
Epoch 7/70
[1m36/36[0m 

In [10]:
from sklearn.metrics import classification_report

# Turn the model's per-class scores into predicted class indices by taking the
# highest-scoring class for each validation sample.
val_scores = model.predict(X_val)
y_pred = val_scores.argmax(axis=1)

# Per-class precision/recall/F1 against the integer validation labels.
report = classification_report(y_val, y_pred)
print(report)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
              precision    recall  f1-score   support

           0       0.45      0.53      0.49        19
           1       0.88      0.61      0.72        38
           2       0.62      0.39      0.48        38
           3       0.63      0.58      0.60        38
           4       0.54      0.90      0.67        39
           5       0.92      0.56      0.70        39
           6       0.62      0.79      0.70        38
           7       0.73      0.82      0.77        39

    accuracy                           0.66       288
   macro avg       0.68      0.65      0.64       288
weighted avg       0.69      0.66      0.65       288

