In [19]:
# =======================
# STEP 1: Imports & Setup
# =======================
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


# ==========================
# STEP 2: Load & Parse Files
# ==========================

audio_files = []
labels = []

emotion_dict = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}

def decode_emotion(filename):
    parts = filename.split("-")
    emotion_code = parts[2]
    return emotion_dict.get(emotion_code)

data_path = [
     '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24',
     '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Song_Actors_01-24'
]


for root_dir in data_path:
    for folder in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder)
        for file in os.listdir(folder_path):
            if file.endswith(".wav"):
                emotion = decode_emotion(file)
                if emotion:
                    audio_files.append(os.path.join(folder_path, file))
                    labels.append(emotion)




# ======================================
# STEP 3: Feature Extraction (Log-Mel)
# ======================================
def extract_logmel_features(file_path, sr=22050, duration=3):
    try:
        signal, rate = librosa.load(file_path, sr=sr, duration=duration)
        if len(signal) < sr * duration:
            pad_width = sr * duration - len(signal)
            signal = np.pad(signal, (0, pad_width))
        mel = librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        return mel_db
    except Exception as e:
        return None

features = []
final_labels = []

for path, lbl in zip(audio_files, labels):
    feature = extract_logmel_features(path)
    if feature is not None:
        features.append(feature)
        final_labels.append(lbl)


# ==================================
# STEP 4: Encode Labels & Preprocess
# ==================================
from sklearn.preprocessing import LabelEncoder

X = np.array(features)
X = X[..., np.newaxis]  # add channel
le = LabelEncoder()
y = le.fit_transform(final_labels)
y = to_categorical(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)


# ============================
# STEP 5: Model Architecture
# ============================
def create_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# ======================
# STEP 6: Train the Model
# ======================
model = create_model(X_train.shape[1:], y_train.shape[1])

#callbacks = [
    #EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    #ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)
#]

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=60,
    batch_size=32)
    #callbacks=callbacks
#)


# ==============================
# STEP 7: Evaluate & Visualize
# ==============================
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

print(classification_report(y_true_classes, y_pred_classes, target_names=le.classes_))

cm = confusion_matrix(y_true_classes, y_pred_classes)
sns.heatmap(cm, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_, cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()


# ==================
# STEP 8: Save Model
# ==================


Epoch 1/60


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 146ms/step - accuracy: 0.1834 - loss: 10.1451 - val_accuracy: 0.1609 - val_loss: 15.7486
Epoch 2/60
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 154ms/step - accuracy: 0.1584 - loss: 2.0652 - val_accuracy: 0.1181 - val_loss: 8.5512
Epoch 3/60
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 160ms/step - accuracy: 0.1737 - loss: 2.0455 - val_accuracy: 0.1629 - val_loss: 2.0687
Epoch 4/60
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 159ms/step - accuracy: 0.1846 - loss: 2.0338 - val_accuracy: 0.1527 - val_loss: 2.0624
Epoch 5/60
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 161ms/step - accuracy: 0.1614 - loss: 2.0268 - val_accuracy: 0.1548 - val_loss: 2.0591
Epoch 6/60
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 163ms/step - accuracy: 0.1643 - loss: 2.0339 - val_accuracy: 0.1792 - val_loss: 2.0149
Epoch 7/60
[1m62/62[0m [32m━

KeyboardInterrupt: 