In [1]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten

Load preprocessed data

In [2]:
X = np.load("features.npy")  # We'll save this in a bit
y = np.load("labels.npy")

Encode string labels into numbers

In [3]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

Train-test split

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42
)

Model architecture

In [5]:
model = Sequential()
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(40, 173)))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.3))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.3))

In [7]:
model.add(Dense(64, activation='relu'))
model.add(Dense(y_categorical.shape[1], activation='softmax'))

In [8]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Training

In [9]:
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/30
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 22ms/step - accuracy: 0.1399 - loss: 2.1336 - val_accuracy: 0.1771 - val_loss: 2.0377
Epoch 2/30
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1938 - loss: 2.0267 - val_accuracy: 0.2674 - val_loss: 1.9621
Epoch 3/30
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.2305 - loss: 1.9764 - val_accuracy: 0.2674 - val_loss: 1.9552
Epoch 4/30
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2122 - loss: 1.9666 - val_accuracy: 0.2847 - val_loss: 1.9073
Epoch 5/30
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2571 - loss: 1.9165 - val_accuracy: 0.2778 - val_loss: 1.8685
Epoch 6/30
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.2721 - loss: 1.8698 - val_accuracy: 0.2917 - val_loss: 1.8546
Epoch 7/30
[1m36/36[0m [32m━━━━

Save model

In [10]:
model.save("emotion_model.h5")



Evaluate

In [11]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test accuracy:", test_acc)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3950 - loss: 1.6482 
Test accuracy: 0.3645833432674408


# 🔍 Model Analysis

Your model is learning something, but:

- ❗ It's **overfitting** (high train accuracy, low test accuracy)
- ❗ The model may be **too simple** or not generalized enough
- ❗ **Dataset is imbalanced** (some emotions like *neutral* or *calm* dominate)

---

## 🧠 What Can You Do to Improve?

### ✅ 1. Model Improvements
- Add more **Conv1D + Pooling layers**
- Add **Batch Normalization**
- Try **Bidirectional LSTM** instead of plain LSTM

---

### ✅ 2. Use Data Augmentation
- **Pitch shifting**
- **Time stretching**
- **Background noise**

> This helps make the model more robust to real-world voices!

---

### ✅ 3. Use Stratified Train-Test Split
- Ensures **emotion classes** are balanced during training.
- To ensure emotion classes are balanced during training.

---

### ✅ 4. Tune Hyperparameters
- Try different values for:
  - `batch_size`
  - `epochs`
  - `dropout`
  - learning rate, etc.

---
