In [None]:
import pandas as pd
from pykalman import KalmanFilter
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import pickle


In [31]:

# Load the dataset from the working directory; the frame holds numeric feature
# columns (mean_*, fft_*, ...) plus a string `label` column.
data = pd.read_csv("emotions.csv")



In [32]:
# Preview the first rows to sanity-check the load.
data.head()

Unnamed: 0,# mean_0_a,mean_1_a,mean_2_a,mean_3_a,mean_4_a,mean_d_0_a,mean_d_1_a,mean_d_2_a,mean_d_3_a,mean_d_4_a,...,fft_741_b,fft_742_b,fft_743_b,fft_744_b,fft_745_b,fft_746_b,fft_747_b,fft_748_b,fft_749_b,label
0,4.62,30.3,-356.0,15.6,26.3,1.07,0.411,-15.7,2.06,3.15,...,23.5,20.3,20.3,23.5,-215.0,280.0,-162.0,-162.0,280.0,NEGATIVE
1,28.8,33.1,32.0,25.8,22.8,6.55,1.68,2.88,3.83,-4.82,...,-23.3,-21.8,-21.8,-23.3,182.0,2.57,-31.6,-31.6,2.57,NEUTRAL
2,8.9,29.4,-416.0,16.7,23.7,79.9,3.36,90.2,89.9,2.03,...,462.0,-233.0,-233.0,462.0,-267.0,281.0,-148.0,-148.0,281.0,POSITIVE
3,14.9,31.6,-143.0,19.8,24.3,-0.584,-0.284,8.82,2.3,-1.97,...,299.0,-243.0,-243.0,299.0,132.0,-12.4,9.53,9.53,-12.4,POSITIVE
4,28.3,31.3,45.2,27.3,24.5,34.8,-5.79,3.06,41.4,5.52,...,12.0,38.1,38.1,12.0,119.0,-17.6,23.9,23.9,-17.6,NEUTRAL


In [33]:
# (rows, columns) of the loaded frame.
data.shape

(2132, 2549)

In [34]:
# Summarise index range, column dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2132 entries, 0 to 2131
Columns: 2549 entries, # mean_0_a to label
dtypes: float64(2548), object(1)
memory usage: 41.5+ MB


In [35]:
# Count missing values per column.
data.isnull().sum()

# mean_0_a    0
mean_1_a      0
mean_2_a      0
mean_3_a      0
mean_4_a      0
             ..
fft_746_b     0
fft_747_b     0
fft_748_b     0
fft_749_b     0
label         0
Length: 2549, dtype: int64

In [36]:
# List the distinct class names present in the target column.
data['label'].unique()

array(['NEGATIVE', 'NEUTRAL', 'POSITIVE'], dtype=object)

In [38]:
# Fit an encoder on the string labels and keep the integer codes in a new
# column next to the original `label` column.
label_encoder = LabelEncoder()
data['encoded_label'] = label_encoder.fit_transform(data['label'])

# Persist the fitted encoder so integer predictions can be mapped back to
# class names outside this notebook.
with open("label_encoder.pkl", "wb") as encoder_file:
    pickle.dump(label_encoder, encoder_file)

# Show which integer code each class name received.
class_names = label_encoder.classes_
class_codes = label_encoder.transform(class_names)
print("Encoded Labels Mapping:", {name: code for name, code in zip(class_names, class_codes)})


Encoded Labels Mapping: {'NEGATIVE': 0, 'NEUTRAL': 1, 'POSITIVE': 2}


In [41]:
def apply_kalman_filter(signal):
    """Run a Kalman filter over a 1-D sequence and return the filtered state means.

    Parameters
    ----------
    signal : array-like
        Sequence of scalar observations; it is reshaped to a single-column
        observation matrix before filtering.

    Returns
    -------
    numpy.ndarray
        Flattened array of filtered state means, one per observation.
    """
    observations = np.reshape(signal, (-1, 1))
    # Start the filter at the sequence mean; all other model parameters are
    # left at the KalmanFilter defaults.
    kalman = KalmanFilter(initial_state_mean=np.mean(observations), n_dim_obs=1)
    # filter() returns (state means, state covariances); only the means are kept.
    filtered_means = kalman.filter(observations)[0]
    return filtered_means.flatten()

# Filter every feature column independently; the two label columns are excluded.
# NOTE(review): axis=0 feeds each column (one value per row) to the filter as a
# single sequence, i.e. row order is treated as the time axis -- confirm that
# the rows really are time-ordered.
feature_columns = data.drop(columns=["label", "encoded_label"])
filtered_data = feature_columns.apply(apply_kalman_filter, axis=0)

# Re-attach the integer target so features and labels stay in one frame.
filtered_data["encoded_label"] = data["encoded_label"]


In [42]:
# Write the filtered features (plus `encoded_label`) to disk without the row index.
filtered_data.to_csv("filtered_eeg_data.csv", index=False)
print("Data saved successfully.")


Data saved successfully.


In [43]:

# Separate the target column from the feature matrix.
y = filtered_data["encoded_label"]
X = filtered_data.drop(columns=["encoded_label"])

# Oversample with SMOTE so every class ends up with the same number of rows.
# NOTE(review): resampling is done on the full dataset, before the train/test
# split that follows; synthetic rows interpolated from samples that later land
# in the test set can therefore leak into training. Consider splitting first
# and resampling the training partition only.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

class_counts = pd.Series(y_resampled).value_counts()
print("Class distribution after SMOTE:")
print(class_counts)

NameError: name 'SMOTE' is not defined

In [None]:

# Bar chart of rows per encoded class after resampling.
ax = sns.countplot(x=y_resampled)
ax.set_title("Balanced Class Distribution After SMOTE")
plt.show()


In [None]:
# Plot the feature vectors of the first few rows as line charts.
# Each row is plotted against integer feature positions: passing the Series on
# its own would make matplotlib use its index (the string column names) as a
# categorical x-axis with one tick per feature, which is slow and unreadable.
plt.figure(figsize=(10, 6))
feature_positions = range(X_resampled.shape[1])
for i in range(min(3, len(X_resampled))):  # guard against frames with < 3 rows
    plt.plot(feature_positions, X_resampled.iloc[i, :], label=f"Sample {i}")
plt.title("Example EEG Signals After Preprocessing")
plt.xlabel("Feature index")
plt.ylabel("Filtered feature value")
plt.legend()
plt.show()


In [None]:


# Split into training and testing sets (80/20, fixed seed for reproducibility).
# Stratify on the target so both partitions keep the same class proportions;
# a purely random split can leave the test set slightly imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled,
    y_resampled,
    test_size=0.2,
    random_state=42,
    stratify=y_resampled,
)

print(f"Training set size: {X_train.shape}, Test set size: {X_test.shape}")


In [None]:
# Learn per-feature min/max from the training split only, then apply the same
# scaling to both splits so no test statistics enter the fit.
scaler = MinMaxScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data normalization complete.")


In [None]:

# Number of top-scoring features to keep; tune for your dataset.
k_features = 50

# Rank features by their chi-squared score against the training labels and
# keep the best k. The selector is fitted on the training split only and then
# reused unchanged on the test split.
chi2_selector = SelectKBest(score_func=chi2, k=k_features).fit(X_train_scaled, y_train)

X_train_selected = chi2_selector.transform(X_train_scaled)
X_test_selected = chi2_selector.transform(X_test_scaled)

print(f"Selected top {k_features} features.")


In [None]:
# Carve a validation set out of the training data. Checkpointing and early
# stopping both pick the best epoch by `val_accuracy`; if the test set were
# used for that, the model would be selected on the very data it is later
# scored on and the reported test accuracy would be optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_selected,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

# Define DBN-like architecture: a feed-forward stack of dense layers with
# batch normalisation and progressively lighter dropout.
dbn_model = Sequential([
    Dense(512, activation='relu', input_dim=X_train_selected.shape[1]),
    BatchNormalization(),
    Dropout(0.4),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    Dropout(0.2),

    # One softmax unit per class found in the original target column.
    Dense(len(y.unique()), activation='softmax')
])

# Compile the model with custom learning rate.
# The sparse loss matches the integer-encoded (not one-hot) labels.
dbn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Define callbacks for checkpointing and early stopping
# Save the weights of the epoch with the highest validation accuracy.
checkpoint_callback = ModelCheckpoint(
    filepath="best_emotion_detection_model.h5",
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping_callback = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# Train the model; validation metrics come from the held-out part of the
# training data, leaving X_test_selected / y_test untouched until evaluation.
history = dbn_model.fit(
    X_fit, y_fit,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=64,
    callbacks=[checkpoint_callback, early_stopping_callback]
)


In [None]:

# Plot training and validation accuracy per epoch as recorded by fit().
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for metric_key, curve_label in accuracy_curves:
    plt.plot(history.history[metric_key], label=curve_label)
plt.title('Model Accuracy')
plt.legend()
plt.show()

# Evaluate the model on the test split and report its accuracy.
test_loss, test_acc = dbn_model.evaluate(X_test_selected, y_test)
print(f"Test Accuracy: {test_acc:.2f}")


In [None]:
# Persist the fitted scaler so new data can be normalised exactly like the
# training data.
with open("scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)

# The network was trained on the chi2-selected columns only, so the fitted
# selector is needed as well to reproduce the model's input at inference time.
with open("feature_selector.pkl", "wb") as file:
    pickle.dump(chi2_selector, file)

In [None]:

# Per-class probabilities from the softmax output, one row per test sample.
y_pred_probs = dbn_model.predict(X_test_selected)
# Predicted class = index of the highest probability in each row.
y_pred_classes = y_pred_probs.argmax(axis=1)


In [None]:

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Plot the confusion matrix as an annotated heatmap, using the original class
# names from the label encoder as tick labels.
class_names = label_encoder.classes_
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_title("Confusion Matrix")
heatmap_ax.set_xlabel("Predicted")
heatmap_ax.set_ylabel("Actual")
plt.show()




In [None]:
# Classification Report: per-class precision, recall and F1 on the test split,
# labelled with the original class names.
report_text = classification_report(
    y_test,
    y_pred_classes,
    target_names=label_encoder.classes_,
)
print("Classification Report:")
print(report_text)