In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:

# Step 1: Data Preprocessing
# Read the Excel workbook that holds the raw recordings into a DataFrame.
dataset_path = "./dataset/dataset.xlsx"
df = pd.read_excel(dataset_path)


In [None]:
# Drop irrelevant columns
# Rebind instead of mutating in place, and tolerate already-removed columns,
# so re-running this cell does not raise KeyError.
df = df.drop(columns=["Subject", "timestamps"], errors="ignore")

In [None]:
# Check for missing values and handle if necessary
# A row without an "Emotion" label cannot be used for supervised training.
# Filling the target with 0 would invent a label (or mix ints into string
# labels and break LabelEncoder), so those rows are removed instead.
df = df.dropna(subset=["Emotion"])

# Any remaining gaps are in feature columns.
if df.isna().any().any():
    df = df.fillna(0)  # Replace missing values with 0 or apply any other suitable strategy


In [None]:
# Preview the first rows. `head` has to be called: the bare attribute only
# displays the bound-method repr, not the data.
df.head()

In [None]:
# Map each distinct "Emotion" value to an integer code and store the codes
# back in the same column; the encoder is kept to recover the names later.
label_encoder = LabelEncoder().fit(df["Emotion"])
df["Emotion"] = label_encoder.transform(df["Emotion"])


In [None]:
# Separate the encoded target (y) from the remaining feature columns (X).
target_column = "Emotion"
y = df[target_column]
X = df.drop(columns=[target_column])


In [None]:
# Step 2: Data Visualization
# Visualize the distribution of emotions
# value_counts() orders by frequency, not by class code, so sort by code and
# derive each tick label from the code actually plotted at that position.
# Pairing the frequency-ordered index with label_encoder.classes_ (which is in
# code order) would attach the wrong emotion name to the bars.
emotion_counts = df["Emotion"].value_counts().sort_index()
emotion_names = label_encoder.inverse_transform(emotion_counts.index)
plt.bar(emotion_counts.index, emotion_counts.values)
plt.xticks(ticks=emotion_counts.index, labels=emotion_names)
plt.xlabel("Emotion")
plt.ylabel("Count")
plt.title("Emotion Distribution")
plt.show()


In [None]:
# Step 3: Feature Scaling
# Standardize the feature columns with StandardScaler.
# The scaler statistics here are computed over every row of X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Step 4: Model Training
# Split the data into training and testing sets
# The raw features are split first so that the held-out rows stay unseen by
# every fitted preprocessing step (same seed and size => same row partition).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and apply that one transform to both
# splits. Fitting it on all rows would leak the test set's mean/variance into
# training and make the reported scores optimistic.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [None]:
# Fit the primary model: a support vector classifier with a linear kernel.
svm_model = SVC(kernel="linear")
svm_model.fit(X=X_train, y=y_train)

In [None]:
# Train other classifiers for comparison
# The two tree ensembles are randomized; seed them (same seed as the
# train/test split) so a Restart & Run All reproduces the same scores.
lr_model = LogisticRegression()
rf_model = RandomForestClassifier(random_state=42)
gb_model = GradientBoostingClassifier(random_state=42)
lr_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)

In [None]:
# Step 5: Model Evaluation
def evaluate_model(model, X_test, y_test, model_name):
    """Print accuracy, confusion matrix and classification report for a model.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_test : features passed to ``model.predict``.
    y_test : true encoded labels matching ``X_test``.
    model_name : str
        Heading printed above the metrics.

    Returns
    -------
    None. All results are written to stdout.

    Notes
    -----
    Class names are read from the module-level ``label_encoder``.
    """
    y_pred = model.predict(X_test)

    # Pin the label set to every class the encoder knows. Without ``labels``,
    # scikit-learn infers the classes from y_test/y_pred, so a class missing
    # from this split makes ``target_names`` mismatch (ValueError) and shrinks
    # the confusion matrix.
    class_names = [str(name) for name in label_encoder.classes_]
    class_ids = list(range(len(class_names)))

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
    class_report = classification_report(
        y_test, y_pred, labels=class_ids, target_names=class_names
    )

    print(f"=== {model_name} ===")
    print("Accuracy:", accuracy)
    print("Confusion Matrix:\n", conf_matrix)
    print("Classification Report:\n", class_report)

# Report the SVM first, then the comparison models, on the same test split.
fitted_models = (
    ("SVM", svm_model),
    ("Logistic Regression", lr_model),
    ("Random Forest", rf_model),
    ("Gradient Boosting", gb_model),
)
for display_name, fitted_model in fitted_models:
    evaluate_model(fitted_model, X_test, y_test, display_name)