In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt

In [8]:
df = pd.read_csv('output//final_data_labeled.csv')

# Select features and target variable
features = ['magnitude', 'EDA', 'HR']
target = 'label'

X = df[features]
y = df[target]

In [9]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Defining the classifiers for comparing accuracy over popular algorithms
classifiers = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'k-NN': KNeighborsClassifier(),
    'SVM': SVC(random_state=42)
}

In [11]:
# Train and evaluate each classifier
 
for name, clf in classifiers.items():
    # Evaluate the model using cross-validation
    cv_scores = cross_val_score(clf, X_train_scaled, y_train, cv=5, scoring='recall_macro')
    print(f"{name} cross-validation recall_macro scores: {cv_scores}")
    print(f"{name} mean cross-validation recall_macro score: {np.mean(cv_scores)}")

    # Train the model on the training data
    clf.fit(X_train_scaled, y_train)
    svmclf = clf
    # Predict the labels for the test data
    y_pred = clf.predict(X_test_scaled)

    # Calculate recall score
    recall = recall_score(y_test, y_pred, average='macro')
    print(f"{name} recall_macro score: {recall}\n")

Logistic Regression cross-validation recall_macro scores: [0.62999235 0.62770042 0.63243979 0.63224544 0.63185304]
Logistic Regression mean cross-validation recall_macro score: 0.6308462083723889
Logistic Regression recall_macro score: 0.6298164447108411

Random Forest cross-validation recall_macro scores: [0.99826543 0.99825198 0.99855345 0.99828835 0.99810114]
Random Forest mean cross-validation recall_macro score: 0.9982920708285249
Random Forest recall_macro score: 0.9986268016933387

k-NN cross-validation recall_macro scores: [0.9743206  0.97402119 0.97401531 0.97382233 0.97326775]
k-NN mean cross-validation recall_macro score: 0.9738894366481397
k-NN recall_macro score: 0.9775679005659385

SVM cross-validation recall_macro scores: [0.81542301 0.8133021  0.81548722 0.81484511 0.81398402]
SVM mean cross-validation recall_macro score: 0.8146082926377052
SVM recall_macro score: 0.8154238948119049



F1 Scores of Models from below function
F1-score = 2 * (precision * recall_value) / (precision + recall_value)

In [1]:
def f1_score(precision, recall):
    return 2 * (precision * recall) / (precision + recall)

In [3]:
logistic_regression_precision = 0.6308462083723889
logistic_regression_recall = 0.6298164447108411
logistic_regression_f1 = f1_score(logistic_regression_precision, logistic_regression_recall)

random_forest_precision = 0.9982920708285249
random_forest_recall = 0.9986268016933387
random_forest_f1 = f1_score(random_forest_precision, random_forest_recall)

knn_precision = 0.9738894366481397
knn_recall = 0.9775679005659385
knn_f1 = f1_score(knn_precision, knn_recall)

svm_precision = 0.8146082926377052
svm_recall = 0.8154238948119049
svm_f1 = f1_score(svm_precision, svm_recall)

print("Logistic Regression F1-score:", logistic_regression_f1)
print("Random Forest F1-score:", random_forest_f1)
print("k-NN F1-score:", knn_f1)
print("SVM F1-score:", svm_f1)

Logistic Regression F1-score: 0.6303309059639147
Random Forest F1-score: 0.9984594082065242
k-NN F1-score: 0.9757252016860523
SVM F1-score: 0.815015889677636


: 