## Loading Package and Library

In [31]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np 
import os 
import pandas as pd 
import glob
from IPython.display import display, Markdown
import plotly.graph_objs as go
import plotly.offline as py
from scipy.stats import pearsonr
import seaborn as sns
sns.set(style='darkgrid')


from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report



## Models
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
from catboost import CatBoostClassifier

## Loading Dataset and Preproccessing

In [3]:
files = os.path.join("ER_dataset/", "*.csv")
files = glob.glob(files)
df = pd.concat(map(pd.read_csv, files))

###  Rename Columns
We rename columns, because it makes confusing to identify. We will try to rename this column to make it easily understandable.

In [4]:
df.columns = ['# condition','emotion','Feature 1','Feature 2','Feature 3','Feature 4','Feature 5','Feature 6','Feature 7']


## Split the dataset

In [5]:

df['emotion'] = df['emotion'].map({-1:0, 0:1, 1:2})
X = df.drop('emotion', axis=1)
X = df.drop('# condition', axis=1)
y = df.emotion

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Function to Evaluation Methods

In [9]:

def print_score(clf, X_train, y_train, X_test, y_test, train=True):
    if train:
        pred = clf.predict(X_train)
        clf_report = pd.DataFrame(classification_report(y_train, pred, output_dict=True))
        print("Train Result:\n================================================")
        print(f"Accuracy Score: {accuracy_score(y_train, pred) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(y_train, pred)}\n")
        
    elif train==False:
        pred = clf.predict(X_test)
        clf_report = pd.DataFrame(classification_report(y_test, pred, output_dict=True))
        print("Test Result:\n================================================")        
        print(f"Accuracy Score: {accuracy_score(y_test, pred) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(y_test, pred)}\n")

## 1. xgboost Model

In [8]:

xgb_clf = XGBClassifier(use_label_encoder=False)
xgb_clf.fit(X_train, y_train)


`use_label_encoder` is deprecated in 1.7.0.



### Evaluation Model

In [11]:
print_score(xgb_clf, X_train, y_train, X_test, y_test, train=True)
print_score(xgb_clf, X_train, y_train, X_test, y_test, train=False)

Train Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                  0         1         2  accuracy  macro avg  weighted avg
precision       1.0       1.0       1.0       1.0        1.0           1.0
recall          1.0       1.0       1.0       1.0        1.0           1.0
f1-score        1.0       1.0       1.0       1.0        1.0           1.0
support    151575.0  150965.0  151758.0       1.0   454298.0      454298.0
_______________________________________________
Confusion Matrix: 
 [[151575      0      0]
 [     0 150965      0]
 [     0      0 151758]]

Test Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                 0        1        2  accuracy  macro avg  weighted avg
precision      1.0      1.0      1.0       1.0        1.0           1.0
recall         1.0      1.0      1.0       1.0        1.0           1.0
f1-score       1.0      1.0      1.0       1.0    

In [13]:
test_score = accuracy_score(y_test, xgb_clf.predict(X_test)) * 100
print(f"test_score of xgboost Model:{test_score}")

test_score of xgboost Model:100.0


## 2. AdaBoost

In [16]:

# Create adaboost classifer object
abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=0)

# Train Adaboost Classifer
model1 = abc.fit(X_train, y_train)




AdaBoost Classifier Model Accuracy: 1.0


### Evaluation

In [52]:
#Predict the response for test dataset
y_pred = model1.predict(X_test)
# calculate and print model accuracy
print("AdaBoost Classifier Model Accuracy:", accuracy_score(y_test, y_pred))


AdaBoost Classifier Model Accuracy: 1.0


In [17]:
#print_score(model1, X_train, y_train, X_test, y_test, train=True)
print_score(model1, X_train, y_train, X_test, y_test, train=False)

Test Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                 0        1        2  accuracy  macro avg  weighted avg
precision      1.0      1.0      1.0       1.0        1.0           1.0
recall         1.0      1.0      1.0       1.0        1.0           1.0
f1-score       1.0      1.0      1.0       1.0        1.0           1.0
support    64967.0  65177.0  64556.0       1.0   194700.0      194700.0
_______________________________________________
Confusion Matrix: 
 [[64967     0     0]
 [    0 65177     0]
 [    0     0 64556]]



## 3. LightGBM Classifier

In [19]:
# build the lightgbm model
clf = lgb.LGBMClassifier()
clf.fit(X_train, y_train)

### Evaluation

In [20]:

print_score(clf, X_train, y_train, X_test, y_test, train=False)

Test Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                 0        1        2  accuracy  macro avg  weighted avg
precision      1.0      1.0      1.0       1.0        1.0           1.0
recall         1.0      1.0      1.0       1.0        1.0           1.0
f1-score       1.0      1.0      1.0       1.0        1.0           1.0
support    64967.0  65177.0  64556.0       1.0   194700.0      194700.0
_______________________________________________
Confusion Matrix: 
 [[64967     0     0]
 [    0 65177     0]
 [    0     0 64556]]



In [22]:
test_score = accuracy_score(y_test, model1.predict(X_test)) * 100
print(f"test_score of LightGBM Classifier:{test_score}")

test_score of LightGBM Classifier:100.0


## 4.Cat Boost 

In [25]:

clf = CatBoostClassifier(
    iterations=5, 
    learning_rate=0.1, 
    #loss_function='CrossEntropy'
)


clf.fit(X_train, y_train,  
        eval_set=(X_test, y_test), 
        verbose=False
)


<catboost.core.CatBoostClassifier at 0x1adf3b03910>

In [26]:

print_score(clf, X_train, y_train, X_test, y_test, train=False)

Test Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                 0        1        2  accuracy  macro avg  weighted avg
precision      1.0      1.0      1.0       1.0        1.0           1.0
recall         1.0      1.0      1.0       1.0        1.0           1.0
f1-score       1.0      1.0      1.0       1.0        1.0           1.0
support    64967.0  65177.0  64556.0       1.0   194700.0      194700.0
_______________________________________________
Confusion Matrix: 
 [[64967     0     0]
 [    0 65177     0]
 [    0     0 64556]]



In [27]:
test_score = accuracy_score(y_test, model1.predict(X_test)) * 100
print(f"test_score of Cat Boost:{test_score}")

test_score of Cat Boost:100.0


## 5. RandomForestClassifier

In [32]:


# Function to create windows
def create_windows(data, window_size, step_size):
    windows = []
    for i in range(0, len(data) - window_size + 1, step_size):
        windows.append(data[i:i + window_size])
    return np.array(windows)




# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Define window size and step size
window_size = 10  # Number of time steps in each window
step_size = 5     # Number of time steps to slide the window

# Create windows for features and labels
X_windows = create_windows(X_scaled, window_size, step_size)
y_windows = create_windows(y, window_size, step_size)

# Flatten the windows for classification
X_flattened = X_windows.reshape(X_windows.shape[0], -1)  # Flatten each window into a single feature vector
y_flattened = y_windows[:, -1]  # Use the last label in the window as the label for the entire window

Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_flattened, y_flattened, test_size=0.2, random_state=42)

# Train a classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)  # Use Random Forest as the classifier
clf.fit(X_train, y_train)



Accuracy: 0.9999614791987673
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      8630
           1       1.00      1.00      1.00      8685
           2       1.00      1.00      1.00      8645

    accuracy                           1.00     25960
   macro avg       1.00      1.00      1.00     25960
weighted avg       1.00      1.00      1.00     25960



### Evaluation

In [33]:

y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9999614791987673
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      8630
           1       1.00      1.00      1.00      8685
           2       1.00      1.00      1.00      8645

    accuracy                           1.00     25960
   macro avg       1.00      1.00      1.00     25960
weighted avg       1.00      1.00      1.00     25960

