# Import Libraries

In [None]:
import numpy as np
import json
import pandas as pd 
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, accuracy_score, balanced_accuracy_score, mean_squared_error, f1_score
from sklearn.svm import SVR
from sklearn import svm
from sklearn.metrics import mean_squared_error
import sklearn.ensemble
import tensorflow as tf
from tensorflow import keras

# Import Data

In [2]:
file_path = "Data\\Pilot-Data\\ML\\combined_ml_input.csv"

# Load the data
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,Participant,Stimulus,Epoch,Comfort Score,Fz_delta,Fz_theta,Fz_alpha,Fz_beta,F4_delta,F4_theta,...,O1_alpha,O1_beta,Oz_delta,Oz_theta,Oz_alpha,Oz_beta,O2_delta,O2_theta,O2_alpha,O2_beta
0,P007,Contrast1Size1,0,3,6.392014,0.101336,-0.494333,-0.254172,7.07595,0.113838,...,-0.510457,-0.341575,0.0,0.0,0.0,0.0,0.177223,-0.416796,-0.486639,-0.326462
1,P007,Contrast1Size1,1,4,3.410877,-0.121959,-0.446595,-0.260508,3.895948,-0.11849,...,-0.434389,-0.38063,0.0,0.0,0.0,0.0,0.181158,-0.406641,-0.442569,-0.392576
2,P007,Contrast1Size2,0,2,4.389003,-0.231418,-0.51945,-0.332549,4.738184,-0.220721,...,-0.472726,-0.317062,0.0,0.0,0.0,0.0,-0.105931,-0.456045,-0.490702,-0.360789
3,P007,Contrast1Size2,1,2,2.445576,0.106271,-0.450711,-0.177332,2.933193,0.258673,...,-0.506455,-0.149083,0.0,0.0,0.0,0.0,-0.195707,-0.437501,-0.479919,-0.204094
4,P007,Contrast1Size2,2,2,2.316107,0.012479,-0.438153,-0.269349,2.896455,0.101969,...,-0.480444,-0.268907,0.0,0.0,0.0,0.0,-0.064406,-0.418292,-0.480274,-0.314611


In [None]:
# Define number of training data
N_train = 60 

# Train-Test split
N_total = 80 
N_test = N_total - N_train

# Explicitly putting in the label of the majority class
#majority_class = 0

In [4]:
# Shuffle the data (to randomize the order)
shuffled_data = data.sample(frac=1).reset_index(drop=True)
shuffled_data

Unnamed: 0,Participant,Stimulus,Epoch,Comfort Score,Fz_delta,Fz_theta,Fz_alpha,Fz_beta,F4_delta,F4_theta,...,O1_alpha,O1_beta,Oz_delta,Oz_theta,Oz_alpha,Oz_beta,O2_delta,O2_theta,O2_alpha,O2_beta
0,P007,Contrast3Size2,2,3,2.929364,0.221982,-0.416695,-0.157910,3.343730,0.028388,...,-0.490129,-0.205111,0.000000,0.000000,0.000000,0.000000,0.015303,-0.418932,-0.501556,-0.318697
1,P007,Contrast2Size1,1,3,2.630212,0.106205,-0.451989,-0.271462,2.841799,0.089144,...,-0.476129,-0.331332,0.000000,0.000000,0.000000,0.000000,-0.181424,-0.404928,-0.502299,-0.350828
2,P007,Contrast3Size1,0,3,4.687463,0.322109,-0.469096,-0.282867,5.139104,0.440615,...,-0.475148,-0.246218,0.000000,0.000000,0.000000,0.000000,-0.161817,-0.457841,-0.496389,-0.310371
3,P007,Contrast4Size3,1,4,5.943374,-0.072944,-0.457276,-0.129172,5.906741,-0.002346,...,-0.445609,-0.042147,0.000000,0.000000,0.000000,0.000000,0.211570,-0.467304,-0.484525,-0.098072
4,P008,Contrast2Size2,2,1,1.796473,0.021520,-0.375465,-0.306921,1.366080,-0.031532,...,-0.398412,-0.248017,-0.001184,-0.322688,-0.403404,-0.323104,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,P008,Contrast4Size2,0,5,3.463811,0.113269,-0.334486,-0.175540,3.490843,0.060298,...,-0.415156,-0.082983,-0.004924,-0.279689,-0.415621,-0.178910,0.000000,0.000000,0.000000,0.000000
76,P007,Contrast1Size2,0,2,4.389003,-0.231418,-0.519450,-0.332549,4.738184,-0.220721,...,-0.472726,-0.317062,0.000000,0.000000,0.000000,0.000000,-0.105931,-0.456045,-0.490702,-0.360789
77,P008,Contrast4Size1,4,5,1.482600,-0.093216,-0.321371,-0.300616,1.359984,-0.147565,...,-0.369408,-0.203985,-0.081186,-0.331067,-0.390438,-0.292652,0.000000,0.000000,0.000000,0.000000
78,P007,Contrast1Size3,0,1,4.696041,0.342945,-0.446511,-0.240983,4.485701,0.298512,...,-0.442762,-0.247574,0.000000,0.000000,0.000000,0.000000,0.081836,-0.453355,-0.476601,-0.278052


In [5]:
# Split into identifier, features and labels.
identifier = shuffled_data.iloc[:,0]
labels = shuffled_data.iloc[:,3]
features = shuffled_data.iloc[:, 4:]

In [6]:
identifier

0     P007
1     P007
2     P007
3     P007
4     P008
      ... 
75    P008
76    P007
77    P008
78    P007
79    P007
Name: Participant, Length: 80, dtype: object

In [7]:
labels

0     3
1     3
2     3
3     4
4     1
     ..
75    5
76    2
77    5
78    1
79    5
Name: Comfort Score, Length: 80, dtype: int64

In [8]:
features

Unnamed: 0,Fz_delta,Fz_theta,Fz_alpha,Fz_beta,F4_delta,F4_theta,F4_alpha,F4_beta,F8_delta,F8_theta,...,O1_alpha,O1_beta,Oz_delta,Oz_theta,Oz_alpha,Oz_beta,O2_delta,O2_theta,O2_alpha,O2_beta
0,2.929364,0.221982,-0.416695,-0.157910,3.343730,0.028388,-0.400632,-0.131755,2.168711,-0.202233,...,-0.490129,-0.205111,0.000000,0.000000,0.000000,0.000000,0.015303,-0.418932,-0.501556,-0.318697
1,2.630212,0.106205,-0.451989,-0.271462,2.841799,0.089144,-0.482046,-0.258238,2.402276,-0.095379,...,-0.476129,-0.331332,0.000000,0.000000,0.000000,0.000000,-0.181424,-0.404928,-0.502299,-0.350828
2,4.687463,0.322109,-0.469096,-0.282867,5.139104,0.440615,-0.468237,-0.300976,5.229246,0.308396,...,-0.475148,-0.246218,0.000000,0.000000,0.000000,0.000000,-0.161817,-0.457841,-0.496389,-0.310371
3,5.943374,-0.072944,-0.457276,-0.129172,5.906741,-0.002346,-0.464220,-0.125501,5.371136,-0.130759,...,-0.445609,-0.042147,0.000000,0.000000,0.000000,0.000000,0.211570,-0.467304,-0.484525,-0.098072
4,1.796473,0.021520,-0.375465,-0.306921,1.366080,-0.031532,-0.386212,-0.313682,1.119800,-0.177484,...,-0.398412,-0.248017,-0.001184,-0.322688,-0.403404,-0.323104,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,3.463811,0.113269,-0.334486,-0.175540,3.490843,0.060298,-0.353019,-0.213066,6.338947,0.046303,...,-0.415156,-0.082983,-0.004924,-0.279689,-0.415621,-0.178910,0.000000,0.000000,0.000000,0.000000
76,4.389003,-0.231418,-0.519450,-0.332549,4.738184,-0.220721,-0.524267,-0.336621,4.802728,-0.259340,...,-0.472726,-0.317062,0.000000,0.000000,0.000000,0.000000,-0.105931,-0.456045,-0.490702,-0.360789
77,1.482600,-0.093216,-0.321371,-0.300616,1.359984,-0.147565,-0.381453,-0.317458,2.390388,-0.174222,...,-0.369408,-0.203985,-0.081186,-0.331067,-0.390438,-0.292652,0.000000,0.000000,0.000000,0.000000
78,4.696041,0.342945,-0.446511,-0.240983,4.485701,0.298512,-0.466887,-0.261803,4.071324,0.093972,...,-0.442762,-0.247574,0.000000,0.000000,0.000000,0.000000,0.081836,-0.453355,-0.476601,-0.278052


In [9]:
# Split into training and test sets
## Uses numbers defined above
X_train, X_test, y_train, y_test = train_test_split(
    features, 
    labels, 
    train_size=N_train,
    test_size=N_test,
    shuffle=False)

# SVM

In [15]:
# Kernel options are 'linear, 'poly', 'sigmoid', 'rbf'
svm_kernel = 'poly'  # Modify this
# If 'poly' kernel, specify a degree.
# This parameter has no effect for other kernel types
svm_degree = 3  # Modify this

In [16]:
svm_model = svm.SVC(kernel=svm_kernel, degree=svm_degree)
# Kernel optiosn are 'linear, 'poly', 'sigmoid', 'rbf'

svm_model.fit(X_train, y_train)

class_predictions_svm = svm_model.predict(X_test)

# Calculate metrics
accuracy_svm = balanced_accuracy_score(y_test, class_predictions_svm)
balanced_accuracy_svm = balanced_accuracy_score(y_test, class_predictions_svm)
f1_svm = f1_score(y_test, class_predictions_svm, average='macro')

# RF

In [17]:
# Number of decision trees in the forest
rf_trees = 100  # Modify this

In [18]:
rf_model = sklearn.ensemble.RandomForestClassifier(n_estimators=rf_trees)
rf_model.fit(X_train, y_train)

class_predictions_rf = rf_model.predict(X_test)

# Calculate metrics
accuracy_rf = balanced_accuracy_score(y_test, class_predictions_rf)
balanced_accuracy_rf = balanced_accuracy_score(y_test, class_predictions_rf)
f1_rf = f1_score(y_test, class_predictions_rf, average='macro')

# Results

In [19]:
# Summary of outcomes

# SVM
print(f"SVM Accuracy: {accuracy_svm}")
print(f"SVM Balanced Accuracy: {balanced_accuracy_svm}")
print(f"SVM F1 Score: {f1_svm}")

print('\n')

# Random Forest
print(f"RF Accuracy: {accuracy_rf}")
print(f"RF Balanced Accuracy: {balanced_accuracy_rf}")
print(f"RF F1 Score: {f1_rf}")


SVM Accuracy: 0.2
SVM Balanced Accuracy: 0.2
SVM F1 Score: 0.08


RF Accuracy: 0.11666666666666665
RF Balanced Accuracy: 0.11666666666666665
RF F1 Score: 0.10666666666666666
