In [1]:
############################## can we decide between different thought categories? ############################
## Dependencies

# Standard library
import random
from collections import Counter

# Import package/module for data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Import modules for feature engineering and modelling
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.impute import SimpleImputer

# pipeline
from imblearn.pipeline import Pipeline

# models
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.neural_network import MLPClassifier

# cross validation and hyperparameter tuning
from sklearn.model_selection import StratifiedGroupKFold,  GridSearchCV

# balancing
from imblearn.over_sampling import RandomOverSampler, SMOTE

#accuracy
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, classification_report

#feature selection
from sklearn import set_config
from sklearn.feature_selection import SelectKBest, chi2


In [6]:
# Load the prepared feature table (features together with the probe labels).
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that drive is mounted; consider a configurable data directory.
read_path = r"W:\WCT\04_Mind-Wandering-Labstudy\04_Daten\04_Prepared_data\00_Julia\Model Building\features_with_labels.csv"
df = pd.read_csv(read_path)

In [4]:
# Merge categories.
# Delete the category "Others" (Content_cat == 14) because it is not predictable.
# .copy() turns the filtered slice into an independent frame, so the column
# assignment below no longer triggers pandas' SettingWithCopyWarning.
df = df.loc[df["Content_cat"] != 14].copy()
# Merge task-related thoughts (TRT) into "On Task" to get bigger classes.
df["Content_Probe"] = df["Content_Probe"].replace("TRT", "On Task")

# Renumber the rows 0..n-1 after dropping the "Others" probes.
df = df.reset_index(drop=True)
df

Unnamed: 0.1,Unnamed: 0,Participant,Probe,Tracking Ratio [%] Mean,Fixation Duration Mean [ms],Fixation Duration Max [ms],Fixation Duration Min [ms],Fixation Duration Median [ms],Fixation Duration Std [ms],Fixation Duration Skew [ms],...,Fixation Average Pupil Diameter [mm] Kurtosis,Veregence Angles Mean [rad],Veregence Angles Std [rad],Pupil Distance Mean [px],Pupil Distance Std [px],Awareness_all,Content_cat,Content_text,Content_Probe,Awareness_all_cat
0,0,1,1,85.268,259.893077,668.0035,79.9605,246.01250,148.837048,1.275655,...,-0.397839,0.024544,0.008193,219.578560,126.804463,task-related,3.0,Lecture Comprehension (TRI),TRI,1
1,1,1,10,81.388,195.289221,738.0090,59.9435,144.00400,139.321063,2.273614,...,-0.369095,0.016063,0.008325,220.563184,106.613971,task-related,1.0,On task,On Task,1
2,2,1,11,83.450,204.240667,749.8700,63.9685,165.05425,136.716801,2.419750,...,18.112139,0.033338,0.023195,209.008402,109.885864,task-related,3.0,Lecture Comprehension (TRI),TRI,1
3,3,1,12,83.736,311.880068,1004.0200,65.9780,206.00375,266.756308,1.735261,...,13.587151,0.030035,0.018617,227.466912,125.311414,task-related,3.0,Lecture Comprehension (TRI),TRI,1
4,4,1,13,87.020,483.836531,1203.8275,111.9665,311.97925,384.029087,1.091558,...,-0.213811,0.024829,0.006304,218.641919,111.928529,task-related,2.0,Ideas about lecture (TRT),On Task,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1013,1022,97,5,91.360,563.403233,2699.5470,107.9430,208.01900,868.559968,2.276932,...,-1.417532,0.019285,0.006715,259.802260,70.026819,aware,4.0,Personal matters (TUT),TUT,2
1014,1023,97,6,96.044,689.920744,3299.6905,85.9770,254.93450,961.214162,2.220278,...,0.946158,0.019262,0.005560,262.876279,58.771618,task-related,1.0,On task,On Task,1
1015,1024,97,7,86.734,358.844061,929.7645,101.9995,246.88975,248.337427,1.136750,...,0.018713,0.027485,0.008152,248.013878,88.994922,task-related,3.0,Lecture Comprehension (TRI),TRI,1
1016,1025,97,8,87.880,347.362106,905.9330,98.0240,256.97750,223.867392,1.267630,...,-1.078778,0.019740,0.008049,264.238181,36.298896,aware,4.0,Personal matters (TUT),TUT,2


In [5]:
# Label encoder for the content probe: assign an integer code to every
# thought category and store it in a separate column.
labelencoder = LabelEncoder()
df["Content_probe_categories"] = labelencoder.fit_transform(df["Content_Probe"])

# Class distribution of the merged (less specific) categories:
#   0 -> On Task & TRT : 452 probes (44.4 %)
#   2 -> TUT           : 396 probes (38.9 %)
#   1 -> TRI           : 170 probes (16.7 %)
df["Content_probe_categories"].value_counts()

0    452
2    396
1    170
Name: Content_probe_categories, dtype: int64

In [6]:
def get_X_y(train):
    """Split the probe table into model inputs, target and CV groups.

    Parameters
    ----------
    train : table indexable by column name (a pandas DataFrame in this notebook)
        Must contain every column listed below plus "Content_probe_categories"
        and "Participant".

    Returns
    -------
    tuple
        ``(X, y_thoughts_cat, groups)``: the fixed list of fixation, saccade,
        blink, pupil and vergence summary columns; the
        "Content_probe_categories" column; and the "Participant" column.
    """
    # NOTE(review): several names end in a stray "]" -- presumably they mirror
    # the CSV headers exactly; verify against the file before "fixing" them.
    feature_columns = [
        'Fixation Duration Mean [ms]', 'Fixation Duration Max [ms]', 'Fixation Duration Min [ms]', 'Fixation Duration Median [ms]', 'Fixation Duration Std [ms]', 'Fixation Duration Skew [ms]', 'Fixation Duration Quantil 25 [ms]', 'Fixation Duration Quantil 75 [ms]',
        'Saccade Duration Mean [ms]', 'Saccade Duration Max [ms]', 'Saccade Duration Min [ms]', 'Saccade Duration Median [ms]', 'Saccade Duration Std [ms]', 'Saccade Duration Skew [ms]', 'Saccade Duration Quantil 25 [ms]', 'Saccade Duration Quantil 75 [ms]',
        'Blink Duration Mean [ms]', 'Blink Duration Max [ms]', 'Blink Duration Min [ms]', 'Blink Duration Median [ms]', 'Blink Duration Std [ms]', 'Blink Duration Skew [ms]', 'Blink Duration Quantil 25 [ms]', 'Blink Duration Quantil 75 [ms]', 'Fixation Duration Kurtosis [ms]',
        'Saccade Duration Kurtosis [ms]',
        'Blink Duration Kurtosis [ms]',
        'Fixation Saccade Ratio Mean', 'Fixation Saccade Ratio Max', 'Fixation Saccade Ratio Min', 'Fixation Saccade Ratio Median', 'Fixation Saccade Ratio Std', 'Fixation Saccade Ratio Skew', 'Fixation Saccade Ratio Kurtosis',
        'Fixation Number', 'Blink Number',
        'Fixation Dispersion X Mean [px]', 'Fixation Dispersion X Max [px]', 'Fixation Dispersion X Min [px]', 'Fixation Dispersion X Median [px]', 'Fixation Dispersion X Std [px]', 'Fixation Dispersion X Skew [px]', 'Fixation Dispersion X Quantil 25 [px]', 'Fixation Dispersion X Quantil 75 [px]',
        'Fixation Dispersion Y Mean [px]', 'Fixation Dispersion Y Max [px]', 'Fixation Dispersion Y Min [px]', 'Fixation Dispersion Y Median [px]', 'Fixation Dispersion Y Std [px]', 'Fixation Dispersion Y Skew [px]', 'Fixation Dispersion Y Quantil 25 [px]', 'Fixation Dispersion Y Quantil 75 [px]', 'Fixation Dispersion X Kurtosis [px]', 'Fixation Dispersion Y Kurtosis [px]',
        'Saccade Amplitude Mean [°]', 'Saccade Amplitude Max [°]', 'Saccade Amplitude Min [°]', 'Saccade Amplitude Median [°]', 'Saccade Amplitude Std [°]', 'Saccade Amplitude Skew [°]', 'Saccade Amplitude Quantil 25 [°]', 'Saccade Amplitude Quantil 75 [°]', 'Saccade Amplitude Kurtosis [°]',
        'Saccade Acceleration Average [°/s²] Mean', 'Saccade Acceleration Average [°/s²] Max', 'Saccade Acceleration Average [°/s²] Min', 'Saccade Acceleration Average [°/s²] Median', 'Saccade Acceleration Average [°/s²] Std', 'Saccade Acceleration Average [°/s²] Skew]', 'Saccade Acceleration Average [°/s²] Quantil 25]', 'Saccade Acceleration Average [°/s²] Quantil 75]',
        'Saccade Acceleration Peak [°/s²] Mean', 'Saccade Acceleration Peak [°/s²] Max', 'Saccade Acceleration Peak [°/s²] Min', 'Saccade Acceleration Peak [°/s²] Median', 'Saccade Acceleration Peak [°/s²] Std', 'Saccade Acceleration Peak [°/s²] Skew]', 'Saccade Acceleration Peak [°/s²] Quantil 25]', 'Saccade Acceleration Peak [°/s²] Quantil 75]', 'Saccade Deceleration Peak [°/s²] Mean',
        'Saccade Deceleration Peak [°/s²] Max', 'Saccade Deceleration Peak [°/s²] Min', 'Saccade Deceleration Peak [°/s²] Median', 'Saccade Deceleration Peak [°/s²] Std', 'Saccade Deceleration Peak [°/s²] Skew]', 'Saccade Deceleration Peak [°/s²] Quantil 25]', 'Saccade Deceleration Peak [°/s²] Quantil 75]',
        'Saccade Velocity Average [°/s²] Mean', 'Saccade Velocity Average [°/s²] Max', 'Saccade Velocity Average [°/s²] Min', 'Saccade Velocity Average [°/s²] Median', 'Saccade Velocity Average [°/s²] Std', 'Saccade Velocity Average [°/s²] Skew]', 'Saccade Velocity Average [°/s²] Quantil 25]', 'Saccade Velocity Average [°/s²] Quantil 75]',
        'Saccade Velocity Peak [°/s²] Mean', 'Saccade Velocity Peak [°/s²] Max', 'Saccade Velocity Peak [°/s²] Min', 'Saccade Velocity Peak [°/s²] Median', 'Saccade Velocity Peak [°/s²] Std', 'Saccade Velocity Peak [°/s²] Skew]', 'Saccade Velocity Peak [°/s²] Quantil 25]', 'Saccade Velocity Peak [°/s²] Quantil 75]',
        'Saccade Velocity Peak [%] Mean', 'Saccade Velocity Peak [%] Max', 'Saccade Velocity Peak [%] Min', 'Saccade Velocity Peak [%] Median', 'Saccade Velocity Peak [%] Std', 'Saccade Velocity Peak [%] Skew]', 'Saccade Velocity Peak [%] Quantil 25]', 'Saccade Velocity Peak [%] Quantil 75]',
        'Saccade Acceleration Average [°/s²] Kurtosis', 'Saccade Acceleration Peak [°/s²] Kurtosis', 'Saccade Deceleration Peak [°/s²] Kurtosis', 'Saccade Velocity Average [°/s²] Kurtosis', 'Saccade Velocity Peak [°/s²] Kurtosis', 'Saccade Velocity Peak [%] Kurtosis',
        'Saccade Length Mean [px]', 'Saccade Length Max [px]', 'Saccade Length Min [px]', 'Saccade Length Median [px]', 'Saccade Length Std [px]', 'Saccade Length Skew [px]]', 'Saccade Length Quantil 25 [px]]', 'Saccade Length Quantil 75 [px]]', 'Saccade Length Kurtosis [px]',
        'Fixation Average Pupil Diameter [mm] Mean', 'Fixation Average Pupil Diameter [mm] Max', 'Fixation Average Pupil Diameter [mm] Min', 'Fixation Average Pupil Diameter [mm] Median', 'Fixation Average Pupil Diameter [mm] Std', 'Fixation Average Pupil Diameter [mm] Skew', 'Fixation Average Pupil Diameter [mm] Quantil25', 'Fixation Average Pupil Diameter [mm] Quantil75',
        'Fixation Average Pupil Diameter [mm] Kurtosis',
        'Veregence Angles Mean [rad]', 'Veregence Angles Std [rad]',
        'Pupil Distance Mean [px]', 'Pupil Distance Std [px]'
    ]

    # Same lookup order as before: features, then target, then grouping column.
    feature_table = train[feature_columns]
    thought_category = train["Content_probe_categories"]
    participant_ids = train["Participant"]
    return feature_table, thought_category, participant_ids

In [7]:
# Feature table, encoded thought-category target and participant column
# that the cross-validation cells below consume.
X, y_thoughts_cat, groups = get_X_y(df)

In [65]:
#### Thought Categories not specific
## Pipeline
# `fill_value` is only used by SimpleImputer when strategy="constant"; the
# former fill_value='missing' was therefore ignored and the default mean
# imputation was applied. The effective strategy is now spelled out.
imputer = SimpleImputer(strategy="mean")
scaler = StandardScaler()
over = SMOTE(random_state=27)

# Candidate models -- hyperparameters have to be re-tuned for the new categories.
# NOTE(review): the commented alternatives are kept exactly as recorded; check
# their arguments before re-enabling one (e.g. the XGBoost objective string
# repeats "objective=").
#model = MLPClassifier()
#model = RandomForestClassifier(random_state=0,bootstrap= True, max_depth = 50, max_features="auto", min_samples_leaf = 10, min_samples_split= 1, n_estimators = 50)
#model = SVC(C = 0.1, gamma = 0.5, kernel = "linear")
#model = xgb.XGBClassifier(objective="objective=multi:softmax", random_state=42, colsample_bytree = 1, max_depth = 3, n_estimators = 100, subsample = 1)
model = GaussianNB()

# Order of the steps: impute -> scale -> oversample -> classify.
steps = [('imputer', imputer), ('scaler', scaler), ('over', over), ('model', model)]
pipe = Pipeline(steps=steps)

In [66]:
##################### prediction without baseline #####################
# Per-class F1 / recall / precision of the pipeline, estimated with a
# stratified 5-fold cross-validation that is grouped by `groups`.
sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

labels = [0, 1, 2]

# One list of fold scores per encoded class. The key order "0", "2", "1" is
# kept on purpose: the results cell reports the averages in dict order.
f1_scores = {"0": [], "2": [], "1": []}
precision_scores = {"0": [], "2": [], "1": []}
recall_scores = {"0": [], "2": [], "1": []}

# stratified group k fold
for fold, (train_index, test_index) in enumerate(sgk.split(X, y_thoughts_cat, groups)):
    # split() yields positional indices, so rows are selected with .iloc;
    # .loc only picks the same rows while the index is exactly 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y_thoughts_cat.iloc[train_index], y_thoughts_cat.iloc[test_index]

    # Fit on the training folds, predict the held-out fold.
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    # average=None returns one score per entry of `labels`, in that order.
    f1 = f1_score(y_test, y_pred, average=None, labels=labels)
    recall = recall_score(y_test, y_pred, average=None, labels=labels)
    precision = precision_score(y_test, y_pred, average=None, labels=labels)

    for position, label in enumerate(labels):
        f1_scores[str(label)].append(f1[position])
        recall_scores[str(label)].append(recall[position])
        precision_scores[str(label)].append(precision[position])

    print(f"f1 with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, f1)})
    print(f"recall with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, recall)})
    print(f"precision with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, precision)})


f1 with labels  score for fold 0:  {0: 0.22764227642276424, 1: 0.3191489361702127, 2: 0.18691588785046728}
recall with labels  score for fold 0:  {0: 0.14893617021276595, 1: 0.8571428571428571, 2: 0.125}
precision with labels  score for fold 0:  {0: 0.4827586206896552, 1: 0.19607843137254902, 2: 0.37037037037037035}
f1 with labels  score for fold 1:  {0: 0.273972602739726, 1: 0.21052631578947367, 2: 0.15730337078651682}
recall with labels  score for fold 1:  {0: 0.18691588785046728, 1: 0.6923076923076923, 2: 0.1}
precision with labels  score for fold 1:  {0: 0.5128205128205128, 1: 0.12413793103448276, 2: 0.3684210526315789}
f1 with labels  score for fold 2:  {0: 0.24390243902439024, 1: 0.28415300546448086, 2: 0.12500000000000003}
recall with labels  score for fold 2:  {0: 0.20270270270270271, 1: 0.6666666666666666, 2: 0.07291666666666667}
precision with labels  score for fold 2:  {0: 0.30612244897959184, 1: 0.18055555555555555, 2: 0.4375}
f1 with labels  score for fold 3:  {0: 0.300751

In [67]:
# Results: mean over the folds for every class.
# Less specific categories:
#   0 -> On-Task & TRT (452)
#   2 -> TUT (396)
#   1 -> TRI (170)
def _fold_means(scores_by_class):
    """Return [(class_key, mean of its fold scores), ...] in the dict's key order."""
    means = []
    for class_key, fold_values in scores_by_class.items():
        means.append((class_key, sum(fold_values) / len(fold_values)))
    return means


f1_averages = _fold_means(f1_scores)
print("f1 score:", f1_averages)

precision_averages = _fold_means(precision_scores)
print("precision:", precision_averages)

recall_averages = _fold_means(recall_scores)
print("recall:", recall_averages)


f1 score: [('0', 0.2653941904544187), ('2', 0.1148964833063442), ('1', 0.27715161639711416)]
precision: [('0', 0.46824611969391494), ('2', 0.3185916179337232), ('1', 0.17086863755936044)]
recall: [('0', 0.18883627184628182), ('2', 0.07163152610441767), ('1', 0.7502040992368999)]


In [47]:
# Show the parameters the SMOTE oversampler is configured with.
over.get_params()


{'k_neighbors': 5,
 'n_jobs': None,
 'random_state': 27,
 'sampling_strategy': 'auto'}

In [48]:
# Show the oversampling step of the pipeline. It is looked up by its step
# name instead of its position, so reordering the steps cannot silently
# display a different estimator.
pipe.named_steps["over"]

In [68]:
##################### prediction without baseline with macro f1 score #####################
# Macro-averaged F1 / recall / precision of the pipeline over the same
# stratified, grouped 5-fold split as the per-class evaluation.
sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

labels = [0, 1, 2]

f1_scores = []
precision_scores = []
recall_scores = []

### stratified group k fold
for train_index, test_index in sgk.split(X, y_thoughts_cat, groups):
    # split() yields positional indices, so rows are selected with .iloc;
    # .loc only picks the same rows while the index is exactly 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y_thoughts_cat.iloc[train_index], y_thoughts_cat.iloc[test_index]

    # Fit on the training folds, predict the held-out fold.
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    # One macro-averaged value per fold and metric.
    f1 = f1_score(y_test, y_pred, average="macro", labels=labels)
    f1_scores.append(f1)

    recall = recall_score(y_test, y_pred, average="macro", labels=labels)
    recall_scores.append(recall)

    precision = precision_score(y_test, y_pred, average="macro", labels=labels)
    precision_scores.append(precision)


mean_f1 = np.mean(f1_scores)
mean_recall = np.mean(recall_scores)
mean_precision = np.mean(precision_scores)

print(f'Our mean fold f1 score is {mean_f1:0.4f}')
print(f'Our mean fold recall is {mean_recall:0.4f}')
print(f'Our mean fold precision is {mean_precision:0.4f}')


# Values noted from an earlier run (its configuration is not recorded here):
#Our mean fold f1 score is 0.3877
#Our mean fold recall is 0.3905
#Our mean fold precision is 0.3951

Our mean fold f1 score is 0.2191
Our mean fold recall is 0.3369
Our mean fold precision is 0.3192


In [None]:
##################### prediction with baseline #####################
# Chance-level reference: for every test fold a random label vector is built
# whose class proportions follow the overall label distribution, and it is
# scored per class against the true labels.
# distribution:
#0    452 On-Task & TRT
#2   396 TUT
#1    170 TRI

sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

# Seeded generator so the shuffled baseline and the averages derived from it
# are reproducible across runs.
baseline_rng = np.random.default_rng(529)

# Class sizes come from the labels themselves instead of hard-coded counts
# (previously 1018 / 396 / 452), so they stay correct if the data changes.
class_counts = y_thoughts_cat.value_counts()
data_size = len(y_thoughts_cat)

labels = [0, 1, 2]
fold = 0
f1_scores = {"0": [], "1": [], "2": []}
precision_scores = {"0": [], "1": [], "2": []}
recall_scores = {"0": [], "1": [], "2": []}

### stratified group k fold
# The splitter has a fixed random_state, so the 10 repetitions reuse the same
# folds; only the random baseline differs between repetitions.
for _ in range(10):
    # No model is fitted here: the baseline only needs the true test labels.
    for _, test_index in sgk.split(X, y_thoughts_cat, groups):
        # split() yields positional indices -> .iloc
        y_test = y_thoughts_cat.iloc[test_index]
        n_test = len(y_test)

        # create baseline: class 2 (TUT) and class 0 (On Task) get their
        # proportional share (rounded down); the remainder stays class 1 (TRI).
        TUT_size = int(class_counts.get(2, 0) / data_size * n_test)
        On_Task_size = int(class_counts.get(0, 0) / data_size * n_test)

        baseline = np.ones(n_test, dtype=int)
        baseline[:TUT_size] = 2
        baseline[TUT_size:TUT_size + On_Task_size] = 0
        baseline_rng.shuffle(baseline)

        # average=None returns one score per entry of `labels`, in that order.
        f1 = f1_score(y_test, baseline, average=None, labels=labels)
        recall = recall_score(y_test, baseline, average=None, labels=labels)
        precision = precision_score(y_test, baseline, average=None, labels=labels)

        for position, label in enumerate(labels):
            f1_scores[str(label)].append(f1[position])
            recall_scores[str(label)].append(recall[position])
            precision_scores[str(label)].append(precision[position])

        print(f"f1 with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, f1)})
        print(f"recall with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, recall)})
        print(f"precision with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, precision)})

        # Runs from 0 to 49 across all repetitions.
        fold += 1

In [35]:
# Results: mean over all evaluated folds for every class.
# Less specific categories:
#   0 -> On-Task & TRT (452)
#   2 -> TUT (396)
#   1 -> TRI (170)
f1_averages = [
    (class_key, sum(f1_scores[class_key]) / len(f1_scores[class_key]))
    for class_key in f1_scores
]
print("f1 score:", f1_averages)


precision_averages = [
    (class_key, sum(precision_scores[class_key]) / len(precision_scores[class_key]))
    for class_key in precision_scores
]
print("precision:", precision_averages)

recall_averages = [
    (class_key, sum(recall_scores[class_key]) / len(recall_scores[class_key]))
    for class_key in recall_scores
]
print("recall:", recall_averages)


f1 score: [('0', 0.4449827047557848), ('1', 0.1858383753934031), ('2', 0.37648062566786905)]
precision: [('0', 0.4480190394998578), ('1', 0.18493362193362195), ('2', 0.37791232283890513)]
recall: [('0', 0.445312200998912), ('1', 0.18993991318465672), ('2', 0.37715488606879544)]


In [None]:
##################### prediction with baseline #####################
# Chance-level reference with macro-averaged metrics: for every test fold a
# random label vector is built whose class proportions follow the overall
# label distribution, and it is scored against the true labels.
# distribution used by this cell (merged categories):
#   0 -> On-Task & TRT : 452
#   1 -> TRI           : 170
#   2 -> TUT           : 396
# The five-class listing that used to stand here (On-Task 374, Others 9,
# TRI 170, TRT 78, TUT 396) does not match the three labels scored below.
sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

# Seeded generator so the shuffled baseline and the reported means are
# reproducible across runs.
baseline_rng = np.random.default_rng(529)

# Class sizes come from the labels themselves instead of hard-coded counts
# (previously 1018 / 396 / 452), so they stay correct if the data changes.
class_counts = y_thoughts_cat.value_counts()
data_size = len(y_thoughts_cat)

labels = [0, 1, 2]
fold = 0
f1_scores = []
precision_scores = []
recall_scores = []

### stratified group k fold
# The splitter has a fixed random_state, so the 10 repetitions reuse the same
# folds; only the random baseline differs between repetitions.
for _ in range(10):
    # No model is fitted here: the baseline only needs the true test labels.
    for _, test_index in sgk.split(X, y_thoughts_cat, groups):
        # split() yields positional indices -> .iloc
        y_test = y_thoughts_cat.iloc[test_index]
        n_test = len(y_test)

        # create baseline: class 2 (TUT) and class 0 (On Task) get their
        # proportional share (rounded down); the remainder stays class 1 (TRI).
        TUT_size = int(class_counts.get(2, 0) / data_size * n_test)
        On_Task_size = int(class_counts.get(0, 0) / data_size * n_test)

        baseline = np.ones(n_test, dtype=int)
        baseline[:TUT_size] = 2
        baseline[TUT_size:TUT_size + On_Task_size] = 0
        baseline_rng.shuffle(baseline)

        f1 = f1_score(y_test, baseline, average="macro", labels=labels)
        f1_scores.append(f1)

        recall = recall_score(y_test, baseline, average="macro", labels=labels)
        recall_scores.append(recall)

        precision = precision_score(y_test, baseline, average="macro", labels=labels)
        precision_scores.append(precision)

        print(f"f1  score for fold {fold}: ", f1)
        print(f"recall for fold {fold}: ", recall)
        print(f"precision for fold {fold}: ", precision)

        # Runs from 0 to 49 across all repetitions.
        fold += 1

mean_f1 = np.mean(f1_scores)
mean_recall = np.mean(recall_scores)
mean_precision = np.mean(precision_scores)

print(f'Our mean fold f1 score is {mean_f1:0.4f}')
print(f'Our mean fold recall is {mean_recall:0.4f}')
print(f'Our mean fold precision is {mean_precision:0.4f}')

