In [3]:
## Dependencies

# Import package/module for data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Import modules for feature engineering and modelling
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.impute import SimpleImputer

# pipeline
from imblearn.pipeline import Pipeline

# models
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.neural_network import MLPClassifier

# cross validation and hyperparameter tuning
from sklearn.model_selection import StratifiedGroupKFold,  GridSearchCV

# balancing
from imblearn.over_sampling import RandomOverSampler, SMOTE

#accuracy
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, classification_report

#feature selection
from sklearn import set_config
from sklearn.feature_selection import SelectKBest, chi2

In [4]:
read_path = r"C:\Users\Julia\Desktop\features_with_labels.csv"
#read_path = r"W:\WCT\04_Mind-Wandering-Labstudy\04_Daten\04_Prepared_data\00_Julia\Model Building\features_with_labels.csv"
df = pd.read_csv(read_path)

In [5]:
df["Awareness_all"].unique()

array(['task-related', 'aware', 'unaware'], dtype=object)

In [6]:
#df["Awareness_all"].unique()
df["Awareness_all"].value_counts()

task-related    631
aware           249
unaware         147
Name: Awareness_all, dtype: int64

In [7]:
labelencoder = LabelEncoder()
# Assigning numerical values and storing in another column
df["Awareness_all_new"] = labelencoder.fit_transform(df["Awareness_all"])
df["Awareness_all_new"].unique()

array([1, 0, 2])

In [8]:
df["Awareness_all_new"].value_counts()
# 1 = task related, 0 aware 2 unaware

1    631
0    249
2    147
Name: Awareness_all_new, dtype: int64

In [9]:
# ['task-related' == 1 , 'aware' == 2, 'unaware' == 3 , nan == delete the row]
def get_X_y(train):
    FEATURES = [
        'Fixation Duration Mean [ms]', 'Fixation Duration Max [ms]', 'Fixation Duration Min [ms]', 'Fixation Duration Median [ms]', 'Fixation Duration Std [ms]', 'Fixation Duration Skew [ms]', 'Fixation Duration Quantil 25 [ms]', 'Fixation Duration Quantil 75 [ms]',
        'Saccade Duration Mean [ms]', 'Saccade Duration Max [ms]', 'Saccade Duration Min [ms]', 'Saccade Duration Median [ms]', 'Saccade Duration Std [ms]', 'Saccade Duration Skew [ms]', 'Saccade Duration Quantil 25 [ms]', 'Saccade Duration Quantil 75 [ms]', 
        'Blink Duration Mean [ms]', 'Blink Duration Max [ms]', 'Blink Duration Min [ms]', 'Blink Duration Median [ms]', 'Blink Duration Std [ms]', 'Blink Duration Skew [ms]', 'Blink Duration Quantil 25 [ms]', 'Blink Duration Quantil 75 [ms]', 'Fixation Duration Kurtosis [ms]',
        'Saccade Duration Kurtosis [ms]',
        'Blink Duration Kurtosis [ms]', 
        'Fixation Saccade Ratio Mean', 'Fixation Saccade Ratio Max', 'Fixation Saccade Ratio Min', 'Fixation Saccade Ratio Median', 'Fixation Saccade Ratio Std', 'Fixation Saccade Ratio Skew', 'Fixation Saccade Ratio Kurtosis', 
        'Fixation Number', 'Blink Number', 
        'Fixation Dispersion X Mean [px]', 'Fixation Dispersion X Max [px]', 'Fixation Dispersion X Min [px]', 'Fixation Dispersion X Median [px]', 'Fixation Dispersion X Std [px]', 'Fixation Dispersion X Skew [px]', 'Fixation Dispersion X Quantil 25 [px]', 'Fixation Dispersion X Quantil 75 [px]', 
        'Fixation Dispersion Y Mean [px]', 'Fixation Dispersion Y Max [px]', 'Fixation Dispersion Y Min [px]', 'Fixation Dispersion Y Median [px]', 'Fixation Dispersion Y Std [px]', 'Fixation Dispersion Y Skew [px]', 'Fixation Dispersion Y Quantil 25 [px]', 'Fixation Dispersion Y Quantil 75 [px]', 'Fixation Dispersion X Kurtosis [px]', 'Fixation Dispersion Y Kurtosis [px]', 
        'Saccade Amplitude Mean [°]', 'Saccade Amplitude Max [°]', 'Saccade Amplitude Min [°]', 'Saccade Amplitude Median [°]', 'Saccade Amplitude Std [°]', 'Saccade Amplitude Skew [°]', 'Saccade Amplitude Quantil 25 [°]', 'Saccade Amplitude Quantil 75 [°]', 'Saccade Amplitude Kurtosis [°]',
        'Saccade Acceleration Average [°/s²] Mean', 'Saccade Acceleration Average [°/s²] Max', 'Saccade Acceleration Average [°/s²] Min', 'Saccade Acceleration Average [°/s²] Median', 'Saccade Acceleration Average [°/s²] Std', 'Saccade Acceleration Average [°/s²] Skew]', 'Saccade Acceleration Average [°/s²] Quantil 25]', 'Saccade Acceleration Average [°/s²] Quantil 75]',
        'Saccade Acceleration Peak [°/s²] Mean', 'Saccade Acceleration Peak [°/s²] Max', 'Saccade Acceleration Peak [°/s²] Min', 'Saccade Acceleration Peak [°/s²] Median', 'Saccade Acceleration Peak [°/s²] Std', 'Saccade Acceleration Peak [°/s²] Skew]', 'Saccade Acceleration Peak [°/s²] Quantil 25]', 'Saccade Acceleration Peak [°/s²] Quantil 75]', 'Saccade Deceleration Peak [°/s²] Mean', 
        'Saccade Deceleration Peak [°/s²] Max', 'Saccade Deceleration Peak [°/s²] Min', 'Saccade Deceleration Peak [°/s²] Median', 'Saccade Deceleration Peak [°/s²] Std', 'Saccade Deceleration Peak [°/s²] Skew]', 'Saccade Deceleration Peak [°/s²] Quantil 25]', 'Saccade Deceleration Peak [°/s²] Quantil 75]', 
        'Saccade Velocity Average [°/s²] Mean', 'Saccade Velocity Average [°/s²] Max', 'Saccade Velocity Average [°/s²] Min', 'Saccade Velocity Average [°/s²] Median', 'Saccade Velocity Average [°/s²] Std', 'Saccade Velocity Average [°/s²] Skew]', 'Saccade Velocity Average [°/s²] Quantil 25]', 'Saccade Velocity Average [°/s²] Quantil 75]', 
        'Saccade Velocity Peak [°/s²] Mean', 'Saccade Velocity Peak [°/s²] Max', 'Saccade Velocity Peak [°/s²] Min', 'Saccade Velocity Peak [°/s²] Median', 'Saccade Velocity Peak [°/s²] Std', 'Saccade Velocity Peak [°/s²] Skew]', 'Saccade Velocity Peak [°/s²] Quantil 25]', 'Saccade Velocity Peak [°/s²] Quantil 75]', 
        'Saccade Velocity Peak [%] Mean', 'Saccade Velocity Peak [%] Max', 'Saccade Velocity Peak [%] Min', 'Saccade Velocity Peak [%] Median', 'Saccade Velocity Peak [%] Std', 'Saccade Velocity Peak [%] Skew]', 'Saccade Velocity Peak [%] Quantil 25]', 'Saccade Velocity Peak [%] Quantil 75]', 
        'Saccade Acceleration Average [°/s²] Kurtosis', 'Saccade Acceleration Peak [°/s²] Kurtosis', 'Saccade Deceleration Peak [°/s²] Kurtosis', 'Saccade Velocity Average [°/s²] Kurtosis', 'Saccade Velocity Peak [°/s²] Kurtosis', 'Saccade Velocity Peak [%] Kurtosis', 
        'Saccade Length Mean [px]', 'Saccade Length Max [px]', 'Saccade Length Min [px]', 'Saccade Length Median [px]', 'Saccade Length Std [px]', 'Saccade Length Skew [px]]', 'Saccade Length Quantil 25 [px]]', 'Saccade Length Quantil 75 [px]]', 'Saccade Length Kurtosis [px]', 
        'Fixation Average Pupil Diameter [mm] Mean', 'Fixation Average Pupil Diameter [mm] Max', 'Fixation Average Pupil Diameter [mm] Min', 'Fixation Average Pupil Diameter [mm] Median', 'Fixation Average Pupil Diameter [mm] Std', 'Fixation Average Pupil Diameter [mm] Skew', 'Fixation Average Pupil Diameter [mm] Quantil25', 'Fixation Average Pupil Diameter [mm] Quantil75',
        'Fixation Average Pupil Diameter [mm] Kurtosis', 
        'Veregence Angles Mean [rad]', 'Veregence Angles Std [rad]', 
        'Pupil Distance Mean [px]', 'Pupil Distance Std [px]'
    ]

    GROUPS = "Participant"

    TARGET = "Awareness_all_new"

    X = train[FEATURES]
    y = train[TARGET]
    groups = train[GROUPS]
    return X, y, groups

In [10]:
X, y, groups = get_X_y(df)

In [11]:
##################### preprocessing with pipleline #####################
imputer =  SimpleImputer(fill_value='missing')
scaler = StandardScaler()
# Balancing
over = SMOTE(random_state= 27) #sampling_strategy=0.1


# getuned
# XGBoost
#model = xgb.XGBClassifier(objective="multi:softmax", random_state=42, colsample_bytree =1.0, max_depth = 6, n_estimators= 700, subsample = 1)

# GaussianNB
#model = GaussianNB(var_smoothing = 0.001873817422860383)

# Random forrest classifier
#model = RandomForestClassifier(random_state=0, bootstrap = True, max_depth = 25,  max_features = "auto", min_samples_leaf = 1, min_samples_split= 1, n_estimators = 50)

# SVC
#{'model__C': 0.6, 'model__gamma': 0.001, 'model__kernel': 'rbf'}
#model = SVC(kernel="rbf", C= 0.6, gamma = 0.001, probability=True)

#model = SVC(kernel="linear", C= 0.8, gamma = 0.2, probability=True)

model = MLPClassifier()

steps = [('imputer', imputer), ('scaler',scaler),('over', over), ('model', model)]
pipe = Pipeline(steps=steps)

In [75]:
##################### prediction without baseline, macro f1 score #####################
import random
from sklearn import metrics
from sklearn.metrics import roc_curve, auc

sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

fold = 0

f1_scores = []
precision_scores =[]
recall_scores =[]

auc_scores = []

### stratifies group k fold
for train_index, test_index in sgk.split(X, y, groups):
    X_train, X_test = X.loc[train_index], X.loc[test_index]
    y_train, y_test = y.loc[train_index], y.loc[test_index]

    # Fit Model on Train
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)

    #accurancy_score = accuracy_score(y_test, y_pred)
    labels = [0, 1, 2]
    f1 = f1_score(y_test, y_pred, average= "macro", labels=labels)

    recall = recall_score(y_test, y_pred, average="macro", labels=labels)
    precision = precision_score(y_test, y_pred, average="macro", labels=labels)      
    
    f1_scores.append(f1)
    precision_scores.append(precision)
    recall_scores.append(recall)

    print(f"f1 with labels  score for fold {fold}: ", f1)
    print(f"recall with labels  score for fold {fold}: ", recall)
    print(f"precision with labels  score for fold {fold}: ", precision)
    fold += 1

mean_f1 = np.mean(f1_scores)
mean_recall = np.mean(recall_scores)
mean_precision = np.mean(precision_scores)

print(f'Our mean fold f1 score is {mean_f1:0.4f}')
print(f'Our mean fold recall is {mean_recall:0.4f}')
print(f'Our mean fold precisionis {mean_precision:0.4f}')





f1 with labels  score for fold 0:  0.2958700449475357
recall with labels  score for fold 0:  0.3
precision with labels  score for fold 0:  0.305209539932876




f1 with labels  score for fold 1:  0.2639311043566363
recall with labels  score for fold 1:  0.27092168504382247
precision with labels  score for fold 1:  0.26337038092523984




f1 with labels  score for fold 2:  0.37805469752069293
recall with labels  score for fold 2:  0.3996848739495798
precision with labels  score for fold 2:  0.40482017055988134




f1 with labels  score for fold 3:  0.3656106519742883
recall with labels  score for fold 3:  0.3905122655122655
precision with labels  score for fold 3:  0.3667772444946358
f1 with labels  score for fold 4:  0.3297532499569851
recall with labels  score for fold 4:  0.3527963291081732
precision with labels  score for fold 4:  0.3266983448857114
Our mean fold f1 score is 0.3266
Our mean fold recall is 0.3428
Our mean fold precisionis 0.3334




In [55]:
### Grid Seach Gaussian xg boost
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# defining parameter range
param_grid = {
    "model__subsample": [0.6, 0.8, 1], #np.arange(0.6,1,0.05),
    "model__max_depth": [3, 6, 10], # np.arange(3,10,1),
    "model__n_estimators": [1000, 700, 500],
    "model__colsample_bytree": [0.1,0.5, 1.0]
}

grid = GridSearchCV(pipe, param_grid = param_grid, verbose = 1, cv = 5, scoring= "f1_macro")

# muss hier mit trainig data oder mit ganzem X trainiert werden?
grid.fit(X, y)

print(grid.best_params_)
print(grid.best_estimator_)



Fitting 5 folds for each of 81 candidates, totalling 405 fits
{'model__colsample_bytree': 1.0, 'model__max_depth': 6, 'model__n_estimators': 700, 'model__subsample': 1}
Pipeline(steps=[('imputer', SimpleImputer(fill_value='missing')),
                ('scaler', StandardScaler()), ('over', SMOTE(random_state=27)),
                ('model',
                 XGBClassifier(base_score=None, booster=None, callbacks=None,
                               colsample_bylevel=None, colsample_bynode=None,
                               colsample_bytree=1.0, early_stopping_rounds=None,
                               enable_categorical=False, eval_metric=None,
                               feature_types=None, gamma...
                               grow_policy=None, importance_type=None,
                               interaction_constraints=None, learning_rate=None,
                               max_bin=None, max_cat_threshold=None,
                               max_cat_to_onehot=None, max_delta_s

In [44]:
### Grid Seach Gaussian naive bayes
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# defining parameter range
param_grid = {
    #var_smoothing is a stability calculation to widen (or smooth) the curve and therefore account for more samples that are further away from the distribution mean.
    'model__var_smoothing': np.logspace(0,-9, num=100)
}

grid = GridSearchCV(pipe, param_grid = param_grid, verbose = 1, cv = 5, scoring= "f1_macro")

# muss hier mit trainig data oder mit ganzem X trainiert werden?
grid.fit(X, y)

print(grid.best_params_)
print(grid.best_estimator_)


Fitting 5 folds for each of 100 candidates, totalling 500 fits
{'model__var_smoothing': 0.001873817422860383}
Pipeline(steps=[('imputer', SimpleImputer(fill_value='missing')),
                ('scaler', StandardScaler()), ('over', SMOTE(random_state=27)),
                ('model', GaussianNB(var_smoothing=0.001873817422860383))])


In [31]:
###### Grid Search random forrest
# hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
#{'model__bootstrap': True, 'model__max_depth': 50, 'model__max_features': 'auto', 'model__min_samples_leaf': 1, 'model__min_samples_split': 2, 'model__n_estimators': 200}

sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)
# defining parameter range
param_grid = {
    'model__bootstrap': [True],
 'model__max_depth': [25, 20, 30],
 'model__max_features': ['auto'],
 'model__min_samples_leaf': [1],
 'model__min_samples_split': [1, 2],
 'model__n_estimators': [50, 100]}

grid = GridSearchCV(pipe, param_grid = param_grid, verbose = 1, cv = 5, scoring= "f1_macro")

grid.fit(X, y)
 
print(grid.best_params_)
print(grid.best_estimator_)



Fitting 5 folds for each of 12 candidates, totalling 60 fits


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


{'model__bootstrap': True, 'model__max_depth': 20, 'model__max_features': 'auto', 'model__min_samples_leaf': 1, 'model__min_samples_split': 1, 'model__n_estimators': 50}
Pipeline(steps=[('imputer', SimpleImputer(fill_value='missing')),
                ('scaler', StandardScaler()), ('over', SMOTE(random_state=27)),
                ('model',
                 RandomForestClassifier(max_depth=20, max_features='auto',
                                        min_samples_split=1, n_estimators=50,
                                        random_state=0))])


In [21]:
############## Grid Search SVC
##################### prediction without baseline #####################
# hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)
# defining parameter range
param_grid = {'model__C': [ 0.7, 0.5, 0.6], 
              'model__gamma':  [ 0.001, 0.01 , 0.005],
              'model__kernel': ["linear", "rbf"]} 
  
grid = GridSearchCV(pipe, param_grid = param_grid, verbose = 1, cv = 5, scoring= "f1_macro")

grid.fit(X, y)
 
print(grid.best_params_)
print(grid.best_estimator_)
#'model__C': 0.8, 'model__gamma': 0.2, 'model__kernel': 'linear'}

Fitting 5 folds for each of 18 candidates, totalling 90 fits
{'model__C': 0.6, 'model__gamma': 0.001, 'model__kernel': 'rbf'}
Pipeline(steps=[('imputer', SimpleImputer(fill_value='missing')),
                ('scaler', StandardScaler()), ('over', SMOTE(random_state=27)),
                ('model', SVC(C=0.6, gamma=0.001, probability=True))])


In [76]:
##################### prediction for each class without baseline #####################
sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

fold = 0

f1_scores ={"0": [], "1": [], "2": []}
precision_scores = {"0": [], "1": [], "2": []}
recall_scores ={"0": [], "1": [], "2": []}

### stratifies group k fold
for train_index, test_index in sgk.split(X, y, groups):
    X_train, X_test = X.loc[train_index], X.loc[test_index]
    y_train, y_test = y.loc[train_index], y.loc[test_index]

    # Fit Model on Train
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)

    #accurancy_score = accuracy_score(y_test, y_pred)

    labels = [0, 1, 2]
    f1 = f1_score(y_test, y_pred, average= None, labels=labels)
    f1_scores["0"].append(f1[0])
    f1_scores["1"].append(f1[1])
    f1_scores["2"].append(f1[2])
  

    recall = recall_score(y_test, y_pred, average=None, labels=labels)
    recall_scores["0"].append(recall[0])
    recall_scores["1"].append(recall[1])
    recall_scores["2"].append(recall[2])
    #print(recall)

    precision = precision_score(y_test, y_pred, average=None, labels=labels)
    precision_scores["0"].append(precision[0])
    precision_scores["1"].append(precision[1])
    precision_scores["2"].append(precision[2])
    #print(recall)

    #fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=2)
    #auc = metrics.auc(fpr, tpr)

    #print(f"f1 with labels  score for fold {fold}: ", auc)
    print(f"f1 with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, f1)})
    print(f"recall with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, recall)})
    print(f"precision with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, precision)})
    
    fold += 1

# # 1 = task related, 0 aware 2 unaware




f1 with labels  score for fold 0:  {0: 0.19047619047619047, 1: 0.5779467680608366, 2: 0.125}
recall with labels  score for fold 0:  {0: 0.15873015873015872, 1: 0.6031746031746031, 2: 0.14285714285714285}
precision with labels  score for fold 0:  {0: 0.23809523809523808, 1: 0.5547445255474452, 2: 0.1111111111111111}




f1 with labels  score for fold 1:  {0: 0.24242424242424246, 1: 0.672, 2: 0.05128205128205128}
recall with labels  score for fold 1:  {0: 0.3333333333333333, 1: 0.6412213740458015, 2: 0.037037037037037035}
precision with labels  score for fold 1:  {0: 0.19047619047619047, 1: 0.7058823529411765, 2: 0.08333333333333333}




f1 with labels  score for fold 2:  {0: 0.40287769784172667, 1: 0.6161137440758293, 2: 0.1}
recall with labels  score for fold 2:  {0: 0.5833333333333334, 1: 0.5462184873949579, 2: 0.07142857142857142}
precision with labels  score for fold 2:  {0: 0.3076923076923077, 1: 0.7065217391304348, 2: 0.16666666666666666}




f1 with labels  score for fold 3:  {0: 0.3650793650793651, 1: 0.6115702479338843, 2: 0.041666666666666664}
recall with labels  score for fold 3:  {0: 0.5227272727272727, 1: 0.5285714285714286, 2: 0.041666666666666664}
precision with labels  score for fold 3:  {0: 0.2804878048780488, 1: 0.7254901960784313, 2: 0.041666666666666664}
f1 with labels  score for fold 4:  {0: 0.338235294117647, 1: 0.5726872246696035, 2: 0.12244897959183672}
recall with labels  score for fold 4:  {0: 0.39655172413793105, 1: 0.5652173913043478, 2: 0.09090909090909091}
precision with labels  score for fold 4:  {0: 0.2948717948717949, 1: 0.5803571428571429, 2: 0.1875}




In [77]:
averages = [(k, sum(v)/len(v)) for k, v in f1_scores.items()]
print("f1 score:", averages)
# for k, v in averages:
#     print("f1 score, label", k, f"\n{v}")

averages = [(k, sum(v)/len(v)) for k, v in precision_scores.items()]
print("precision score: ", averages)
# for k, v in averages:
#     print("precision score, label", k, f"\n{v}")


averages = [(k, sum(v)/len(v)) for k, v in recall_scores.items()]
print("recall:", averages)
# for k, v in averages:
#     print("recall score, label", k, f"\n{v}")

# # 1 = task related, 0 aware 2 unaware



f1 score: [('0', 0.30781855798783436), ('1', 0.6100635969480307), ('2', 0.08807953950811094)]
precision score:  [('0', 0.262324667202716), ('1', 0.6545991913109261), ('2', 0.11805555555555554)]
recall: [('0', 0.39893516445240584), ('1', 0.5768806568982277), ('2', 0.07677970177970177)]


In [None]:
# from sklearn.metrics import roc_curve, auc

# logistic_fpr, logistic_tpr, threshold = roc_curve(y_test, y_pred_logistic)
# auc_logistic = auc(logistic_fpr, logistic_tpr)

# svm_fpr, svm_tpr, threshold = roc_curve(y_test, y_pred_svm)
# auc_svm = auc(svm_fpr, svm_tpr)

# plt.figure(figsize=(5, 5), dpi=100)
# plt.plot(svm_fpr, svm_tpr, linestyle='-', label='SVM (auc = %0.3f)' % auc_svm)
# plt.plot(logistic_fpr, logistic_tpr, marker='.', label='Logistic (auc = %0.3f)' % auc_logistic)

# plt.xlabel('False Positive Rate -->')
# plt.ylabel('True Positive Rate -->')

# plt.legend()

# plt.show()

# # one vs. rest
# from sklearn.metrics import roc_auc_score

# micro_roc_auc_ovr = roc_auc_score(
#     y_test,
#     y_score,
#     multi_class="ovr",
#     average="micro",
# )

In [None]:
##################### Macro F1 score: prediction with baseline #####################
# task-related    631
# aware           249
# unaware         147

sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

fold = 0
f1_scores =[]
precision_scores =[]
recall_scores =[]

### stratifies group k fold
for i in range(10):
    for train_index, test_index in sgk.split(X, y, groups):
        X_train, X_test = X.loc[train_index], X.loc[test_index]
        y_train, y_test = y.loc[train_index], y.loc[test_index]
    
        # Fit Model on Train
        pipe.fit(X_train, y_train)
    
        y_pred = pipe.predict(X_test)
    
        # create baseline
        baseline = np.ones(len(y_pred))
        mw__aware_size = 24.24537/100  * len(y_pred)
        mw__unaware_size = 14.31353/100  * len(y_pred)
    
        baseline[:int(mw__aware_size)] = 0
        baseline[int(mw__aware_size): int(mw__aware_size) + int(mw__unaware_size)] = 2
        np.random.shuffle(baseline)
        baseline = baseline.astype(int)
    
        labels = [0, 1, 2]
        f1 = f1_score(y_test, baseline, average= "macro", labels=labels)
        recall = recall_score(y_test, baseline, average="macro", labels=labels)
        precision = precision_score(y_test, baseline, average="macro", labels=labels)

        # fpr, tpr, threshold = roc_curve(y_test, y_pred)
        # auc_score= auc(fpr, tpr)

        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)
   #     auc_scores.append(auc_score)



        #print(f"f1 with labels  score for fold {fold}: ", auc_score)
        print(f"f1 with labels  score for fold {fold}: ", f1)
        print(f"recall with labels  score for fold {fold}: ", recall)
        print(f"precision with labels  score for fold {fold}: ", precision)

        fold += 1

mean_f1 = np.mean(f1_scores)
mean_recall = np.mean(recall_scores)
mean_precision = np.mean(precision_scores)

#mean_auc = np.mean(auc_scores)

print(f'Our mean fold f1 score is {mean_f1:0.4f}')
print(f'Our mean fold recall is {mean_recall:0.4f}')
print(f'Our mean fold precisionis {mean_precision:0.4f}')

#print(f'Our mean fold auc {mean_auc:0.4f}')

In [None]:
##################### each calss prediction with baseline #####################
# task-related    631
# aware           249
# unaware         147

import random
sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

fold = 0
f1_scores ={"0": [], "1": [], "2": []}
precision_scores = {"0": [], "1": [], "2": []}
recall_scores ={"0": [], "1": [], "2": []}

### stratifies group k fold
for i in range(10):
    for train_index, test_index in sgk.split(X, y, groups):
        X_train, X_test = X.loc[train_index], X.loc[test_index]
        y_train, y_test = y.loc[train_index], y.loc[test_index]
    
        # Fit Model on Train
        pipe.fit(X_train, y_train)
    
        y_pred = pipe.predict(X_test)
    
        # create baseline
        baseline = np.ones(len(y_pred))
        mw__aware_size = 24.24537/100  * len(y_pred)
        mw__unaware_size = 14.31353/100  * len(y_pred)
    
        baseline[:int(mw__aware_size)] = 0
        baseline[int(mw__aware_size): int(mw__aware_size) + int(mw__unaware_size)] = 2
        np.random.shuffle(baseline)
        baseline = baseline.astype(int)

    
        labels = [0, 1, 2]
        f1 = f1_score(y_test, baseline, average=None, labels=labels)
        f1_scores["0"].append(f1[0])
        f1_scores["1"].append(f1[1])
        f1_scores["2"].append(f1[2])
        print(f1)
    
        recall = recall_score(y_test, baseline, average=None, labels=labels)
        recall_scores["0"].append(recall[0])
        recall_scores["1"].append(recall[1])
        recall_scores["2"].append(recall[2])
        #print(recall)
    
        precision = precision_score(y_test, baseline, average=None, labels=labels)
        precision_scores["0"].append(precision[0])
        precision_scores["1"].append(precision[1])
        precision_scores["2"].append(precision[2])
        #print(recall)
    
        #fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=2)
        #auc = metrics.auc(fpr, tpr)
    
        #print(f"f1 with labels  score for fold {fold}: ", auc)
        print(f"f1 with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, f1)})
        print(f"recall with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, recall)})
        print(f"precision with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, precision)})
        
        fold += 1


In [18]:
f1_averages = [(k, sum(v)/len(v)) for k, v in f1_scores.items()]
print("f1 score:",f1_averages)

precision_averages = [(k, sum(v)/len(v)) for k, v in precision_scores.items()]
print("precision:", precision_averages)

recall_averages = [(k, sum(v)/len(v)) for k, v in recall_scores.items()]
print("recall:", recall_averages)

f1 score: [('0', 0.24422749932144583), ('1', 0.6154369720912743), ('2', 0.1337215297569219)]
precision: [('0', 0.24554979816985903), ('1', 0.6150007595603985), ('2', 0.13507104086845467)]
recall: [('0', 0.24628501766432806), ('1', 0.6179746758754394), ('2', 0.1332135642135642)]


In [63]:
##################### macro f1 score preprocessing with pipleline #####################
imputer =  SimpleImputer(fill_value='missing')
scaler = StandardScaler()
over = SMOTE(random_state= 27) #sampling_strategy=0.1

# optimized XGBoost
model = xgb.XGBClassifier(objective="objective=multi:softmax", random_state=42, colsample_bytree =1.0, max_depth = 6, n_estimators= 700, subsample = 1, scale_pos_weight = 10)

# optimzied GaussianNB
#model = GaussianNB(var_smoothing = 0.001873817422860383)


# optimzied Random forrest classifier
#model = RandomForestClassifier(random_state=0, bootstrap = True, max_depth = 25,  max_features = "auto", min_samples_leaf = 1, min_samples_split= 1, n_estimators = 50, class_weight="balanced")

# optimized SVC
#{'model__C': 0.6, 'model__gamma': 0.001, 'model__kernel': 'rbf'}
#model = SVC(kernel="rbf", C= 0.6, gamma = 0.001, probability=True, class_weight='balanced')

#model = SVC(kernel="linear", C= 0.8, gamma = 0.2, probability=True)

#steps = [('imputer', imputer), ('scaler',scaler),('over', over), ('model', model)]
# with weights
steps = [('imputer', imputer), ('scaler',scaler), ('model', model)]

pipe = Pipeline(steps=steps)

In [64]:
############# Balancing with weights naive bayes

from sklearn.utils import class_weight

##################### prediction without baseline, macro f1 score #####################
import random
from sklearn import metrics
from sklearn.metrics import  auc

sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

fold = 0

f1_scores = []
precision_scores =[]
recall_scores =[]

auc_scores = []

### stratifies group k fold
for train_index, test_index in sgk.split(X, y, groups):
    X_train, X_test = X.loc[train_index], X.loc[test_index]
    y_train, y_test = y.loc[train_index], y.loc[test_index]

    # sample weights
    sample_weights = class_weight.compute_sample_weight(class_weight = 'balanced',  y = y_train)

    # Fit Model on Train
    #pipe.fit(X_train, y_train, **{'model__sample_weight': sample_weights})
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    #accurancy_score = accuracy_score(y_test, y_pred)
    labels = [0, 1, 2]
    f1 = f1_score(y_test, y_pred, average= "macro", labels=labels)

    recall = recall_score(y_test, y_pred, average="macro", labels=labels)
    precision = precision_score(y_test, y_pred, average="macro", labels=labels)      
    
    f1_scores.append(f1)
    precision_scores.append(precision)
    recall_scores.append(recall)

    print(f"f1 with labels  score for fold {fold}: ", f1)
    print(f"recall with labels  score for fold {fold}: ", recall)
    print(f"precision with labels  score for fold {fold}: ", precision)
    fold += 1

mean_f1 = np.mean(f1_scores)
mean_recall = np.mean(recall_scores)
mean_precision = np.mean(precision_scores)

print(f'Our mean fold f1 score is {mean_f1:0.4f}')
print(f'Our mean fold recall is {mean_recall:0.4f}')
print(f'Our mean fold precisionis {mean_precision:0.4f}')


#Our mean fold f1 score is 0.2788
# Our mean fold recall is 0.3509
# Our mean fold precisionis 0.3484


Parameters: { "scale_pos_weight" } are not used.

f1 with labels  score for fold 0:  0.26095516326234586
recall with labels  score for fold 0:  0.3296296296296296
precision with labels  score for fold 0:  0.33900625978090765
Parameters: { "scale_pos_weight" } are not used.

f1 with labels  score for fold 1:  0.293519887764001
recall with labels  score for fold 1:  0.3194797851286401
precision with labels  score for fold 1:  0.27952790072029854
Parameters: { "scale_pos_weight" } are not used.

f1 with labels  score for fold 2:  0.3199163932932385
recall with labels  score for fold 2:  0.35194911297852477
precision with labels  score for fold 2:  0.4217171717171717
Parameters: { "scale_pos_weight" } are not used.

f1 with labels  score for fold 3:  0.2964858955900389
recall with labels  score for fold 3:  0.32987012987012987
precision with labels  score for fold 3:  0.3044363044363045
Parameters: { "scale_pos_weight" } are not used.

f1 with labels  score for fold 4:  0.28970276731470757

In [65]:
##################### prediction for each class without baseline #####################
sgk = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=529)

fold = 0

f1_scores ={"0": [], "1": [], "2": []}
precision_scores = {"0": [], "1": [], "2": []}
recall_scores ={"0": [], "1": [], "2": []}

### stratifies group k fold
for train_index, test_index in sgk.split(X, y, groups):
    X_train, X_test = X.loc[train_index], X.loc[test_index]
    y_train, y_test = y.loc[train_index], y.loc[test_index]

    # # sample weights
    sample_weights = class_weight.compute_sample_weight(class_weight = 'balanced',  y = y_train)
    # Fit Model on Train
    pipe.fit(X_train, y_train, **{'model__sample_weight': sample_weights})
    print(pipe)

    y_pred = pipe.predict(X_test)

    #accurancy_score = accuracy_score(y_test, y_pred)

    labels = [0, 1, 2]
    f1 = f1_score(y_test, y_pred, average= None, labels=labels)
    f1_scores["0"].append(f1[0])
    f1_scores["1"].append(f1[1])
    f1_scores["2"].append(f1[2])
  

    recall = recall_score(y_test, y_pred, average=None, labels=labels)
    recall_scores["0"].append(recall[0])
    recall_scores["1"].append(recall[1])
    recall_scores["2"].append(recall[2])
    #print(recall)

    precision = precision_score(y_test, y_pred, average=None, labels=labels)
    precision_scores["0"].append(precision[0])
    precision_scores["1"].append(precision[1])
    precision_scores["2"].append(precision[2])
    #print(recall)

    #fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=2)
    #auc = metrics.auc(fpr, tpr)

    #print(f"f1 with labels  score for fold {fold}: ", auc)
    print(f"f1 with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, f1)})
    print(f"recall with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, recall)})
    print(f"precision with labels  score for fold {fold}: ", {label:score for label,score in zip(labels, precision)})
    
    fold += 1

# # 1 = task related, 0 aware 2 unaware


Parameters: { "scale_pos_weight" } are not used.

Pipeline(steps=[('imputer', SimpleImputer(fill_value='missing')),
                ('scaler', StandardScaler()),
                ('model',
                 XGBClassifier(base_score=None, booster=None, callbacks=None,
                               colsample_bylevel=None, colsample_bynode=None,
                               colsample_bytree=1.0, early_stopping_rounds=None,
                               enable_categorical=False, eval_metric=None,
                               feature_types=None, gamma=None, gpu_id=None,
                               grow_policy=None, importance_type=None,
                               interaction_constraints=None, learning_rate=None,
                               max_bin=None, max_cat_threshold=None,
                               max_cat_to_onehot=None, max_delta_step=None,
                               max_depth=6, max_leaves=None,
                               min_child_weight=None, missing=nan,

In [62]:
averages = [(k, sum(v)/len(v)) for k, v in f1_scores.items()]
print("f1 score:", averages)

averages = [(k, sum(v)/len(v)) for k, v in precision_scores.items()]
print("precision score: ", averages)

averages = [(k, sum(v)/len(v)) for k, v in recall_scores.items()]
print("recall:", averages)


# # 1 = task related, 0 aware 2 unaware

# f1 score: [('0', 0.35398526125547203), ('1', 0.3711727446863309), ('2', 0.1209414698776401)]
# precision score:  [('0', 0.25211613582691406), ('1', 0.6135062978288349), ('2', 0.16936507936507936)]
# recall: [('0', 0.6506636562671047), ('1', 0.2939542716079683), ('2', 0.09741702741702742)]

f1 score: [('0', 0.007692307692307693), ('1', 0.0), ('2', 0.2469730077037009)]
precision score:  [('0', 0.05), ('1', 0.0), ('2', 0.14153013427371228)]
recall: [('0', 0.004166666666666667), ('1', 0.0), ('2', 0.9800000000000001)]


In [None]:



# pipeline
# wieghts must be added in the fit step

pipeline = Pipeline(steps=[("NLP", TfidfVectorizer(),
                           ("MNB", MultinomialNB())
                          ])
pipeline.fit(X_train, 
             y_train, 
             **{'MNB__sample_weight': sample_weights})