In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [3]:
# Read the input tables from CSV.
# The paths are relative to the notebook's working directory -- check whether
# they need to be updated before running this cell.
EEG_features = pd.read_csv("output_b/df_EEG_b_features_Tob_-3_toTob_0.csv")
paymentMethod = pd.read_csv("df_painOfPayment_method.csv")
painOfPayment_score = pd.read_csv("df_painOfPayment_score.csv")

# Keep only the subject identifier and the categorical pain-of-payment score.
painOfPayment_score_bins = painOfPayment_score.loc[
    :, ['subject', 'PainOfPayment_categorical_score']
]

In [4]:
def merge_all(eeg_features=None, payment_method=None, pain_score_bins=None):
    '''
    Inner-join the EEG features, payment method and pain-of-payment tables on "subject".

    Parameters
    ----------
    eeg_features, payment_method, pain_score_bins : pandas.DataFrame, optional
        Tables to join; each needs a "subject" column. An argument left as None
        falls back to the notebook-level frame (EEG_features, paymentMethod and
        painOfPayment_score_bins respectively), so calling merge_all() with no
        arguments behaves exactly as before.

    Returns
    -------
    pandas.DataFrame
        The rows whose "subject" value is present in all three tables.
    '''
    # Resolve the defaults at call time so the function can also be used
    # (and tested) with explicitly supplied frames.
    if eeg_features is None:
        eeg_features = EEG_features
    if payment_method is None:
        payment_method = paymentMethod
    if pain_score_bins is None:
        pain_score_bins = painOfPayment_score_bins

    df_merged = pd.merge(eeg_features, payment_method, on="subject", how="inner")
    df_merged = pd.merge(df_merged, pain_score_bins, on="subject", how="inner")

    return df_merged

In [5]:
# Initialise the variables shared by the modelling cells.

# Column names selected from the merged table as model features.
waves = [
    'delta',
    'theta',
    'alpha',
    'beta',
    'gamma',
]

# Single table produced by merge_all() from the loaded input frames.
df_data = merge_all()

In [6]:
def model_RandomForest(X_train,y_train):
    '''
    Grid-search Random Forest hyper-parameters with 5-fold cross-validation.

    GridSearchCV tries every combination of the candidate values listed below
    and scores each one with 5-fold cross-validation on the data passed in,
    i.e. each fold holds out one fifth of X_train for validation.

    Returns the fitted GridSearchCV object (best_params_ holds the winner).
    '''
    # Candidate tree depths 5, 10, ..., 50, plus None (the estimator's default).
    depth_options = [int(depth) for depth in np.linspace(5, 50, 10)] + [None]

    search_space = {
        # Number of trees in the forest
        'n_estimators': [50, 100, 200, 300],
        # Number of features to consider at every split
        'max_features': ['sqrt', 'log2', None],
        # Maximum number of levels in a tree
        'max_depth': depth_options,
    }

    grid = GridSearchCV(
        estimator=RandomForestClassifier(random_state=42),
        param_grid=search_space,
        cv=5,
        verbose=2,
        n_jobs=-1,
    )

    # Fit the grid search on the training data
    grid.fit(X_train, y_train)
    return grid

In [7]:
def model_SVC(X_train, y_train):
    '''
    Grid-search SVC hyper-parameters with 5-fold cross-validation.

    GridSearchCV tries every (C, gamma) pair listed below and scores each one
    with 5-fold cross-validation on the data passed in, i.e. each fold holds
    out one fifth of X_train for validation.

    Returns the fitted GridSearchCV object (best_params_ holds the winner).
    '''
    # NOTE(review): the features are handed to SVC exactly as received; if they
    # are not already on comparable scales upstream, consider scaling them --
    # TODO confirm.
    search_space = {
        # Regularisation parameter
        'C': [0.1, 1, 10, 100, 1000],
        # Kernel coefficient
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    }

    grid = GridSearchCV(
        estimator=SVC(random_state=42),
        param_grid=search_space,
        cv=5,
        verbose=2,
        n_jobs=-1,
    )

    # Fit the grid search on the training data
    grid.fit(X_train, y_train)
    return grid

In [8]:
def get_hyperParameters(X_train, y_train):
    '''
    get model hyperParameters

    Runs the Random Forest and the SVC grid searches on the training data and
    prints the best parameter set found by each of them.

    Returns
    -------
    dict
        {'random_forest': <best RF params>, 'svc': <best SVC params>}, so the
        values can be reused programmatically instead of being copied by hand
        from the printed output.
    '''
    rfc_search = model_RandomForest(X_train, y_train)
    print(rfc_search.best_params_)

    SVC_search = model_SVC(X_train, y_train)
    print(SVC_search.best_params_)

    return {
        'random_forest': rfc_search.best_params_,
        'svc': SVC_search.best_params_,
    }

In [9]:
def _fit_and_report(model, X_train, X_test, y_train, y_test):
    '''Fit `model`, print its confusion matrix and classification report on the test data, return the predictions.'''
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    print(confusion_matrix(y_test, predictions))
    # zero_division=0 still reports 0.00 for classes that are never predicted,
    # but without the undefined-metric warning sklearn emits for each of them.
    print(classification_report(y_test, predictions, zero_division=0))

    return predictions


def get_model_1_predicrions(X_train, X_test, y_train, y_test, rf_params=None, svc_params=None):
    '''
    get model predictions

    Fits a Random Forest and an SVC on the training data, predicts on the test
    data and prints a confusion matrix and a classification report for each.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test features and labels, passed straight to fit() / predict().
    rf_params : dict, optional
        Overrides for the Random Forest settings. Defaults:
        {'n_estimators': 50, 'max_features': 'sqrt', 'max_depth': 5}.
    svc_params : dict, optional
        Overrides for the SVC settings. Defaults: {'C': 0.1, 'gamma': 1}.

    Both estimators use random_state=42 unless overridden.
    '''
    # random forest
    rf_settings = {'n_estimators': 50, 'max_features': 'sqrt', 'max_depth': 5, 'random_state': 42}
    rf_settings.update(rf_params or {})
    _fit_and_report(RandomForestClassifier(**rf_settings), X_train, X_test, y_train, y_test)

    # SVM
    svc_settings = {'C': 0.1, 'gamma': 1, 'random_state': 42}
    svc_settings.update(svc_params or {})
    _fit_and_report(SVC(**svc_settings), X_train, X_test, y_train, y_test)

    # TODO: XGboost

In [13]:
# MAIN cell for model 1
# Model 1 (first question): Tries to predict the payment method based on the
# EEG_features and painOfPayment.
# NOTE(review): X below selects only the `waves` columns; no pain-of-payment
# column is included as a feature -- confirm this is intended.

X = df_data.loc[:, waves]
y = df_data.loc[:, 'Payment_method']

# Hold out 20% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search for the best hyper-parameters on the training portion only ...
get_hyperParameters(X_train, y_train)

# ... then fit the models and report their performance on the held-out rows.
get_model_1_predicrions(X_train, X_test, y_train, y_test)

[[2 1 0]
 [2 3 2]
 [2 2 2]]
              precision    recall  f1-score   support

        Cash       0.33      0.67      0.44         3
 Credit Card       0.50      0.43      0.46         7
  Smartphone       0.50      0.33      0.40         6

    accuracy                           0.44        16
   macro avg       0.44      0.48      0.44        16
weighted avg       0.47      0.44      0.44        16

[[0 3 0]
 [0 7 0]
 [0 6 0]]
              precision    recall  f1-score   support

        Cash       0.00      0.00      0.00         3
 Credit Card       0.44      1.00      0.61         7
  Smartphone       0.00      0.00      0.00         6

    accuracy                           0.44        16
   macro avg       0.15      0.33      0.20        16
weighted avg       0.19      0.44      0.27        16



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
'''
MAIN function to get model 2
'''
# Model 2 (second question): Tries to predict the painOfPayment score based on the EEG_features


'\nMAIN function to get model 2\n'

In [12]:
'''
MAIN function to get model 3
'''
# Model 3 (third question): Tries to predict the payment amount based on the EEG_features

'\nMAIN function to get model 3\n'