### Imports

In [None]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler

from tabulate import tabulate

### Function for CV

In [2]:
def stratifiedkfold(kf, grid_search, y_true, y_pred, data, target):
    for train_index, test_index in kf.split(data, target):

        # Training phase
        x_train = data.iloc[train_index]
        y_train = target.iloc[train_index]
        
        rus = RandomUnderSampler(random_state=42)
        x_train_resampled, y_train_resampled = rus.fit_resample(x_train, y_train)

        grid_search.fit(x_train_resampled, y_train_resampled)
        
        clf_kf_cv = grid_search.best_estimator_
        
        print('Best parameters: ', grid_search.best_params_)

        # Test phase
        x_test = data.iloc[test_index]
        y_test = target.iloc[test_index]  
        y_true.append(y_test)
        y_pred.append(clf_kf_cv.predict(x_test))
    pass

# P300

## Archivo Michel

In [3]:
p300mich1 = pd.read_csv('MichFlechasFeatures.txt', sep = "\t", header = None)
useless_rows = []
for index, row in p300mich1.iterrows():
    if row[1] == 0:
        useless_rows.append(index)

p300mich1 = p300mich1.drop(useless_rows)
p300mich1 = p300mich1.drop([1,152], axis=1)
p300mich1 = p300mich1.reset_index(drop = True)
p300mich1.head()

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,142,143,144,145,146,147,148,149,150,151
0,1,-2.559311,-2.226564,-0.969904,0.895154,2.600135,3.008595,1.716012,-0.238857,-1.370326,...,0.276208,0.365666,0.423463,0.178581,-0.555389,-1.404109,-1.761936,-1.596914,-1.51676,-1.883596
1,1,-0.591904,-1.239847,-1.306299,-1.292547,-1.716696,-2.312961,-2.448143,-2.158418,-2.096092,...,0.730268,0.146788,-0.710786,-0.948481,-0.787767,-0.954385,-1.287491,-1.103642,-0.582118,-0.533074
2,1,-0.336419,-0.420086,-0.667222,-0.341556,0.283712,0.512859,0.42574,0.558908,0.783503,...,0.346338,-0.258329,-0.970773,-1.378206,-1.369884,-1.148282,-0.729237,0.0773,1.059575,1.483101
3,1,0.105759,0.126904,0.64337,1.21569,1.256958,1.04102,1.077285,1.060043,0.446895,...,1.434471,1.564873,1.367216,1.337781,1.212297,0.362325,-0.997121,-1.709845,-1.038044,0.341079
4,1,-1.944094,-1.124948,0.23313,0.907609,0.518125,-0.013713,0.093072,0.257662,-0.522591,...,1.196387,1.489753,1.76663,2.063998,2.038592,1.403001,0.409933,-0.298227,-0.348757,-0.012059


### Function for CV

In [4]:
def p300mich1_cross_validate(classifier, hyperparameters):
    
    p300mich1_target = p300mich1.iloc[:,0]
    p300mich1_data = p300mich1.iloc[:,1:]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, p300mich1_data, p300mich1_target)

    class_names = ["P300", "NoP300"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    report = report.replace("macro avg", "macroavg")
    report = report.replace("weighted avg", "weightedavg")
    rows = report.split('\n')
    rows = [row.split() for row in rows if row.strip()]
    return tabulate(rows, headers='firstrow', tablefmt='fancy_grid')

### Support Vector Classifier

In [5]:
svc_hyperparameters = {'C': [0.1, 1, 10],'kernel': ['linear', 'rbf']}
print(p300mich1_cross_validate(SVC(), svc_hyperparameters))

Best parameters:  {'C': 1, 'kernel': 'rbf'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 0.1, 'kernel': 'linear'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.41 │     0.74 │       0.52 │       204 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.92 │     0.74 │       0.82 │       826 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.74 │  1030    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.66 │     0.74 │       0.67 │      1030 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.82 │     0.74 │      

### k-Nearest Neighbors

In [6]:
knn_hyperparameters = {'n_neighbors': [3, 5, 7], 'weights': ['uniform', 'distance'],'metric': ['euclidean', 'manhattan']}
print(p300mich1_cross_validate(KNeighborsClassifier(), knn_hyperparameters))

Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'uniform'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.32 │     0.8  │       0.46 │       204 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.92 │     0.58 │       0.71 │       826 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.62 │  1030    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼─────────

### Multilayer Neural Network

In [7]:
mlp_hyperparameters = {'hidden_layer_sizes': [(2,2), (3,3)]}
print(p300mich1_cross_validate(MLPClassifier(max_iter = 10000), mlp_hyperparameters))

Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.37 │     0.76 │       0.5  │       204 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.92 │     0.68 │       0.78 │       826 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.7  │  1030    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.64 │     0.72 │       0.64 │      1030 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.81 │

### Random Forest

In [8]:
rf_hyperparameters = {'n_estimators': [450, 500]}
print(p300mich1_cross_validate(RandomForestClassifier(), rf_hyperparameters))

Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.43 │     0.74 │       0.54 │       204 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.92 │     0.76 │       0.83 │       826 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.75 │  1030    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.67 │     0.75 │       0.68 │      1030 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.82 │     0.75 │       0.77 │      1030 │
╘═══════

### Gradient Boosting

In [9]:
xgb_hyperparameters = {'n_estimators': [150, 200]}
print(p300mich1_cross_validate(GradientBoostingClassifier(), xgb_hyperparameters))

Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.41 │     0.76 │       0.53 │       204 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.93 │     0.73 │       0.81 │       826 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.73 │  1030    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.67 │     0.75 │       0.67 │      1030 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.82 │     0.73 │       0.76 │      1030 │
╘═══════

## Archivo Yamuni

In [10]:
p300yam = pd.read_csv('YamuniFlechasFeatures2.txt', sep = "\t", header = None)
useless_rows = []
for index, row in p300yam.iterrows():
    if row[1] == 0:
        useless_rows.append(index)

p300yam = p300yam.drop(useless_rows)
p300yam = p300yam.drop([1,152], axis=1)
p300yam = p300yam.reset_index(drop = True)
p300yam.head()

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,142,143,144,145,146,147,148,149,150,151
0,1,-0.919229,-1.380718,-1.124147,-0.729663,-0.817537,-1.213585,-1.292274,-0.801226,-0.080372,...,0.247962,-0.858389,-2.082738,-2.164006,-0.928395,0.486746,0.880277,0.134915,-0.949618,-1.546702
1,1,0.925647,0.6676,0.291659,0.223115,0.295247,0.392524,0.957851,2.070606,2.635877,...,-0.879356,-0.762943,0.140491,1.201332,1.589458,1.140753,0.312094,-0.412227,-0.750518,-0.571281
2,1,-0.337402,0.367344,1.726566,2.420437,1.574213,0.209781,-0.176053,0.267794,0.252293,...,-0.213049,0.288395,0.901006,0.768329,0.336552,0.655676,1.653647,2.188109,1.764756,1.147146
3,1,-0.130009,0.23822,0.835611,1.118757,0.807759,0.373482,0.198794,-0.004598,-0.408918,...,-1.409317,-0.713858,-0.084495,-0.338215,-0.667759,-0.210293,0.31021,-0.116875,-0.790848,-0.271671
4,1,0.586681,0.406013,0.399015,0.95845,1.591712,1.449305,0.598255,-0.01746,0.138447,...,0.844724,0.394012,-0.077335,0.029243,0.523625,0.525145,-0.258986,-1.040792,-0.954833,-0.064465


### Function for CV

In [11]:
def p300yam_cross_validate(classifier, hyperparameters):
    
    p300yam_target = p300yam.iloc[:,0]
    p300yam_data = p300yam.iloc[:,1:]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, p300yam_data, p300yam_target)

    class_names = ["P300", "NoP300"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    report = report.replace("macro avg", "macroavg")
    report = report.replace("weighted avg", "weightedavg")
    rows = report.split('\n')
    rows = [row.split() for row in rows if row.strip()]
    return tabulate(rows, headers='firstrow', tablefmt='fancy_grid')

### Support Vector Classifier

In [12]:
print(p300yam_cross_validate(SVC(), svc_hyperparameters))

Best parameters:  {'C': 1, 'kernel': 'rbf'}
Best parameters:  {'C': 1, 'kernel': 'rbf'}
Best parameters:  {'C': 0.1, 'kernel': 'rbf'}
Best parameters:  {'C': 0.1, 'kernel': 'rbf'}
Best parameters:  {'C': 1, 'kernel': 'linear'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.36 │     0.65 │       0.47 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.89 │     0.72 │       0.79 │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.7  │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.63 │     0.68 │       0.63 │      1348 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.79 │     0.7  │       

### k-Nearest Neighbors

In [13]:
print(p300yam_cross_validate(KNeighborsClassifier(), knn_hyperparameters))

Best parameters:  {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.27 │     0.54 │       0.36 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.85 │     0.63 │       0.73 │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.61 │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼─────────

### Multilayer Neural Network

In [14]:
print(p300yam_cross_validate(MLPClassifier(max_iter=10000), mlp_hyperparameters))

Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.32 │     0.69 │       0.44 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.89 │     0.64 │       0.74 │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.65 │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.61 │     0.67 │       0.59 │      1348 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.78 │

### Random Forest

In [15]:
print(p300yam_cross_validate(RandomForestClassifier(), rf_hyperparameters))

Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 500}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.36 │     0.72 │       0.48 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.91 │     0.68 │       0.77 │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.68 │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.63 │     0.7  │       0.62 │      1348 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.8  │     0.68 │       0.71 │      1348 │
╘═══════

### Gradient Boosting

In [16]:
print(p300yam_cross_validate(GradientBoostingClassifier(), xgb_hyperparameters))

Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.34 │     0.65 │       0.45 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.89 │     0.68 │       0.77 │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.67 │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.61 │     0.67 │       0.61 │      1348 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.78 │     0.67 │       0.7  │      1348 │
╘═══════

## Cognitive Tasks (Two Classes)

### Archivo Michel

In [17]:
ctmich = pd.read_csv('MichTareasFeatures.txt', sep = "\t", header = None)
useless_rows = []
for index, row in ctmich.iterrows():
    if row[1] == 0:
        useless_rows.append(index)

ctmich = ctmich.drop(useless_rows)
ctmich = ctmich.drop([1,218], axis=1)
ctmich = ctmich.reset_index(drop = True)

for index, row in ctmich.iterrows():
    if row[0] in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]:
        ctmich.at[index, 0] = 1
    else:
        ctmich.at[index, 0] = 0
        
ctmich.head()

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,208,209,210,211,212,213,214,215,216,217
0,1,8.889212,10.181752,8.503275,7.361181,6.10086,6.291361,4.656164,4.174572,10.313213,...,1.842248,1.854458,0.571283,0.964798,0.929418,1.552971,1.296322,1.023829,1.673087,1.510133
1,1,5.652767,8.042858,3.24738,4.13467,3.162395,9.280908,4.45786,4.711377,7.119301,...,2.245265,2.127966,1.155517,1.398074,0.632108,1.389232,1.681869,1.605331,2.264162,2.176949
2,1,4.104936,7.174717,4.713861,5.53653,5.891949,4.063717,3.889408,4.984165,4.448951,...,1.122711,0.781695,0.687759,0.432641,0.441181,1.244465,0.679333,0.72653,1.093091,0.795292
3,1,19.788445,15.877236,22.369973,20.095686,15.335189,22.178942,4.841323,6.310983,20.518696,...,1.461832,0.739749,0.722074,0.527056,1.162233,1.249377,1.274189,0.873493,1.513659,0.701651
4,1,28.094811,12.783838,4.219859,7.865875,8.862163,15.560597,9.503334,3.914084,35.476962,...,1.188925,1.403753,0.59869,0.525371,0.425958,1.021527,0.860188,0.894223,1.108798,1.328624


### Function for CV

In [18]:
def ctmich_cross_validate(classifier, hyperparameters):
    
    ctmich_target = ctmich.iloc[:,0]
    ctmich_data = ctmich.iloc[:,1:]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, ctmich_data, ctmich_target)

    class_names = ["NoTask", "Task"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    report = report.replace("macro avg", "macroavg")
    report = report.replace("weighted avg", "weightedavg")
    rows = report.split('\n')
    rows = [row.split() for row in rows if row.strip()]
    return tabulate(rows, headers='firstrow', tablefmt='fancy_grid')

### Support Vector Classifier

In [19]:
print(ctmich_cross_validate(SVC(), svc_hyperparameters))

Best parameters:  {'C': 1, 'kernel': 'linear'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 1, 'kernel': 'linear'}
Best parameters:  {'C': 1, 'kernel': 'linear'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.51 │     0.61 │       0.55 │        75 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.75 │     0.65 │       0.7  │       130 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.64 │   205    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.63 │     0.63 │       0.63 │       205 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.66 │     0.64 │   

### k-Nearest Neighbors

In [20]:
print(ctmich_cross_validate(KNeighborsClassifier(), knn_hyperparameters))

Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'uniform'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.44 │     0.59 │       0.51 │        75 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.71 │     0.58 │       0.64 │       130 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.58 │   205    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼─────────

### Multilayer Neural Network

In [21]:
print(ctmich_cross_validate(MLPClassifier(max_iter=10000), mlp_hyperparameters))

Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.41 │     0.55 │       0.47 │        75 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.67 │     0.54 │       0.6  │       130 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.54 │   205    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.54 │     0.54 │       0.53 │       205 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.58 │

### Random Forest

In [22]:
print(ctmich_cross_validate(RandomForestClassifier(), rf_hyperparameters))

Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.48 │     0.59 │       0.53 │        75 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.73 │     0.63 │       0.67 │       130 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.61 │   205    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.6  │     0.61 │       0.6  │       205 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.64 │     0.61 │       0.62 │       205 │
╘═══════

### Gradient Boosting

In [23]:
print(ctmich_cross_validate(GradientBoostingClassifier(), xgb_hyperparameters))

Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 200}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.41 │     0.51 │       0.45 │        75 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.67 │     0.58 │       0.62 │       130 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.55 │   205    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.54 │     0.54 │       0.54 │       205 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.57 │     0.55 │       0.56 │       205 │
╘═══════

### Archivo Yamuni

In [24]:
ctyam = pd.read_csv('YamuniTareasFeatures.txt', sep = "\t", header = None)
useless_rows = []
for index, row in ctyam.iterrows():
    if row[1] == 0:
        useless_rows.append(index)

ctyam = ctyam.drop(useless_rows)
ctyam = ctyam.drop([1,218], axis=1)
ctyam = ctyam.reset_index(drop = True)

for index, row in ctyam.iterrows():
    if row[0] in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]:
        ctyam.at[index, 0] = 1
    else:
        ctyam.at[index, 0] = 0

ctyam.head()

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,208,209,210,211,212,213,214,215,216,217
0,1,4.082759,3.540026,6.813089,1.644179,1.928761,4.578276,2.663328,1.685491,5.154945,...,2.011693,2.78111,0.623268,1.017995,0.620958,1.085525,1.5398,1.575473,1.759159,2.582784
1,1,17.997407,4.182849,16.891554,3.791352,2.737425,4.041567,2.842607,3.290411,19.564377,...,1.643242,1.77191,1.771657,2.487338,1.780088,1.57542,1.510829,1.84745,1.592856,1.645893
2,1,6.645055,2.804339,5.159441,3.592543,2.646521,1.565106,2.257845,3.229605,6.922075,...,2.563288,3.320609,0.794643,0.81226,0.925568,1.099832,1.348091,1.324041,2.312848,3.191038
3,1,9.86066,4.002138,9.453912,3.52029,4.90357,4.600238,5.796736,6.239554,11.612068,...,1.6038,2.179609,1.369098,1.533471,0.997768,1.436147,1.539725,1.451312,1.481863,1.940598
4,1,3.546819,2.251571,7.206801,3.309408,2.760355,4.053771,2.604074,1.768247,3.601283,...,1.006246,1.812694,0.867581,0.685329,0.585637,0.508474,0.811307,0.825016,0.722866,1.442172


### Function for CV

In [25]:
def ctyam_cross_validate(classifier, hyperparameters):
    
    ctyam_target = ctyam.iloc[:,0]
    ctyam_data = ctyam.iloc[:,1:]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, ctyam_data, ctyam_target)

    class_names = ["NoTask", "Task"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    report = report.replace("macro avg", "macroavg")
    report = report.replace("weighted avg", "weightedavg")
    rows = report.split('\n')
    rows = [row.split() for row in rows if row.strip()]
    return tabulate(rows, headers='firstrow', tablefmt='fancy_grid')

### Support Vector Classifier

In [26]:
print(ctyam_cross_validate(SVC(), svc_hyperparameters))

Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 1, 'kernel': 'linear'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.48 │     0.67 │       0.56 │        60 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.84 │     0.72 │       0.78 │       152 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.7  │   212    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.66 │     0.69 │       0.67 │       212 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.74 │     0.7  │       

### k-Nearest Neighbors

In [27]:
print(ctyam_cross_validate(KNeighborsClassifier(), knn_hyperparameters))

Best parameters:  {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.41 │     0.65 │       0.51 │        60 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.82 │     0.64 │       0.72 │       152 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.64 │   212    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼─────────

### Multilayer Neural Network

In [28]:
print(ctyam_cross_validate(MLPClassifier(max_iter=10000), mlp_hyperparameters))

Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.32 │     0.67 │       0.43 │        60 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.77 │     0.43 │       0.55 │       152 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.5  │   212    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.54 │     0.55 │       0.49 │       212 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.64 │

### Random Forest

In [29]:
print(ctyam_cross_validate(RandomForestClassifier(), rf_hyperparameters))

Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.38 │     0.6  │       0.47 │        60 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.8  │     0.62 │       0.7  │       152 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.61 │   212    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.59 │     0.61 │       0.58 │       212 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.68 │     0.61 │       0.63 │       212 │
╘═══════

### Gradient Boosting

In [30]:
print(ctyam_cross_validate(GradientBoostingClassifier(), xgb_hyperparameters))

Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ NoTask      │        0.31 │     0.48 │       0.38 │        60 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Task        │        0.74 │     0.57 │       0.64 │       152 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.55 │   212    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.52 │     0.53 │       0.51 │       212 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.62 │     0.55 │       0.57 │       212 │
╘═══════

# Cognitive Tasks (Three classes)

## Archivo Michel

In [31]:
ctmich2 = pd.read_csv('MichTareasFeatures.txt', sep = "\t", header = None)
useless_rows = []
for index, row in ctmich2.iterrows():
    if row[1] == 0:
        useless_rows.append(index)

ctmich2 = ctmich2.drop(useless_rows)
ctmich2 = ctmich2.drop([1,218], axis=1)
ctmich2 = ctmich2.reset_index(drop = True)

rows_to_drop = []
for index, row in ctmich2.iterrows():
    if row[0] in [1, 2, 3, 4]:
        ctmich2.at[index, 0] = 1
    elif row[0] in [5, 6, 7, 8]:
        ctmich2.at[index, 0] = 2
    elif row[0] in [9, 10, 11, 12]:
        ctmich2.at[index, 0] = 3
    else:
        rows_to_drop.append(index)
ctmich2 = ctmich2.drop(rows_to_drop)
        
ctmich2.head()

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,208,209,210,211,212,213,214,215,216,217
0,1,8.889212,10.181752,8.503275,7.361181,6.10086,6.291361,4.656164,4.174572,10.313213,...,1.842248,1.854458,0.571283,0.964798,0.929418,1.552971,1.296322,1.023829,1.673087,1.510133
1,1,5.652767,8.042858,3.24738,4.13467,3.162395,9.280908,4.45786,4.711377,7.119301,...,2.245265,2.127966,1.155517,1.398074,0.632108,1.389232,1.681869,1.605331,2.264162,2.176949
2,1,4.104936,7.174717,4.713861,5.53653,5.891949,4.063717,3.889408,4.984165,4.448951,...,1.122711,0.781695,0.687759,0.432641,0.441181,1.244465,0.679333,0.72653,1.093091,0.795292
3,1,19.788445,15.877236,22.369973,20.095686,15.335189,22.178942,4.841323,6.310983,20.518696,...,1.461832,0.739749,0.722074,0.527056,1.162233,1.249377,1.274189,0.873493,1.513659,0.701651
4,1,28.094811,12.783838,4.219859,7.865875,8.862163,15.560597,9.503334,3.914084,35.476962,...,1.188925,1.403753,0.59869,0.525371,0.425958,1.021527,0.860188,0.894223,1.108798,1.328624


### Function for CV

In [32]:
def ctmich2_cross_validate(classifier, hyperparameters):
    
    ctmich2_target = ctmich2.iloc[:,0]
    ctmich2_data = ctmich2.iloc[:,1:]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, ctmich2_data, ctmich2_target)

    class_names = ["Recordar", "Lectura", "Calculo"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    report = report.replace("macro avg", "macroavg")
    report = report.replace("weighted avg", "weightedavg")
    rows = report.split('\n')
    rows = [row.split() for row in rows if row.strip()]
    return tabulate(rows, headers='firstrow', tablefmt='fancy_grid')

### Support Vector Classifier

In [33]:
print(ctmich2_cross_validate(SVC(), svc_hyperparameters))

Best parameters:  {'C': 1, 'kernel': 'linear'}
Best parameters:  {'C': 0.1, 'kernel': 'linear'}
Best parameters:  {'C': 1, 'kernel': 'linear'}
Best parameters:  {'C': 0.1, 'kernel': 'linear'}
Best parameters:  {'C': 0.1, 'kernel': 'linear'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.4  │     0.47 │       0.43 │        43 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.89 │     0.74 │       0.81 │        42 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.47 │     0.47 │       0.47 │        45 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.55 │   130    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.58 │    

### k-Nearest Neighbors

In [34]:
print(ctmich2_cross_validate(KNeighborsClassifier(), knn_hyperparameters))

Best parameters:  {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.46 │     0.51 │       0.48 │        43 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.72 │     0.79 │       0.75 │        42 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.61 │     0.49 │       0.54 │        45 │
├─────────────┼─────────────┼──────────┼────────────┼──────

### Multilayer Neural Network

In [35]:
print(ctmich2_cross_validate(MLPClassifier(max_iter=10000), mlp_hyperparameters))

Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.5  │     0.09 │       0.16 │        43 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.44 │     0.83 │       0.57 │        42 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.48 │     0.44 │       0.46 │        45 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.45 │   130    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.47 │

### Random Forest

In [36]:
print(ctmich2_cross_validate(RandomForestClassifier(), rf_hyperparameters))

Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 500}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.47 │     0.37 │       0.42 │        43 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.71 │     0.86 │       0.77 │        42 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.51 │     0.51 │       0.51 │        45 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.58 │   130    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.56 │     0.58 │       0.57 │       130 │
├───────

### Gradient Boosting

In [37]:
print(ctmich2_cross_validate(GradientBoostingClassifier(), xgb_hyperparameters))

Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.39 │     0.42 │       0.4  │        43 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.71 │     0.76 │       0.74 │        42 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.44 │     0.38 │       0.4  │        45 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.52 │   130    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.51 │     0.52 │       0.51 │       130 │
├───────

## Archivo Yamuni

In [38]:
ctyam2 = pd.read_csv('YamuniTareasFeatures.txt', sep = "\t", header = None)
useless_rows = []
for index, row in ctyam2.iterrows():
    if row[1] == 0:
        useless_rows.append(index)

ctyam2 = ctyam2.drop(useless_rows)
ctyam2 = ctyam2.drop([1,218], axis=1)
ctyam2 = ctyam2.reset_index(drop = True)

rows_to_drop = []
for index, row in ctyam2.iterrows():
    if row[0] in [1, 2, 3, 4]:
        ctyam2.at[index, 0] = 1
    elif row[0] in [5, 6, 7, 8]:
        ctyam2.at[index, 0] = 2
    elif row[0] in [9, 10, 11, 12]:
        ctyam2.at[index, 0] = 3
    else:
        rows_to_drop.append(index)
ctyam2 = ctyam2.drop(rows_to_drop)
        
ctyam2.head()

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,208,209,210,211,212,213,214,215,216,217
0,1,4.082759,3.540026,6.813089,1.644179,1.928761,4.578276,2.663328,1.685491,5.154945,...,2.011693,2.78111,0.623268,1.017995,0.620958,1.085525,1.5398,1.575473,1.759159,2.582784
1,1,17.997407,4.182849,16.891554,3.791352,2.737425,4.041567,2.842607,3.290411,19.564377,...,1.643242,1.77191,1.771657,2.487338,1.780088,1.57542,1.510829,1.84745,1.592856,1.645893
2,1,6.645055,2.804339,5.159441,3.592543,2.646521,1.565106,2.257845,3.229605,6.922075,...,2.563288,3.320609,0.794643,0.81226,0.925568,1.099832,1.348091,1.324041,2.312848,3.191038
3,1,9.86066,4.002138,9.453912,3.52029,4.90357,4.600238,5.796736,6.239554,11.612068,...,1.6038,2.179609,1.369098,1.533471,0.997768,1.436147,1.539725,1.451312,1.481863,1.940598
4,1,3.546819,2.251571,7.206801,3.309408,2.760355,4.053771,2.604074,1.768247,3.601283,...,1.006246,1.812694,0.867581,0.685329,0.585637,0.508474,0.811307,0.825016,0.722866,1.442172


### Function for CV

In [39]:
def ctyam2_cross_validate(classifier, hyperparameters):
    
    ctyam2_target = ctyam2.iloc[:,0]
    ctyam2_data = ctyam2.iloc[:,1:]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, ctyam2_data, ctyam2_target)

    class_names = ["Recordar", "Lectura", "Calculo"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    report = report.replace("macro avg", "macroavg")
    report = report.replace("weighted avg", "weightedavg")
    rows = report.split('\n')
    rows = [row.split() for row in rows if row.strip()]
    return tabulate(rows, headers='firstrow', tablefmt='fancy_grid')

### Support Vector Classifier

In [40]:
print(ctyam2_cross_validate(SVC(), svc_hyperparameters))

Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 0.1, 'kernel': 'linear'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 10, 'kernel': 'rbf'}
Best parameters:  {'C': 0.1, 'kernel': 'linear'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.59 │     0.72 │       0.65 │        53 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.72 │     0.6  │       0.65 │        52 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.44 │     0.43 │       0.43 │        47 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.59 │   152    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.59 │     0.58 │ 

### k-Nearest Neighbors

In [41]:
print(ctyam2_cross_validate(KNeighborsClassifier(), knn_hyperparameters))

Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
Best parameters:  {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.54 │     0.64 │       0.59 │        53 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.7  │     0.67 │       0.69 │        52 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.31 │     0.26 │       0.28 │        47 │
├─────────────┼─────────────┼──────────┼────────────┼───────

### Multilayer Neural Network

In [42]:
print(ctyam2_cross_validate(MLPClassifier(max_iter=10000), mlp_hyperparameters))

Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (2, 2)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
Best parameters:  {'hidden_layer_sizes': (3, 3)}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.36 │     0.49 │       0.42 │        53 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.54 │     0.29 │       0.37 │        52 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.33 │     0.36 │       0.34 │        47 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.38 │   152    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.41 │

### Random Forest

In [43]:
print(ctyam2_cross_validate(RandomForestClassifier(), rf_hyperparameters))

Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
Best parameters:  {'n_estimators': 450}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.66 │     0.7  │       0.68 │        53 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.73 │     0.67 │       0.7  │        52 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.48 │     0.49 │       0.48 │        47 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.62 │   152    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.62 │     0.62 │       0.62 │       152 │
├───────

### Gradient Boosting

In [44]:
print(ctyam2_cross_validate(GradientBoostingClassifier(), xgb_hyperparameters))

Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 200}
╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ Recordar    │        0.6  │     0.53 │       0.56 │        53 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Lectura     │        0.76 │     0.67 │       0.71 │        52 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ Calculo     │        0.42 │     0.53 │       0.47 │        47 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.58 │   152    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.59 │     0.58 │       0.58 │       152 │
├───────

## Aprendizaje por transferencia

In [75]:
p300mich1_target = p300mich1.iloc[:,0]
p300mich1_data = p300mich1.iloc[:,1:]
rus = RandomUnderSampler()
p300mich1_data_resampled, p300mich1_target_resampled = rus.fit_resample(p300mich1_data, p300mich1_target)
p300yam_target = p300yam.iloc[:,0]
p300yam_data = p300yam.iloc[:,1:]

### Support Vector Classifier

In [76]:
svc = SVC(C=10, kernel='rbf')
svc.fit(p300mich1_data_resampled, p300mich1_target_resampled)

y_pred = svc.predict(p300yam_data)
y_true = p300yam_target

class_names = ["P300", "NoP300"]
report = classification_report(y_true, y_pred, target_names=class_names)
report = report.replace("macro avg", "macroavg")
report = report.replace("weighted avg", "weightedavg")
rows = report.split('\n')
rows = [row.split() for row in rows if row.strip()]
table = tabulate(rows, headers='firstrow', tablefmt='fancy_grid')
print(table)

╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.15 │     0.26 │       0.19 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.77 │     0.63 │       0.69 │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.55 │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.46 │     0.44 │       0.44 │      1348 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.65 │     0.55 │       0.59 │      1348 │
╘═════════════╧═════════════╧══════════╧════════════╧═══════════╛


### Random Forest

In [77]:
svc = RandomForestClassifier(n_estimators =450)
svc.fit(p300mich1_data_resampled, p300mich1_target_resampled)

y_pred = svc.predict(p300yam_data)
y_true = p300yam_target

class_names = ["P300", "NoP300"]
report = classification_report(y_true, y_pred, target_names=class_names)
report = report.replace("macro avg", "macroavg")
report = report.replace("weighted avg", "weightedavg")
rows = report.split('\n')
rows = [row.split() for row in rows if row.strip()]
table = tabulate(rows, headers='firstrow', tablefmt='fancy_grid')
print(table)

╒═════════════╤═════════════╤══════════╤════════════╤═══════════╕
│             │   precision │   recall │   f1-score │   support │
╞═════════════╪═════════════╪══════════╪════════════╪═══════════╡
│ P300        │        0.12 │     0.2  │       0.15 │       269 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ NoP300      │        0.76 │     0.65 │       0.7  │      1079 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ accuracy    │        0.56 │  1348    │            │           │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ macroavg    │        0.44 │     0.42 │       0.43 │      1348 │
├─────────────┼─────────────┼──────────┼────────────┼───────────┤
│ weightedavg │        0.64 │     0.56 │       0.59 │      1348 │
╘═════════════╧═════════════╧══════════╧════════════╧═══════════╛


# Ejemplo feature selection

In [78]:
from itertools import cycle
import matplotlib.pyplot as plt
gbcm=GradientBoostingClassifier()
X = p300mich1.iloc[:,1:]
y = p300mich1.iloc[:,0]
gbcm.fit(X,y)

impuritys_gbc = gbcm.feature_importances_

for a in range(2000,0,-1):
    a=a/1000
    if impuritys_gbc[impuritys_gbc>a].sum()>=0.8:
        print(impuritys_gbc[impuritys_gbc>a].sum())
        print(len(impuritys_gbc[impuritys_gbc>a]))
        print(impuritys_gbc[impuritys_gbc>a])
        print(a)
        n_len=len(impuritys_gbc[impuritys_gbc>a])
        cols=impuritys_gbc>a
        break


def p300mich1_cross_validate(classifier, hyperparameters):
    
    p300mich1_target = p300mich1.iloc[:,0]
    p300mich1_data = p300mich1.iloc[:,1:]
    p300mich1_data = p300mich1_data.iloc[:,cols]
    
    n_folds = 5
    kf = StratifiedKFold(n_splits=n_folds, shuffle = True)

    acc = 0
    recall = np.array([0., 0.])
    precision = np.array([0., 0.])

    y_true = []
    y_pred = []
    
    param_grid = hyperparameters
    
    grid_search = GridSearchCV(classifier, param_grid, cv=kf)

    stratifiedkfold(kf, grid_search, y_true, y_pred, p300mich1_data, p300mich1_target)

    class_names = ["P300", "No P300"]
    report = classification_report(np.concatenate(y_true), np.concatenate(y_pred), target_names = class_names)
    return report

xgb_hyperparameters = {'n_estimators': [150, 200]}
print(p300mich1_cross_validate(GradientBoostingClassifier(max_features=n_len), xgb_hyperparameters))

0.8070729480235022
46
[0.01117807 0.00611808 0.01384605 0.01518666 0.00658807 0.00678341
 0.03956194 0.02930018 0.05256137 0.01571009 0.00648111 0.02544276
 0.01892566 0.00851477 0.00820159 0.01650498 0.0420277  0.00747649
 0.01543071 0.00794174 0.00833035 0.01259321 0.00721815 0.00883102
 0.01488392 0.00889911 0.00864752 0.01831023 0.0172571  0.05876284
 0.03086841 0.01668737 0.01781542 0.00606987 0.02936139 0.00675357
 0.01262337 0.02527568 0.05767744 0.01183128 0.01168076 0.02478149
 0.01068327 0.00826324 0.01220714 0.00697833]
0.006
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 150}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 200}
Best parameters:  {'n_estimators': 200}
              precision    recall  f1-score   support

        P300       0.45      0.74      0.56       204
     No P300       0.92      0.77      0.84       826

    accuracy                           0.77      1030
   macro avg       0.68      0.76      