# Generar 'X' e 'y' para todos los experimentos
<p>Como datos para las X se toma la media de los 17 canales de:</p>
<ul>
    <li>X_1: PSD de la banda delta (1~4Hz)</li>
    <li>X_2: PSD de la banda theta (4~8Hz)</li>
    <li>X_3: PSD de la banda alpha (8~14Hz)</li>
    <li>X_4: PSD de la banda beta (14~31Hz)</li>
    <li>X_5: PSD de la banda gamma (31~50 Hz)</li>
</ul>

In [None]:
import mne
from mne.externals.pymatreader import read_mat

from read import read_features

import matplotlib
import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [None]:
# Integer code assigned to each drowsiness level.
labels = {
    'leve': 0,
    'moderada': 1,
    'severa': 2
}

def parse_y(y, lim_leve=0.075, lim_moderada=0.15):
    """Map every value of ``y`` to its drowsiness class code.

    Values ``<= lim_leve`` map to ``labels['leve']``, values
    ``<= lim_moderada`` map to ``labels['moderada']`` and everything else
    (including NaN, for which both comparisons are false) maps to
    ``labels['severa']``.

    Parameters
    ----------
    y : sequence of numbers
        Values to classify (PERCLOS values in this notebook).
    lim_leve : float
        Inclusive upper bound of the 'leve' class.
    lim_moderada : float
        Inclusive upper bound of the 'moderada' class.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one class code per element of ``y``.
    """
    def classify(val):
        # The order of the checks matters: the lowest threshold wins.
        if val <= lim_leve:
            level = 'leve'
        elif val <= lim_moderada:
            level = 'moderada'
        else:
            level = 'severa'
        return labels[level]

    return np.fromiter(map(classify, y), dtype=int)

In [None]:
# Experiments with the best-looking plots
# features_files = ['4_20151105_noon.mat', '4_20151107_noon.mat', '5_20141108_noon.mat','12_20150928_noon.mat','14_20151014_night.mat', '18_20150926_noon.mat', '21_20151016_noon.mat']

# features_files = ['4_20151105_noon.mat', '5_20141108_noon.mat', '21_20151016_noon.mat']

features_files = ['1_20151124_noon_2.csv', '2_20151106_noon.csv', '3_20151024_noon.csv','4_20151105_noon.csv', '4_20151107_noon.csv',
            '5_20141108_noon.csv', '5_20151012_night.csv', '6_20151121_noon.csv','7_20151015_night.csv', '8_20151022_noon.csv', 
            '9_20151017_night.csv', '10_20151125_noon.csv', '11_20151024_night.csv', '12_20150928_noon.csv', '13_20150929_noon.csv',
            '14_20151014_night.csv','15_20151126_night.csv', '16_20151128_night.csv', '17_20150925_noon.csv', '18_20150926_noon.csv',
            '19_20151114_noon.csv', '20_20151129_night.csv', '21_20151016_noon.csv']

# features_files = ['21_20151016_noon.csv']

X_all = {} # min-max scaled PSD band means, keyed by experiment name
X_all_eog = {} # same PSD columns plus the scaled blink count per epoch
y_all = {} # raw PERCLOS values
y_all_limits = {} # per-experiment PERCLOS thresholds [lim_leve, lim_moderada]
y_all_class = {} # PERCLOS values mapped to their class code

n_samples = 885 # epochs used per experiment
dur_sample = 8 # seconds covered by one epoch
n_channels = 17 # EEG channels averaged for the PSD features
threshold_window = 5 # seconds at the start of the recording used to calibrate the blink threshold

for experiment in features_files:
    exp_name = experiment[:-4] # file name without its 4-character extension

    # ---- EOG: number of blinks per epoch ----
    mat_data = read_mat(f'./SEED-VIG/Raw_Data/{exp_name}.mat')

    sfreq = mat_data['EOG']['eog_config']['current_sample_rate']
    # Horizontal and vertical EOG stacked as two channels, scaled by 1e-6
    # (presumably microvolts to volts -- confirm against the dataset docs).
    samples = np.vstack((mat_data['EOG']['eog_h']*1e-6, mat_data['EOG']['eog_v']*1e-6))

    ch_names = ['EOG_H', 'EOG_V']
    ch_types = ["eog"]*len(ch_names)

    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)

    info.set_montage('standard_1020')

    raw = mne.io.RawArray(samples, info, verbose=False)

    # Blink threshold: absolute value of the 2.5th percentile of the vertical
    # EOG channel over the first `threshold_window` seconds. The window is now
    # derived from the recording's sample rate; it used to be a fixed 5*1000
    # samples, which only equals 5 s at 1000 Hz while the epoch length below
    # assumed 125 Hz.
    threshold = abs(np.percentile(raw.get_data()[1][:int(threshold_window*sfreq)], 2.5))

    blinks = mne.preprocessing.find_eog_events(raw, filter_length='10s', thresh=threshold, verbose=False)
    blinks = blinks.T[0] # first column of the events array: sample index of each blink

    # Count blinks per epoch. Blinks that fall after the last epoch are
    # dropped instead of raising IndexError.
    len_sample = int(dur_sample*sfreq) # samples per epoch
    epoch_of_blink = blinks//len_sample
    epoch_of_blink = epoch_of_blink[epoch_of_blink < n_samples]
    n_blinks_per_sample = np.bincount(epoch_of_blink, minlength=n_samples)

    scaler = MinMaxScaler(feature_range=(0, 1))
    n_blinks_per_sample = scaler.fit_transform(n_blinks_per_sample.reshape(-1,1))


    # ---- PSD: mean over channels of the five band powers ----
    mat_data = read_mat(f'./SEED-VIG/EEG_Feature_5Bands/{exp_name}.mat')
    perclos_data = read_mat(f'./SEED-VIG/perclos_labels/{exp_name}.mat')
    y = np.array(perclos_data['perclos'])

    # psd_data is indexed as [channel][epoch][band]; bands 0..4 are
    # delta, theta, alpha, beta and gamma.
    psd_data = np.asarray(mat_data['psd_movingAve'])
    band_means = psd_data[:n_channels, :n_samples, :5].mean(axis=0)

    # MinMaxScaler scales every column independently, so one scaler over the
    # five band columns matches scaling each band on its own and stacking.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X = scaler.fit_transform(band_means)

    X_all[exp_name] = X # PSD only

    X = np.hstack((X, n_blinks_per_sample))

    X_all_eog[exp_name] = X # PSD + EOG

    # ---- Labels ----
    y_all[exp_name] = y # raw PERCLOS

    # Class thresholds sit at 12.5% and 30% of this experiment's PERCLOS range.
    range_values = (np.max(y)-np.min(y))/100
    lim_leve = np.min(y)+range_values*12.5
    lim_moderada = np.min(y)+range_values*30
    y_all_limits[exp_name] = [lim_leve, lim_moderada]

    y_all_class[exp_name] = parse_y(y, lim_leve, lim_moderada) # already classified

print(f'Generados los diccionarios X_all e y_all')

In [None]:
# Sanity check: shape of one PSD+EOG feature matrix, then every experiment key.
eog_shape = X_all_eog['21_20151016_noon'].shape
print(eog_shape)

for key in X_all.keys():
    print(key)

# Búsqueda de hiperparámetros y muestra de los resultados

## Funciones para la búsqueda y el plot

In [None]:
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import StratifiedShuffleSplit

from sklearn.model_selection import train_test_split

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import plot_confusion_matrix

from scipy.stats import uniform, randint, loguniform, gamma

def find_and_test_best_model_class(X, y, model, space, cv=10, n_iter=100, scoring=None, conf_mat=True):
    """Run a randomized hyperparameter search and score the best model on a hold-out split.

    The data is split 75/25 with ``train_test_split(random_state=42)``. A
    ``RandomizedSearchCV`` is fitted on the training part and its best
    estimator is evaluated on the test part. Results are printed.

    Parameters
    ----------
    X, y
        Feature matrix and class labels.
    model
        Estimator passed to ``RandomizedSearchCV``.
    space : dict
        Parameter distributions for the search.
    cv
        Cross-validation argument forwarded to ``RandomizedSearchCV``.
    n_iter : int
        Number of parameter settings sampled by the search.
    scoring
        Scoring argument forwarded to ``RandomizedSearchCV``.
    conf_mat : bool
        When true, a confusion matrix over the *complete* ``X``/``y``
        (training rows included) is drawn and saved as a PDF. This step is
        best-effort: a failure is reported but does not abort the run.

    Returns
    -------
    tuple
        ``(best_model, acc, pre, rec, f1)``; precision, recall and f1 are
        macro-averaged with ``zero_division=1``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    search = RandomizedSearchCV(model, space, n_iter=n_iter, scoring=scoring, n_jobs=-1, cv=cv, random_state=42)
    result = search.fit(X_train, y_train)

    print(f"Best Model:")
    print(f"\tScore: {result.best_score_:.4f}")
    print(f"\tHyperparameters: {result.best_params_}")

    print('\nResults with X_test:')
    best_model = result.best_estimator_
    final_predictions = best_model.predict(X_test)

    # TODO: sklearn.metrics.classification_report reports these metrics and a few more; might be worth using
    acc = accuracy_score(y_test, final_predictions)
    pre = precision_score(y_test, final_predictions, average='macro', zero_division=1)
    rec = recall_score(y_test, final_predictions, average='macro', zero_division=1)
    f1 = f1_score(y_test, final_predictions, average='macro', zero_division=1)
    print(f"\tAccuracy: {acc*100:.2f}%")
    print(f"\tPrecision: {pre:.4f}")
    print(f"\tRecall: {rec:.4f}")
    print(f"\tf1: {f1:.4f}")

    # The confusion matrix is only produced when conf_mat=True.
    if conf_mat:
        print(f'Matriz de confusión para X completo')
        try:
            plt.rcParams.update({'font.size': 16, 'font.weight': 'bold'})
            fig, ax = plt.subplots(figsize=(10, 5))
            plot_confusion_matrix(best_model, X, y, ax=ax, display_labels=['leve', 'moderada', 'severa'], colorbar=False)
            size_ticks = 16
            plt.xticks(fontsize=size_ticks, fontweight='normal')
            plt.yticks(fontsize=size_ticks, fontweight='normal')
            size_label = 20
            plt.xlabel('Predicted Label', fontweight='bold', labelpad=10, fontsize=size_label)
            plt.ylabel('True Label', fontweight='bold', labelpad=10, fontsize=size_label)
            plt.savefig("E:/UNIVERSIDAD/TFG/TRABAJO/Images-Test/NOMBRE.pdf", bbox_inches='tight')
        except Exception as exc:
            # Still best-effort, but no longer silent: a missing output folder
            # or a plotting error is reported so it can be diagnosed.
            print(f"\tWarning: could not plot/save the confusion matrix: {exc!r}")

    return best_model, acc, pre, rec, f1

def map_2_colors(y, labels):
    """Translate class codes into matplotlib color names.

    Parameters
    ----------
    y : iterable of int
        Class codes; 0, 1 and 2 map to 'green', 'orange' and 'red'.
    labels
        Unused; kept so existing callers keep working.

    Returns
    -------
    list of str
        One color name per element of ``y``, in order.

    Raises
    ------
    KeyError
        If ``y`` contains a code other than 0, 1 or 2.
    """
    label_2_color = {
        0: 'green',
        1: 'orange',
        2: 'red'
    }

    # Iterating directly (instead of indexing by position) also accepts
    # iterators and other objects without len() or integer indexing.
    return [label_2_color[code] for code in y]

def plot_results_class(X, y, model, lim_leve_moderada, lim_moderada_severa):
    """Scatter ``y`` per epoch, colored by the class ``model`` predicts for ``X``.

    Three horizontal bands (0 to ``lim_leve_moderada``, then up to
    ``lim_moderada_severa``, then up to 1) are shaded green, orange and red
    behind the points. The figure is saved as a PDF and then shown.
    """
    plt.rcParams.update({'font.size': 30, 'font.weight': 'normal'})
    plt.figure(figsize=(20, 8))

    # (lower bound, upper bound, color, legend label) of each shaded band.
    bands = (
        (0, lim_leve_moderada, 'green', "Somnolencia leve"),
        (lim_leve_moderada, lim_moderada_severa, 'orange', "Somnolencia moderada"),
        (lim_moderada_severa, 1, 'red', "Somnolencia severa"),
    )
    for lower, upper, shade, caption in bands:
        plt.axhspan(lower, upper, facecolor=shade, alpha=0.2, label=caption)

#     plt.plot(y, c='k', alpha=0.6)
    colors = map_2_colors(model.predict(X), labels)
    plt.scatter(range(len(y)), y, c=colors)
#     plt.legend(bbox_to_anchor=(1, 1), loc='upper left')

    size_label = 26
    axis_label_style = dict(fontweight='bold', labelpad=10, fontsize=size_label)
    plt.xlabel('Epoch', **axis_label_style)
    plt.ylabel('PERCLOS', **axis_label_style)

    size_ticks = 24
    plt.xticks(fontsize=size_ticks)
    plt.yticks(fontsize=size_ticks)

    plt.savefig("E:/UNIVERSIDAD/TFG/TRABAJO/Images-Test/NOMBRE2.pdf", bbox_inches='tight')
    plt.show()

# SVM classifier (No va tan bien)

In [None]:
from sklearn import svm

### Mean, STD, var, p05, ...

In [None]:
# Per-experiment metrics; each key becomes a column of the results CSV.
csv_dict = {
    'sujeto': [],
    'acc': [],
    'pre': [],
    'rec': [],
    "f1": []
}

for file in features_files:
    key = file[:-4] # experiment name: file name without the '.csv' extension

    # Only the feature matrix from the CSV is used; the targets come from the
    # PERCLOS dictionaries, so the second value returned by read_features is
    # ignored. The StandardScaler copy that was computed here was never used
    # and has been removed.
    # Variant used previously: read_features(file, features=[0, 1])
    X = read_features(f'./features/{key}.csv')[0]
    y = y_all[key]
    y_class = y_all_class[key]

    # NOTE(review): the scaler is fitted on every row before
    # find_and_test_best_model_class splits train/test, so test rows influence the scaling.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    print(f"Experiment: {key}")
    model = svm.SVC(random_state=42)

    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for SVC.
    space = dict()
    space['C'] = uniform(1, 20)
    space['kernel'] = ['linear', 'rbf']
    space['gamma'] = ['scale', 'auto']
    space['shrinking'] = [True, False]
    space['probability'] = [True, False]
    space['decision_function_shape'] = ['ovo', 'ovr']

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(X_minmax, y_class, model, space, cv, 35)

    csv_dict['sujeto'].append(key)
    csv_dict['acc'].append(acc)
    csv_dict['pre'].append(pre)
    csv_dict['rec'].append(rec)
    csv_dict['f1'].append(f1)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_minmax, y, best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/SVM_c-EEG.csv', index=False)

### PSD

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all[key] # PSD features of this experiment

    model = svm.SVC(random_state=42)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for SVC.
    space = {
        'C': uniform(1, 20),
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto'],
        'shrinking': [True, False],
        'probability': [True, False],
        'decision_function_shape': ['ovo', 'ovr'],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 100
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/SVM_c-PSD.csv', index=False)

### PSD + EOG

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all_eog[key] # PSD + EOG features of this experiment

    model = svm.SVC(random_state=42)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for SVC.
    space = {
        'C': uniform(1, 20),
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto'],
        'shrinking': [True, False],
        'probability': [True, False],
        'decision_function_shape': ['ovo', 'ovr'],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 100
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/SVM_c-PSD+EOG.csv', index=False)

# KNN classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

### Mean, STD, var, p05, ...

In [None]:
# Per-experiment metrics; each key becomes a column of the results CSV.
csv_dict = {
    'sujeto': [],
    'acc': [],
    'pre': [],
    'rec': [],
    "f1": []
}

for file in features_files:
    key = file[:-4] # experiment name: file name without the '.csv' extension

    # Only the feature matrix from the CSV is used; the targets come from the
    # PERCLOS dictionaries, so the second value returned by read_features is
    # ignored. The StandardScaler copy that was computed here was never used
    # and has been removed.
    # Variant used previously: read_features(file, features=[0, 1])
    X = read_features(f'./features/{key}.csv')[0]
    y = y_all[key]
    y_class = y_all_class[key]

    # NOTE(review): the scaler is fitted on every row before
    # find_and_test_best_model_class splits train/test, so test rows influence the scaling.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    print(f"Experiment: {key}")
    model = KNeighborsClassifier()

    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for KNeighborsClassifier.
    space = dict()
    space['n_neighbors'] = randint(3, 21)
    space['weights'] = ['uniform', 'distance']
    space['algorithm'] = ['auto', 'ball_tree', 'kd_tree', 'brute']
    space['leaf_size'] = randint(15,50)
    space['p'] = [1, 2]

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(X_minmax, y_class, model, space, cv, 100)

    csv_dict['sujeto'].append(key)
    csv_dict['acc'].append(acc)
    csv_dict['pre'].append(pre)
    csv_dict['rec'].append(rec)
    csv_dict['f1'].append(f1)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_minmax, y, best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/KNN-EEG.csv', index=False)

### PSD

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all[key] # PSD features of this experiment

    model = KNeighborsClassifier()
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for KNeighborsClassifier.
    space = {
        'n_neighbors': randint(3, 21),
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'leaf_size': randint(15, 50),
        'p': [1, 2],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 100
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/KNN-PSD.csv', index=False)

### PSD + EOG

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all_eog[key] # PSD + EOG features of this experiment

    model = KNeighborsClassifier()
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for KNeighborsClassifier.
    space = {
        'n_neighbors': randint(3, 21),
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'leaf_size': randint(15, 50),
        'p': [1, 2],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 100
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/KNN-PSD+EOG.csv', index=False)

# Decision Tree classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

### Mean, STD, var, p05, ...

In [None]:
# Per-experiment metrics; each key becomes a column of the results CSV.
csv_dict = {
    'sujeto': [],
    'acc': [],
    'pre': [],
    'rec': [],
    "f1": []
}

for file in features_files:
    key = file[:-4] # experiment name: file name without the '.csv' extension

    # Only the feature matrix from the CSV is used; the targets come from the
    # PERCLOS dictionaries, so the second value returned by read_features is
    # ignored. The StandardScaler copy that was computed here was never used
    # and has been removed.
    # Variant used previously: read_features(file, features=[0, 1])
    X = read_features(f'./features/{key}.csv')[0]
    y = y_all[key]
    y_class = y_all_class[key]

    # NOTE(review): the scaler is fitted on every row before
    # find_and_test_best_model_class splits train/test, so test rows influence the scaling.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    print(f"Experiment: {key}")
    model = DecisionTreeClassifier(random_state=42)

    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for DecisionTreeClassifier.
    space = dict()
    space['criterion'] = ["gini", "entropy"]
    space['splitter'] = ["best", "random"]
    space['max_depth'] = randint(5, 100)
    space['min_samples_split'] = randint(2, 10)
    #space['min_samples_leaf'] = randint(1, 10)
    # NOTE(review): "auto" is only accepted by older scikit-learn releases -- confirm the target version.
    space['max_features'] = ["auto", "sqrt", "log2"]
    #space['ccp_alpha'] = uniform(0, 5) # including it made results worse; the best value was 0.0

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(X_minmax, y_class, model, space, cv, 150)

    csv_dict['sujeto'].append(key)
    csv_dict['acc'].append(acc)
    csv_dict['pre'].append(pre)
    csv_dict['rec'].append(rec)
    csv_dict['f1'].append(f1)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_minmax, y, best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/DT-EEG.csv', index=False)

### PSD

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all[key] # PSD features of this experiment

    model = DecisionTreeClassifier(random_state=42)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for DecisionTreeClassifier.
    # Left out of the search: min_samples_leaf = randint(1, 10), and
    # ccp_alpha = uniform(0, 5) (including it made results worse; the best value was 0.0).
    space = {
        'criterion': ["gini", "entropy"],
        'splitter': ["best", "random"],
        'max_depth': randint(5, 100),
        'min_samples_split': randint(2, 10),
        'max_features': ["auto", "sqrt", "log2"],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 150
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/DT-PSD.csv', index=False)

### PSD + EOG

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all_eog[key] # PSD + EOG features of this experiment

    model = DecisionTreeClassifier(random_state=42)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for DecisionTreeClassifier.
    # Left out of the search: min_samples_leaf = randint(1, 10), and
    # ccp_alpha = uniform(0, 5) (including it made results worse; the best value was 0.0).
    space = {
        'criterion': ["gini", "entropy"],
        'splitter': ["best", "random"],
        'max_depth': randint(5, 100),
        'min_samples_split': randint(2, 10),
        'max_features': ["auto", "sqrt", "log2"],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 150
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/DT-PSD+EOG.csv', index=False)

# Gaussian Process classifier

In [None]:
from sklearn.gaussian_process import GaussianProcessClassifier

### Mean, STD, var, p05, ...

In [None]:
# Per-experiment metrics; each key becomes a column of the results CSV.
csv_dict = {
    'sujeto': [],
    'acc': [],
    'pre': [],
    'rec': [],
    "f1": []
}

for file in features_files:
    key = file[:-4] # experiment name: file name without the '.csv' extension

    # Only the feature matrix from the CSV is used; the targets come from the
    # PERCLOS dictionaries, so the second value returned by read_features is
    # ignored. The StandardScaler copy that was computed here was never used
    # and has been removed.
    # Variant used previously: read_features(f'./features/{file[:-4]}.csv', features=[0, 1])
    X = read_features(f'./features/{key}.csv')[0]
    y = y_all[key]
    y_class = y_all_class[key]

    # NOTE(review): the scaler is fitted on every row before
    # find_and_test_best_model_class splits train/test, so test rows influence the scaling.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    print(f"Experiment: {key}")
    model = GaussianProcessClassifier(random_state=42)

    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for GaussianProcessClassifier.
    space = dict()
    space['optimizer'] = ["fmin_l_bfgs_b"]
    space['n_restarts_optimizer'] = randint(0, 5)
    space['max_iter_predict'] = randint(75, 200)
    space['warm_start'] = [True, False]
    space['multi_class'] = ['one_vs_rest', 'one_vs_one']

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(X_minmax, y_class, model, space, cv, 20)

    csv_dict['sujeto'].append(key)
    csv_dict['acc'].append(acc)
    csv_dict['pre'].append(pre)
    csv_dict['rec'].append(rec)
    csv_dict['f1'].append(f1)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_minmax, y, best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/GP-EEG.csv', index=False)

### PSD

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all[key] # PSD features of this experiment

    model = GaussianProcessClassifier(random_state=42)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for GaussianProcessClassifier.
    space = {
        'optimizer': ["fmin_l_bfgs_b"],
        'n_restarts_optimizer': randint(0, 5),
        'max_iter_predict': randint(75, 200),
        'warm_start': [True, False],
        'multi_class': ['one_vs_rest', 'one_vs_one'],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 10
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/GP-PSD.csv', index=False)

### PSD + EOG

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all_eog[key] # PSD + EOG features of this experiment

    model = GaussianProcessClassifier(random_state=42)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for GaussianProcessClassifier.
    space = {
        'optimizer': ["fmin_l_bfgs_b"],
        'n_restarts_optimizer': randint(0, 5),
        'max_iter_predict': randint(75, 200),
        'warm_start': [True, False],
        'multi_class': ['one_vs_rest', 'one_vs_one'],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 10
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/GP-PSD+EOG.csv', index=False)

# Random Forest classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

### Mean, STD, var, p05, ...

In [None]:
# Per-experiment metrics; each key becomes a column of the results CSV.
csv_dict = {
    'sujeto': [],
    'acc': [],
    'pre': [],
    'rec': [],
    "f1": []
}

for file in features_files:
    key = file[:-4] # experiment name: file name without the '.csv' extension

    # Only the feature matrix from the CSV is used; the targets come from the
    # PERCLOS dictionaries, so the second value returned by read_features is
    # ignored. The StandardScaler copy that was computed here was never used
    # and has been removed.
    # Variant used previously: read_features(f'./features/{file[:-4]}.csv', features=[0, 1])
    X = read_features(f'./features/{key}.csv')[0]
    y = y_all[key]
    y_class = y_all_class[key]

    # NOTE(review): the scaler is fitted on every row before
    # find_and_test_best_model_class splits train/test, so test rows influence the scaling.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    print(f"Experiment: {key}")
    model = RandomForestClassifier(random_state=42, n_jobs=-1)

    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for RandomForestClassifier.
    space = dict()
    space['n_estimators'] = randint(75, 200)
    space['criterion'] = ["gini", "entropy"]
    space['max_depth'] = randint(75, 150)
    space['min_samples_split'] = randint(2, 10)
    space['min_samples_leaf'] = randint(1, 10)
    # space['min_weight_fraction_leaf'] = uniform(0, 0.5)
    # NOTE(review): "auto" is only accepted by older scikit-learn releases -- confirm the target version.
    space['max_features'] = ["auto", "sqrt", "log2"]
    #space['max_leaf_nodes'] = randint(1, 300)
    # space['min_impurity_decrease'] = uniform(0, 5)
    #space['bootstrap'] = [True, False]
    # space['oob_score'] = [True, False]
    space['warm_start'] = [True, False]
    # space['class_weight'] = ['balanced', 'balanced_subsample']
    #space['ccp_alpha'] = uniform(0, 5)
    #space['max_samples'] = uniform(0, 1)

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(X_minmax, y_class, model, space, cv, 20)

    csv_dict['sujeto'].append(key)
    csv_dict['acc'].append(acc)
    csv_dict['pre'].append(pre)
    csv_dict['rec'].append(rec)
    csv_dict['f1'].append(f1)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_minmax, y, best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/RF-EEG.csv', index=False)

### PSD

In [None]:
# Random forest on the PSD features of a single experiment, with the
# confusion matrix explicitly enabled.
for key in ('5_20141108_noon',):
    print(f"Experiment: {key}")
    X_exp = X_all[key] # PSD features of this experiment

    model = RandomForestClassifier(random_state=42, n_jobs=-1)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for RandomForestClassifier.
    # Left out of the search: min_weight_fraction_leaf, max_leaf_nodes,
    # min_impurity_decrease, bootstrap, oob_score, class_weight, ccp_alpha, max_samples.
    space = {
        'n_estimators': randint(75, 200),
        'criterion': ["gini", "entropy"],
        'max_depth': randint(75, 150),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 10),
        'max_features': ["auto", "sqrt", "log2"],
        'warm_start': [True, False],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 20, conf_mat=True
    )

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

In [None]:
# Confusion matrix of `best_model` over all rows of experiment `key`; both
# names must already be defined by a previously executed cell.
plt.rcParams.update({'font.size': 12, 'font.weight': 'bold'})
fig, ax = plt.subplots(figsize=(10, 4))
# Display label fixed: it read 'moderara', while the class is named 'moderada'.
plot_confusion_matrix(best_model, X_all[key], y_all_class[key], ax=ax, display_labels=['leve', 'moderada', 'severa'], colorbar=False)

size_ticks = 12
plt.xticks(fontsize=size_ticks, fontweight='normal')
plt.yticks(fontsize=size_ticks, fontweight='normal')

size_label = 14
plt.xlabel('Predicted Label', fontweight='bold', labelpad=10, fontsize=size_label)
plt.ylabel('True Label', fontweight='bold', labelpad=10, fontsize=size_label)

plt.savefig("E:/UNIVERSIDAD/TFG/TRABAJO/Images-Test/perclos-class-confmat.pdf", bbox_inches='tight')

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all[key] # PSD features of this experiment

    model = RandomForestClassifier(random_state=42, n_jobs=-1)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for RandomForestClassifier.
    # Left out of the search: min_weight_fraction_leaf, max_leaf_nodes,
    # min_impurity_decrease, bootstrap, oob_score, class_weight, ccp_alpha, max_samples.
    space = {
        'n_estimators': randint(75, 200),
        'criterion': ["gini", "entropy"],
        'max_depth': randint(75, 150),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 10),
        'max_features': ["auto", "sqrt", "log2"],
        'warm_start': [True, False],
    }

    # The confusion matrix is disabled for this batch run.
    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 20, conf_mat=False
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/RF-PSD.csv', index=False)

### PSD + EOG

In [None]:
# One list per CSV column; a row is appended for every experiment.
csv_dict = {column: [] for column in ('sujeto', 'acc', 'pre', 'rec', 'f1')}

for key in X_all:
    print(f"Experiment: {key}")
    X_exp = X_all_eog[key] # PSD + EOG features of this experiment

    model = RandomForestClassifier(random_state=42, n_jobs=-1)
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

    # Randomized-search space for RandomForestClassifier.
    # Left out of the search: min_weight_fraction_leaf, max_leaf_nodes,
    # min_impurity_decrease, bootstrap, oob_score, class_weight, ccp_alpha, max_samples.
    space = {
        'n_estimators': randint(75, 200),
        'criterion': ["gini", "entropy"],
        'max_depth': randint(75, 150),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 10),
        'max_features': ["auto", "sqrt", "log2"],
        'warm_start': [True, False],
    }

    best_model, acc, pre, rec, f1 = find_and_test_best_model_class(
        X_exp, y_all_class[key], model, space, cv, 20
    )

    # csv_dict keeps insertion order, so the values line up with its columns.
    for column, value in zip(csv_dict, (key, acc, pre, rec, f1)):
        csv_dict[column].append(value)

    lim_leve_moderada, lim_moderada_severa = y_all_limits[key]
    plot_results_class(X_exp, y_all[key], best_model, lim_leve_moderada, lim_moderada_severa)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_cla/RF-PSD+EOG.csv', index=False)