# Generar 'X' e 'y' para todos los experimentos
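<p>Como datos para las X se toma la media de los 17 canales de:</p>
<ul>
    <li>X_1: PSD de la banda delta (1~4Hz)</li>
    <li>X_2: PSD de la banda theta (4~8Hz)</li>
    <li>X_3: PSD de la banda alpha (8~14Hz)</li>
    <li>X_4: PSD de la banda beta (14~31Hz)</li>
    <li>X_5: PSD de la banda gamma (31~50Hz)</li>
</ul>
<p>En la variante PSD+EOG se añade además el número de parpadeos por época, normalizado a [0, 1].</p>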

In [None]:
import mne
from mne.externals.pymatreader import read_mat

from read import read_features

import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

import matplotlib
import matplotlib.pyplot as plt

import pandas as pd

In [None]:
# Build, for every experiment, the regression inputs used by the cells below:
#   X_all[name]     -> min-max scaled mean band PSD (one column per band)
#   X_all_eog[name] -> the same columns plus the scaled blink count per epoch
#   y_all[name]     -> raw PERCLOS labels

# Recordings with the best-looking plots (alternative subset, kept for reference):
# features_files = ['4_20151105_noon.mat', '4_20151107_noon.mat', '5_20141108_noon.mat','12_20150928_noon.mat','14_20151014_night.mat', '18_20150926_noon.mat', '21_20151016_noon.mat']

features_files = ['1_20151124_noon_2.csv', '2_20151106_noon.csv', '3_20151024_noon.csv','4_20151105_noon.csv', '4_20151107_noon.csv',
            '5_20141108_noon.csv', '5_20151012_night.csv', '6_20151121_noon.csv','7_20151015_night.csv', '8_20151022_noon.csv', 
            '9_20151017_night.csv', '10_20151125_noon.csv', '11_20151024_night.csv', '12_20150928_noon.csv', '13_20150929_noon.csv',
            '14_20151014_night.csv','15_20151126_night.csv', '16_20151128_night.csv', '17_20150925_noon.csv', '18_20150926_noon.csv',
            '19_20151114_noon.csv', '20_20151129_night.csv', '21_20151016_noon.csv']

# features_files = ['21_20151016_noon.csv']

X_all = {} # psd
X_all_eog = {} # psd + eog
y_all = {} # raw PERCLOS data

n_samples = 885   # epochs read from each recording
dur_sample = 8    # seconds per epoch
n_channels = 17   # channels averaged for every band
n_bands = 5       # delta, theta, alpha, beta, gamma (in that column order)

for experiment in features_files:
    name = experiment[:-4]  # file name without its 4-character extension

    # ---- EOG: number of blinks per epoch ----
    mat_data = read_mat(f'./SEED-VIG/Raw_Data/{name}.mat')

    sfreq = mat_data['EOG']['eog_config']['current_sample_rate']
    # Plain Python float for the sample arithmetic below, so a small integer
    # dtype coming from the .mat file cannot overflow.
    sfreq_hz = float(sfreq)

    # Horizontal and vertical EOG stacked as two channels, scaled by 1e-6.
    samples = np.vstack((mat_data['EOG']['eog_h']*1e-6, mat_data['EOG']['eog_v']*1e-6))

    ch_names = ['EOG_H', 'EOG_V']
    ch_types = ["eog"]*len(ch_names)

    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
    info.set_montage('standard_1020')

    raw = mne.io.RawArray(samples, info, verbose=False)

    # Blink threshold: absolute value of the 2.5th percentile of the vertical
    # EOG over the first 5 seconds. The window is derived from the recording's
    # sample rate; it used to be a fixed 5*1000 samples, which only equals
    # 5 seconds at 1000 Hz.
    threshold_window = int(round(5 * sfreq_hz))
    threshold = abs(np.percentile(raw.get_data()[1][:threshold_window], 2.5))

    blinks = mne.preprocessing.find_eog_events(raw, filter_length='10s', thresh=threshold, verbose=False)
    blinks = blinks.T[0]  # first column: sample index of each detected event

    # Samples per epoch, derived from the sample rate (8*125 was hard-coded).
    len_sample = int(round(dur_sample * sfreq_hz))

    # Map every blink to its epoch and count blinks per epoch. Events that
    # fall after the last epoch are dropped instead of raising IndexError.
    epoch_of_blink = np.asarray(blinks, dtype=int) // len_sample
    epoch_of_blink = epoch_of_blink[epoch_of_blink < n_samples]
    n_blinks_per_sample = np.bincount(epoch_of_blink, minlength=n_samples)

    scaler = MinMaxScaler(feature_range=(0, 1))
    n_blinks_per_sample = scaler.fit_transform(n_blinks_per_sample.reshape(-1, 1))

    # ---- PSD: mean over channels for each band ----
    mat_data = read_mat(f'./SEED-VIG/EEG_Feature_5Bands/{name}.mat')
    perclos_data = read_mat(f'./SEED-VIG/perclos_labels/{name}.mat')
    y = np.array(perclos_data['perclos'])

    # psd_movingAve is indexed as [channel][epoch][band]; average the channel
    # axis in one step (n_channels is used as divisor, not a literal 17).
    psd_data = np.asarray(mat_data['psd_movingAve'])
    band_means = psd_data[:n_channels, :n_samples, :n_bands].mean(axis=0)

    # MinMaxScaler works column by column, so a single fit scales each band
    # independently, exactly like one scaler per band.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X = scaler.fit_transform(band_means)

    X_all[name] = X # PSD features

    X_all_eog[name] = np.hstack((X, n_blinks_per_sample)) # PSD + EOG features

    y_all[name] = y # raw PERCLOS labels

print(f'Generados los diccionarios X_all e y_all')

In [None]:
# Sanity check: shape of one PSD+EOG feature matrix, then the name of every
# experiment that was loaded.
reference_matrix = X_all_eog['1_20151124_noon_2']
print(reference_matrix.shape)

for key in iter(X_all):
    print(key)

# Busqueda de hyperparametros y muestra de los resultados

## Funciones para la busqueda y el plot

In [None]:
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import StratifiedShuffleSplit

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from scipy.stats import uniform, randint, loguniform, gamma

def find_and_test_best_model_reg(X, y, model, space, cv=10, n_iter=100, scoring=None):
    """Tune ``model`` with a randomized search and score it on a held-out split.

    A quarter of the rows is set aside with ``train_test_split`` (fixed seed
    42). ``RandomizedSearchCV`` is fitted on the remaining rows, and its best
    estimator predicts the held-out rows. Predictions are clipped to [0, 1]
    before the metrics are computed. Progress and results are printed.

    Args:
        X: feature matrix.
        y: regression targets, aligned with the rows of ``X``.
        model: estimator handed to ``RandomizedSearchCV``.
        space: parameter distributions handed to ``RandomizedSearchCV``.
        cv: forwarded to ``RandomizedSearchCV``.
        n_iter: number of parameter settings sampled by the search.
        scoring: forwarded to ``RandomizedSearchCV``.

    Returns:
        Tuple ``(best_model, rmse, r2)`` where ``rmse`` and ``r2`` are
        computed on the held-out rows with the clipped predictions.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42
    )

    searcher = RandomizedSearchCV(
        model,
        space,
        n_iter=n_iter,
        scoring=scoring,
        n_jobs=-1,
        cv=cv,
        random_state=42,
    )
    result = searcher.fit(X_train, y_train)

    print(f"Best Model:")
    print(f"\tScore: {result.best_score_:.4f}")
    print(f"\tHyperparameters: {result.best_params_}")

    print('\nResults with X_test:')
    best_model = result.best_estimator_
    # Clip in case the model predicts outside the [0, 1] range.
    final_predictions = np.clip(best_model.predict(X_test), 0, 1)

    squared_error = mean_squared_error(y_test, final_predictions)
    rmse = np.sqrt(squared_error)
    print(f"\tRMSE: {rmse:.4f}")

    # Proportion of the variance of y explained by the model.
    r2 = r2_score(y_test, final_predictions)
    print(f"\tr^2: {r2:.4f}")

    return best_model, rmse, r2

def plot_results_reg(X, y, model):
    """Plot the real targets against the model's clipped predictions.

    Args:
        X: feature matrix passed to ``model.predict``.
        y: real target values, drawn in black.
        model: fitted estimator; its predictions are clipped to [0, 1] and
            drawn in red.
    """
    clipped = np.clip(model.predict(X), 0, 1)

    plt.figure(figsize=(20, 8))
    plt.plot(y, color="black", label="real", zorder=0)
    plt.plot(clipped, color="red", label="predicted")
    plt.xlabel('Epoch')
    plt.ylabel('% PERCLOS')
    plt.legend(loc='best')
    plt.show()

# <a href="https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html">SVM-r</a>

In [None]:
from sklearn.svm import SVR

### Mean, STD, var, p05, ...

In [None]:
# SVR on the per-experiment feature files: one randomized search per
# recording, with the held-out metrics collected into a single CSV.
csv_dict = {
    'sujeto': [],
    'rmse': [],
    'cc': [],
    "r2": []
}

for file in features_files:

    X, y = read_features(f'./features/{file[:-4]}.csv')
    #X, y = read_features(file, features=[0, 1])

    # Only the min-max scaled matrix is used below. The StandardScaler copy
    # that used to be fitted here was never read, so it has been removed.
    # NOTE(review): the scaler is fitted on the whole recording before
    # find_and_test_best_model_reg splits off its test rows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    key = file[:-4]
    print(f"Experiment: {key}")
    model = SVR()

    # Search space sampled by RandomizedSearchCV.
    space = dict()
    space['kernel'] = ['linear', 'rbf']
    space['gamma'] = ['scale', 'auto']
    space['tol'] = uniform(1e-5, 1e-1)
    space['C'] = randint(1, 500)
    space['epsilon'] = uniform(0.01, 0.5)
    space['shrinking'] = [True, False]

    best_model, rmse, r2 = find_and_test_best_model_reg(X_minmax, y, model, space, cv=10,  n_iter=35)

    csv_dict['sujeto'].append(key)
    csv_dict['rmse'].append(rmse)
    # 'cc' is stored as sqrt(r2); np.sqrt yields NaN when r2 is negative.
    csv_dict['cc'].append(np.sqrt(r2))
    csv_dict['r2'].append(r2)

#     plot_results_reg(X_minmax, y, best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/SVM_r-EEG.csv', index=False)

### PSD

In [None]:
# SVR on the PSD features of every experiment in X_all.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

for key in X_all:
    print(f"Experiment: {key}")
    model = SVR()

    # Search space sampled by RandomizedSearchCV.
    space = {
        'kernel': ['linear', 'rbf', 'sigmoid'],
        'gamma': ['scale', 'auto'],
        'tol': uniform(1e-5, 1e-1),
        'C': randint(1, 500),
        'epsilon': uniform(0.01, 0.5),
        'shrinking': [True, False],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all[key], y_all[key], model, space, cv=10, n_iter=25
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    #plot_results_reg(X_all[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/SVM_r-PSD.csv', index=False)

### PSD+EOG

In [None]:
# SVR on the PSD + blink-count features of every experiment.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

# X_all and X_all_eog are filled with the same keys, so iterating X_all
# visits every PSD+EOG matrix as well.
for key in X_all:
    print(f"Experiment: {key}")
    model = SVR()

    # Search space sampled by RandomizedSearchCV.
    space = {
        'kernel': ['linear', 'rbf', 'sigmoid'],
        'gamma': ['scale', 'auto'],
        'tol': uniform(1e-5, 1e-1),
        'C': randint(1, 500),
        'epsilon': uniform(0.01, 0.5),
        'shrinking': [True, False],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all_eog[key], y_all[key], model, space, cv=10, n_iter=25
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    #plot_results_reg(X_all_eog[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/SVM_r-PSD+EOG.csv', index=False)

# <a href="https://scikit-learn.org/stable/modules/neighbors.html#nearest-neighbors-regression">K Nearest Neighbors (KNN)</a>

In [None]:
from sklearn.neighbors import KNeighborsRegressor

### Mean, STD, var, p05, ...

In [None]:
# KNN regression on the per-experiment feature files: one randomized search
# per recording, with the held-out metrics collected into a single CSV.
csv_dict = {
    'sujeto': [],
    'rmse': [],
    'cc': [],
    "r2": []
}

for file in features_files:

    X, y = read_features(f'./features/{file[:-4]}.csv')
    #X, y = read_features(file, features=[0, 1])

    # Only the min-max scaled matrix is used below. The StandardScaler copy
    # that used to be fitted here was never read, so it has been removed.
    # NOTE(review): the scaler is fitted on the whole recording before
    # find_and_test_best_model_reg splits off its test rows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    key = file[:-4]
    print(f"Experiment: {key}")
    model = KNeighborsRegressor(n_jobs=-1)

    # Search space sampled by RandomizedSearchCV.
    space = dict()
    space['n_neighbors'] = randint(3, 25)
    space['weights'] = ['uniform', 'distance']
    space['algorithm'] = ['auto', 'ball_tree', 'kd_tree', 'brute']
    space['leaf_size'] = randint(15, 45)
    space['p'] = [1, 2]

    best_model, rmse, r2 = find_and_test_best_model_reg(X_minmax, y, model, space, cv=10,  n_iter=20)

    csv_dict['sujeto'].append(key)
    csv_dict['rmse'].append(rmse)
    # 'cc' is stored as sqrt(r2); np.sqrt yields NaN when r2 is negative.
    csv_dict['cc'].append(np.sqrt(r2))
    csv_dict['r2'].append(r2)

#     plot_results_reg(X_minmax, y, best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/KNN-EEG.csv', index=False)

### PSD

In [None]:
# KNN regression on the PSD features of every experiment in X_all.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

for key in X_all:
    print(f"Experiment: {key}")
    model = KNeighborsRegressor(n_jobs=-1)

    # Search space sampled by RandomizedSearchCV.
    space = {
        'n_neighbors': randint(3, 25),
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'leaf_size': randint(15, 45),
        'p': [1, 2],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all[key], y_all[key], model, space, cv=10, n_iter=75
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    #plot_results_reg(X_all[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/KNN-PSD.csv', index=False)

### PSD + EOG

In [None]:
# KNN regression on the PSD + blink-count features of every experiment.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

# X_all and X_all_eog are filled with the same keys.
for key in X_all:
    print(f"Experiment: {key}")
    model = KNeighborsRegressor(n_jobs=-1)

    # Search space sampled by RandomizedSearchCV.
    space = {
        'n_neighbors': randint(3, 25),
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'leaf_size': randint(15, 45),
        'p': [1, 2],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all_eog[key], y_all[key], model, space, cv=10, n_iter=75
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    # plot_results_reg(X_all_eog[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/KNN-PSD+EOG.csv', index=False)

# <a href="https://scikit-learn.org/stable/modules/tree.html#regression">Decision Trees</a>

In [None]:
from sklearn.tree import DecisionTreeRegressor

### Mean, STD, var, p05, ...

In [None]:
# Decision-tree regression on the per-experiment feature files: one
# randomized search per recording, metrics collected into a single CSV and
# the fit plotted for every recording.
csv_dict = {
    'sujeto': [],
    'rmse': [],
    'cc': [],
    "r2": []
}

for file in features_files:

    X, y = read_features(f'./features/{file[:-4]}.csv')
    #X, y = read_features(file, features=[0, 1])

    # Only the min-max scaled matrix is used below. The StandardScaler copy
    # that used to be fitted here was never read, so it has been removed.
    # NOTE(review): the scaler is fitted on the whole recording before
    # find_and_test_best_model_reg splits off its test rows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    key = file[:-4]
    print(f"Experiment: {key}")
    model = DecisionTreeRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV.
    # NOTE(review): 'mse' and max_features='auto' are the spellings of older
    # scikit-learn releases; confirm against the installed version.
    space = dict()
    space['criterion'] = ['mse', 'friedman_mse']
    space['splitter'] = ["best", "random"]
    space['max_depth'] = [None] + np.array(range(5, 100)).tolist()
    space['min_samples_split'] = [2]
    space['min_samples_leaf'] = [1]
    space['max_features'] = ["auto", "sqrt", "log2"]

    best_model, rmse, r2 = find_and_test_best_model_reg(X_minmax, y, model, space, cv=10,  n_iter=10)

    csv_dict['sujeto'].append(key)
    csv_dict['rmse'].append(rmse)
    # 'cc' is stored as sqrt(r2); np.sqrt yields NaN when r2 is negative.
    csv_dict['cc'].append(np.sqrt(r2))
    csv_dict['r2'].append(r2)

    plot_results_reg(X_minmax, y, best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/DT-EEG.csv', index=False)

### PSD

In [None]:
# Decision-tree regression on the PSD features of every experiment in X_all.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

for key in X_all:
    print(f"Experiment: {key}")
    model = DecisionTreeRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV.
    space = {
        'criterion': ['mse', 'friedman_mse', 'mae', 'poisson'],
        'splitter': ["best", "random"],
        'max_depth': [None, *range(5, 100)],
        'min_samples_split': [2],
        'min_samples_leaf': [1],
        'max_features': ["auto", "sqrt", "log2"],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all[key], y_all[key], model, space, cv=10, n_iter=100
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    plot_results_reg(X_all[key], y_all[key], best_model)

# Saving the results is currently disabled for this variant.
# df = pd.DataFrame(csv_dict)
# df.to_csv('./results_reg/DT-PSD.csv', index=False)

### PSD+EOG

In [None]:
# Decision-tree regression on the PSD + blink-count features.
csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

# X_all and X_all_eog are filled with the same keys.
for key in X_all:
    print(f"Experiment: {key}")
    model = DecisionTreeRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV.
    space = {
        'criterion': ['mse', 'friedman_mse', 'mae', 'poisson'],
        'splitter': ["best", "random"],
        'max_depth': [None, *range(5, 100)],
        'min_samples_split': [2],
        'min_samples_leaf': [1],
        'max_features': ["auto", "sqrt", "log2"],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all_eog[key], y_all[key], model, space, cv=10, n_iter=100
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    plot_results_reg(X_all_eog[key], y_all[key], best_model)

# Saving the results is currently disabled for this variant.
# df = pd.DataFrame(csv_dict)
# df.to_csv('./results_reg/DT-PSD+EOG.csv', index=False)

# <a href="https://scikit-learn.org/stable/modules/gaussian_process.html#gaussian-process-regression-gpr">Gaussian Processes</a>

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor

### Mean, STD, var, p05, ...

In [None]:
# Gaussian-process regression on the per-experiment feature files: one
# randomized search per recording, metrics collected into a single CSV and
# the fit plotted for every recording.
csv_dict = {
    'sujeto': [],
    'rmse': [],
    'cc': [],
    "r2": []
}

for file in features_files:

    X, y = read_features(f'./features/{file[:-4]}.csv')
    #X, y = read_features(file, features=[0, 1])

    # Only the min-max scaled matrix is used below. The StandardScaler copy
    # that used to be fitted here was never read, so it has been removed.
    # NOTE(review): the scaler is fitted on the whole recording before
    # find_and_test_best_model_reg splits off its test rows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    key = file[:-4]
    print(f"Experiment: {key}")
    model = GaussianProcessRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV.
    space = dict()
    #space['alpha'] = uniform(5e-9, 5e-11) #[5e-8, 1e-9, 5e-9, 1e-10, 5e-10, 1e-11, 5e-11]
    space['optimizer'] = ["fmin_l_bfgs_b"]
    space['n_restarts_optimizer'] = randint(0, 10)

    best_model, rmse, r2 = find_and_test_best_model_reg(X_minmax, y, model, space, cv=10,  n_iter=20)

    csv_dict['sujeto'].append(key)
    csv_dict['rmse'].append(rmse)
    # 'cc' is stored as sqrt(r2); np.sqrt yields NaN when r2 is negative.
    csv_dict['cc'].append(np.sqrt(r2))
    csv_dict['r2'].append(r2)

    plot_results_reg(X_minmax, y, best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/GP-EEG.csv', index=False)

### PSD

In [None]:
# Gaussian-process regression on the PSD features of every experiment.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

for key in X_all:
    print(f"Experiment: {key}")
    model = GaussianProcessRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV ('alpha' is not searched).
    space = {
        'optimizer': ["fmin_l_bfgs_b"],
        'n_restarts_optimizer': randint(0, 10),
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all[key], y_all[key], model, space, cv=10, n_iter=20
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    plot_results_reg(X_all[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/GP-PSD.csv', index=False)

### PSD + EOG

In [None]:
# Gaussian-process regression on the PSD + blink-count features.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

# X_all and X_all_eog are filled with the same keys.
for key in X_all:
    print(f"Experiment: {key}")
    model = GaussianProcessRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV ('alpha' is not searched).
    space = {
        'optimizer': ["fmin_l_bfgs_b"],
        'n_restarts_optimizer': randint(0, 10),
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all_eog[key], y_all[key], model, space, cv=10, n_iter=20
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    plot_results_reg(X_all_eog[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/GP-PSD+EOG.csv', index=False)

# <a href="https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html">Random Forest</a>

In [None]:
from sklearn.ensemble import RandomForestRegressor

### Mean, STD, var, p05, ...

In [None]:
# Random-forest regression on the per-experiment feature files: one
# randomized search per recording, metrics collected into a single CSV and
# the fit plotted for every recording.
csv_dict = {
    'sujeto': [],
    'rmse': [],
    'cc': [],
    "r2": []
}

for file in features_files:

    X, y = read_features(f'./features/{file[:-4]}.csv')
    #X, y = read_features(file, features=[0, 1])

    # Only the min-max scaled matrix is used below. The StandardScaler copy
    # that used to be fitted here was never read, so it has been removed.
    # NOTE(review): the scaler is fitted on the whole recording before
    # find_and_test_best_model_reg splits off its test rows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_minmax = scaler.fit_transform(X)

    key = file[:-4]
    print(f"Experiment: {key}")
    model = RandomForestRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV.
    # NOTE(review): 'mse'/'mae' and max_features='auto' are the spellings of
    # older scikit-learn releases; confirm against the installed version.
    space = dict()
    space['n_estimators'] = randint(75, 200)
    space['criterion'] = ['mse', 'mae']
    space['max_depth'] = randint(75, 150)
    space['min_samples_split'] = randint(2, 10)
    space['min_samples_leaf'] = randint(1, 10)
    # space['min_weight_fraction_leaf'] = uniform(0, 0.5)
    space['max_features'] = ["auto", "sqrt", "log2"]
    #space['max_leaf_nodes'] = randint(1, 300)
    # space['min_impurity_decrease'] = uniform(0, 5)
    #space['bootstrap'] = [True, False]
    # space['oob_score'] = [True, False]
    space['warm_start'] = [True, False]
    # space['class_weight'] = ['balanced', 'balanced_subsample']
    #space['ccp_alpha'] = uniform(0, 5)
    #space['max_samples'] = uniform(0, 1)

    best_model, rmse, r2 = find_and_test_best_model_reg(X_minmax, y, model, space, cv=10,  n_iter=10)

    csv_dict['sujeto'].append(key)
    csv_dict['rmse'].append(rmse)
    # 'cc' is stored as sqrt(r2); np.sqrt yields NaN when r2 is negative.
    csv_dict['cc'].append(np.sqrt(r2))
    csv_dict['r2'].append(r2)

    plot_results_reg(X_minmax, y, best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/RF-EEG.csv', index=False)

### PSD

In [None]:
# Random-forest regression on the PSD features of every experiment in X_all.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

for key in X_all:
    print(f"Experiment: {key}")
    model = RandomForestRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV. Not searched (disabled):
    # min_weight_fraction_leaf, max_leaf_nodes, min_impurity_decrease,
    # bootstrap, oob_score, class_weight, ccp_alpha, max_samples.
    space = {
        'n_estimators': randint(75, 200),
        'criterion': ['mse', 'mae'],
        'max_depth': randint(75, 150),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 10),
        'max_features': ["auto", "sqrt", "log2"],
        'warm_start': [True, False],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all[key], y_all[key], model, space, cv=10, n_iter=20
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    #     plot_results_reg(X_all[key], y_all[key], best_model)

df = pd.DataFrame(csv_dict)
df.to_csv('./results_reg/RF-PSD.csv', index=False)

In [None]:
# Random forest on the PSD + blink-count features of a single experiment.
# `key` and `best_model` are left in scope after this cell.
key = '5_20141108_noon'
print(f"Experiment: {key}")
model = RandomForestRegressor(random_state=42)

# Search space sampled by RandomizedSearchCV. Not searched (disabled):
# min_weight_fraction_leaf, max_leaf_nodes, min_impurity_decrease,
# bootstrap, oob_score, class_weight, ccp_alpha, max_samples.
space = {
    'n_estimators': randint(75, 200),
    'criterion': ['mse', 'mae'],
    'max_depth': randint(75, 150),
    'min_samples_split': randint(2, 10),
    'min_samples_leaf': randint(1, 10),
    'max_features': ["auto", "sqrt", "log2"],
    'warm_start': [True, False],
}

best_model, rmse, r2 = find_and_test_best_model_reg(
    X_all_eog[key], y_all[key], model, space, cv=10, n_iter=20
)

In [None]:
# Print the feature importances of `best_model`, highest first.
# The labels follow the column order of the PSD + blink-count matrices
# (five band PSDs, then the blink count). The beta label was misspelled
# 'psd_betha' and is corrected here.
# NOTE(review): zip() stops at the shorter sequence, so a model fitted on a
# matrix with a different number of columns is silently truncated.
attribs = ['psd_delta', 'psd_theta', 'psd_alpha', 'psd_beta', 'psd_gamma', 'n_parpadeos']
importances = best_model.feature_importances_
importance_features = sorted(zip(importances, attribs), reverse=True)
for imp, attr in importance_features:
    print(f'{attr}: {imp:.4f}')

In [None]:
# Figure of the real PERCLOS curve for experiment `key`, saved as a PDF.
plt.figure(figsize=(20, 8))
plt.plot(y_all[key], color="black", label="real", zorder=0, linewidth=3)

# The clipped predictions are computed but their curve is currently disabled.
predictions = np.clip(best_model.predict(X_all_eog[key]), 0, 1)
# plt.plot(predictions, color="red", label="predicted", linewidth=3)

# Axis labels.
size_label = 26
plt.xlabel('Epoch', fontweight='bold', labelpad=10, fontsize=size_label)
plt.ylabel('PERCLOS', fontweight='bold', labelpad=10, fontsize=size_label)

# Tick labels.
size_ticks = 20
plt.xticks(fontsize=size_ticks)
plt.yticks(fontsize=size_ticks)

# Legend.
size_legend = 20
plt.legend(loc='best', fontsize=size_legend)

# The output location is an absolute path on the author's machine.
plt.savefig("E:/UNIVERSIDAD/TFG/TRABAJO/Images-Test/NOMBRE.pdf", bbox_inches='tight')

plt.show()

### PSD + EOG

In [None]:
# Random-forest regression on the PSD + blink-count features.
key = '21_20151016_noon'  # overwritten by the loop variable below

csv_dict = {column: [] for column in ('sujeto', 'rmse', 'cc', 'r2')}

# X_all and X_all_eog are filled with the same keys.
for key in X_all:
    print(f"Experiment: {key}")
    model = RandomForestRegressor(random_state=42)

    # Search space sampled by RandomizedSearchCV. Not searched (disabled):
    # min_weight_fraction_leaf, max_leaf_nodes, min_impurity_decrease,
    # bootstrap, oob_score, class_weight, ccp_alpha, max_samples.
    space = {
        'n_estimators': randint(75, 200),
        'criterion': ['mse', 'mae'],
        'max_depth': randint(75, 150),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 10),
        'max_features': ["auto", "sqrt", "log2"],
        'warm_start': [True, False],
    }

    best_model, rmse, r2 = find_and_test_best_model_reg(
        X_all_eog[key], y_all[key], model, space, cv=10, n_iter=20
    )

    # One result row per experiment; 'cc' is stored as sqrt(r2).
    row = {'sujeto': key, 'rmse': rmse, 'cc': np.sqrt(r2), 'r2': r2}
    for column, value in row.items():
        csv_dict[column].append(value)

    plot_results_reg(X_all_eog[key], y_all[key], best_model)

# Saving the results is currently disabled for this variant.
# df = pd.DataFrame(csv_dict)
# df.to_csv('./results_reg/RF-PSD+EOG.csv', index=False)