# Project Fatigue
- GitHub [link](https://github.com/romainmartinez/fatigue)

In [1]:
# Common imports
import scipy.io as sio
import numpy as np
import os

# Figures
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the seaborn theme first: `sns.set_context` rewrites the label-size
# rcParams, so any value assigned to them before this call is overwritten.
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")

# Explicit label sizes, assigned after the seaborn calls so they take effect.
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Font type 42 (TrueType) for the PDF and PostScript backends.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

# to make this notebook's output stable across runs
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

## 0. load data

In [2]:
# Location of the MATLAB database, relative to the notebook.
DATA_PATH = os.path.join('.', 'data/')
DATA_FILE = 'DatabaseRPT.mat'

# `loadmat` returns a dict keyed by MATLAB variable name; the `DataBaseRPT`
# entry is unwrapped with [0, 0] (presumably a 1x1 MATLAB struct - confirm).
mat_path = os.path.join(DATA_PATH, DATA_FILE)
mat_content = sio.loadmat(mat_path)
mat = mat_content['DataBaseRPT'][0, 0]

- label
    - `Y` (1, 162): 1 = prefatigue, 2 = fatigue

- used features
    - `AllX` (24, 162): 24 (6 DoF x 4 variables) x 162 (81 participants x 2 time points).
    - `CAssignAll` (1, 24): AllX column assignment

    - `Sex` (1, 162)
    - `Endurance` (1, 162)

- not used
    - `BestX` (7, 162): 7 (variables with SRM>0.8) x 162 (81 participants x 2 time points) matrix. Contains data only for the most responsive variables (SRM>0.8).
    - `CAssignBest` (1, 7): BestX column assignment.
    - `Age` (1, 162): too many NaNs.
    - `Height` (1, 162): too many NaNs.
    - `Weight` (1, 162): too many NaNs.
    - `SubjectKey` (1, 162): useless.
    - `SID` (1, 162): useless.

### create feature matrix

In [3]:
# Names of the `AllX` variables; each entry of the assignment array is
# unwrapped with [0] to get the bare name.
allx_names = [cell[0] for cell in mat['CAssignAll'].flatten()]

# find variable with SRM > .8
# NOTE: `srm_idx` is a boolean mask over the `AllX` variables only; it does
# not cover the `Sex` and `Endurance` columns appended to `X` below.
srm_names = np.array([cell[0] for cell in mat['CAssignBest'].flatten()])
srm_idx = np.isin(allx_names, srm_names)  # `np.in1d` is deprecated in NumPy 2.0

# Feature matrix, one row per observation: `AllX`, then `Sex`, then `Endurance`.
X = np.hstack([mat['AllX'].T, mat['Sex'].T, mat['Endurance'].T])
col_names = np.array(allx_names + ['Sex', 'Endurance'])

# Every column of X must have exactly one name.
assert X.shape[1] == len(col_names), "feature names do not match X columns"

### create label vector

In [4]:
from sklearn.preprocessing import LabelBinarizer

# Binarize the raw `Y` labels (documented above as 1 = prefatigue,
# 2 = fatigue); `y` is the encoder's output for the transposed label row.
label_encoder = LabelBinarizer()
y = label_encoder.fit_transform(mat['Y'].T)

### create report

In [5]:
import os.path
import pandas as pd

# create dataframe: one column per feature plus the binarized label
df = pd.DataFrame(X, columns=col_names)
# `y` is a column vector; flatten it so the label is stored as a plain 1-D column
df['fatigue (label)'] = y.ravel()

# generate pandas report (skipped when the HTML file already exists)
REPORT_FILENAME = './pandas_report.html'
if not os.path.isfile(REPORT_FILENAME):
    import pandas_profiling
    report = pandas_profiling.ProfileReport(df)
    # Write to the same path that was checked above instead of repeating the literal.
    report.to_file(REPORT_FILENAME)

### split data & shuffle

In [6]:
from sklearn.model_selection import train_test_split

# Shuffle and hold out 20 % of the rows as a test set (seeded for repeatability).
# NOTE(review): the data description lists two time points per participant, so a
# row-wise split may put the same participant in both sets - confirm whether a
# group-aware split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

---

In [8]:
from tpot import TPOTClassifier

# Settings for the TPOT pipeline search: 100 generations over a population of
# 100, scored by ROC AUC with 5-fold CV, using all available cores.
tpot_settings = dict(
    generations=100,
    population_size=100,
    cv=5,
    scoring='roc_auc',
    verbosity=2,
    n_jobs=-1,
    random_state=RANDOM_SEED,
)
tpot = TPOTClassifier(**tpot_settings)

# The labels are passed as a flat 1-D vector.
tpot.fit(X_train, y_train.ravel())

  return f(*args, **kwds)
  return f(*args, **kwds)
Optimization Progress:   2%|▏         | 200/10100 [00:37<41:36,  3.97pipeline/s]

Generation 1 - Current best internal CV score: 0.9012820512820513


Optimization Progress:   3%|▎         | 300/10100 [01:10<59:21,  2.75pipeline/s]

Generation 2 - Current best internal CV score: 0.9012820512820513


Optimization Progress:   4%|▍         | 400/10100 [01:54<2:07:12,  1.27pipeline/s]

Generation 3 - Current best internal CV score: 0.9070019723865878


Optimization Progress:   5%|▍         | 500/10100 [02:40<1:21:56,  1.95pipeline/s]

Generation 4 - Current best internal CV score: 0.9083826429980277


Optimization Progress:   6%|▌         | 600/10100 [03:20<1:12:14,  2.19pipeline/s]

Generation 5 - Current best internal CV score: 0.9131163708086785


Optimization Progress:   7%|▋         | 700/10100 [03:56<1:52:47,  1.39pipeline/s]

Generation 6 - Current best internal CV score: 0.9143984220907297


Optimization Progress:   8%|▊         | 800/10100 [04:42<1:31:10,  1.70pipeline/s]

Generation 7 - Current best internal CV score: 0.9143984220907297


Optimization Progress:   9%|▉         | 900/10100 [05:12<1:01:32,  2.49pipeline/s]

Generation 8 - Current best internal CV score: 0.9143984220907297


Optimization Progress:  10%|▉         | 1000/10100 [05:40<1:15:12,  2.02pipeline/s]

Generation 9 - Current best internal CV score: 0.9143984220907297


Optimization Progress:  11%|█         | 1100/10100 [06:05<47:59,  3.13pipeline/s]

Generation 10 - Current best internal CV score: 0.9143984220907297


Optimization Progress:  12%|█▏        | 1200/10100 [06:32<38:12,  3.88pipeline/s]

Generation 11 - Current best internal CV score: 0.9143984220907297


Optimization Progress:  13%|█▎        | 1300/10100 [07:00<53:36,  2.74pipeline/s]

Generation 12 - Current best internal CV score: 0.9143984220907297


Optimization Progress:  14%|█▍        | 1400/10100 [07:26<49:50,  2.91pipeline/s]  

Generation 13 - Current best internal CV score: 0.9155818540433925


Optimization Progress:  15%|█▍        | 1500/10100 [07:49<40:17,  3.56pipeline/s]

Generation 14 - Current best internal CV score: 0.9155818540433925


Optimization Progress:  16%|█▌        | 1600/10100 [08:16<1:24:07,  1.68pipeline/s]

Generation 15 - Current best internal CV score: 0.9155818540433925


Optimization Progress:  17%|█▋        | 1700/10100 [08:36<30:39,  4.57pipeline/s]

Generation 16 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  18%|█▊        | 1800/10100 [09:10<56:02,  2.47pipeline/s]

Generation 17 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  19%|█▉        | 1900/10100 [09:23<22:28,  6.08pipeline/s]

Generation 18 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  20%|█▉        | 2000/10100 [09:48<48:26,  2.79pipeline/s]

Generation 19 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  21%|██        | 2100/10100 [10:05<31:22,  4.25pipeline/s]

Generation 20 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  22%|██▏       | 2200/10100 [10:23<24:49,  5.31pipeline/s]

Generation 21 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  23%|██▎       | 2300/10100 [10:42<29:12,  4.45pipeline/s]

Generation 22 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  24%|██▍       | 2400/10100 [11:03<28:46,  4.46pipeline/s]

Generation 23 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  25%|██▍       | 2500/10100 [11:19<18:16,  6.93pipeline/s]

Generation 24 - Current best internal CV score: 0.9168639053254438


Optimization Progress:  26%|██▌       | 2600/10100 [11:37<25:51,  4.83pipeline/s]

Generation 25 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  27%|██▋       | 2700/10100 [11:54<43:30,  2.84pipeline/s]

Generation 26 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  28%|██▊       | 2800/10100 [12:21<41:43,  2.92pipeline/s]

Generation 27 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  29%|██▊       | 2900/10100 [12:36<21:14,  5.65pipeline/s]

Generation 28 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  30%|██▉       | 3000/10100 [12:53<18:42,  6.33pipeline/s]

Generation 29 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  31%|███       | 3100/10100 [13:07<17:39,  6.61pipeline/s]

Generation 30 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  32%|███▏      | 3200/10100 [13:26<19:29,  5.90pipeline/s]

Generation 31 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  33%|███▎      | 3300/10100 [13:40<16:29,  6.87pipeline/s]

Generation 32 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  34%|███▎      | 3400/10100 [14:01<17:51,  6.25pipeline/s]

Generation 33 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  35%|███▍      | 3500/10100 [14:26<23:36,  4.66pipeline/s]

Generation 34 - Current best internal CV score: 0.9180473372781066


Optimization Progress:  36%|███▌      | 3600/10100 [14:46<22:34,  4.80pipeline/s]

Generation 35 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  37%|███▋      | 3700/10100 [15:02<18:13,  5.85pipeline/s]

Generation 36 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  38%|███▊      | 3800/10100 [15:22<21:47,  4.82pipeline/s]

Generation 37 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  39%|███▊      | 3900/10100 [15:41<23:06,  4.47pipeline/s]

Generation 38 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  40%|███▉      | 4000/10100 [16:02<20:43,  4.91pipeline/s]

Generation 39 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  41%|████      | 4100/10100 [16:18<20:39,  4.84pipeline/s]

Generation 40 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  42%|████▏     | 4200/10100 [16:48<37:39,  2.61pipeline/s]

Generation 41 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  43%|████▎     | 4300/10100 [17:07<20:58,  4.61pipeline/s]

Generation 42 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  44%|████▎     | 4400/10100 [17:31<19:34,  4.85pipeline/s]

Generation 43 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  45%|████▍     | 4500/10100 [17:50<23:50,  3.91pipeline/s]

Generation 44 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  46%|████▌     | 4600/10100 [18:09<25:04,  3.65pipeline/s]

Generation 45 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  47%|████▋     | 4700/10100 [18:26<19:26,  4.63pipeline/s]

Generation 46 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  48%|████▊     | 4800/10100 [18:44<11:55,  7.40pipeline/s]

Generation 47 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  49%|████▊     | 4900/10100 [19:24<29:55,  2.90pipeline/s]

Generation 48 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  50%|████▉     | 5000/10100 [19:40<11:29,  7.40pipeline/s]

Generation 49 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  50%|█████     | 5100/10100 [20:01<22:26,  3.71pipeline/s]

Generation 50 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  51%|█████▏    | 5200/10100 [20:20<19:46,  4.13pipeline/s]

Generation 51 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  52%|█████▏    | 5300/10100 [20:40<21:55,  3.65pipeline/s]

Generation 52 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  53%|█████▎    | 5400/10100 [21:06<18:54,  4.14pipeline/s]

Generation 53 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  54%|█████▍    | 5500/10100 [21:23<21:11,  3.62pipeline/s]

Generation 54 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  55%|█████▌    | 5600/10100 [21:43<17:31,  4.28pipeline/s]

Generation 55 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  56%|█████▋    | 5700/10100 [22:01<19:21,  3.79pipeline/s]

Generation 56 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  57%|█████▋    | 5800/10100 [22:17<14:12,  5.04pipeline/s]

Generation 57 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  58%|█████▊    | 5900/10100 [22:36<19:20,  3.62pipeline/s]

Generation 58 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  59%|█████▉    | 6000/10100 [22:51<13:05,  5.22pipeline/s]

Generation 59 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  60%|██████    | 6100/10100 [23:10<11:58,  5.57pipeline/s]

Generation 60 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  61%|██████▏   | 6200/10100 [23:43<23:54,  2.72pipeline/s]

Generation 61 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  62%|██████▏   | 6300/10100 [23:59<09:18,  6.81pipeline/s]

Generation 62 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  63%|██████▎   | 6400/10100 [24:19<10:07,  6.09pipeline/s]

Generation 63 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  64%|██████▍   | 6500/10100 [24:47<09:47,  6.12pipeline/s]

Generation 64 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  65%|██████▌   | 6600/10100 [25:02<09:51,  5.92pipeline/s]

Generation 65 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  66%|██████▋   | 6700/10100 [25:30<11:44,  4.82pipeline/s]

Generation 66 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  67%|██████▋   | 6800/10100 [25:48<09:46,  5.63pipeline/s]

Generation 67 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  68%|██████▊   | 6900/10100 [26:06<06:00,  8.88pipeline/s]

Generation 68 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  69%|██████▉   | 7000/10100 [26:25<12:07,  4.26pipeline/s]

Generation 69 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  70%|███████   | 7100/10100 [26:39<05:55,  8.45pipeline/s]

Generation 70 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  71%|███████▏  | 7200/10100 [27:02<11:08,  4.34pipeline/s]

Generation 71 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  72%|███████▏  | 7300/10100 [27:21<07:57,  5.87pipeline/s]

Generation 72 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  73%|███████▎  | 7400/10100 [27:40<06:34,  6.85pipeline/s]

Generation 73 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  74%|███████▍  | 7500/10100 [28:06<12:17,  3.52pipeline/s]

Generation 74 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  75%|███████▌  | 7600/10100 [28:22<04:10,  9.99pipeline/s]

Generation 75 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  76%|███████▌  | 7700/10100 [28:44<07:54,  5.05pipeline/s]

Generation 76 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  77%|███████▋  | 7800/10100 [29:03<04:00,  9.57pipeline/s]

Generation 77 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  78%|███████▊  | 7900/10100 [29:22<05:36,  6.54pipeline/s]

Generation 78 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  79%|███████▉  | 8000/10100 [29:45<04:52,  7.19pipeline/s]

Generation 79 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  80%|████████  | 8100/10100 [30:04<03:36,  9.24pipeline/s]

Generation 80 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  81%|████████  | 8200/10100 [30:33<05:13,  6.06pipeline/s]

Generation 81 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  82%|████████▏ | 8300/10100 [31:02<08:44,  3.43pipeline/s]

Generation 82 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  83%|████████▎ | 8400/10100 [31:27<07:19,  3.87pipeline/s]

Generation 83 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  84%|████████▍ | 8500/10100 [31:54<06:20,  4.20pipeline/s]

Generation 84 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  85%|████████▌ | 8600/10100 [32:19<05:24,  4.62pipeline/s]

Generation 85 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  86%|████████▌ | 8700/10100 [32:45<05:08,  4.54pipeline/s]

Generation 86 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  87%|████████▋ | 8800/10100 [33:04<04:17,  5.06pipeline/s]

Generation 87 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  88%|████████▊ | 8900/10100 [33:35<06:15,  3.19pipeline/s]

Generation 88 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  89%|████████▉ | 9000/10100 [34:06<03:55,  4.66pipeline/s]

Generation 89 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  90%|█████████ | 9100/10100 [34:24<03:01,  5.52pipeline/s]

Generation 90 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  91%|█████████ | 9200/10100 [34:45<02:51,  5.24pipeline/s]

Generation 91 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  92%|█████████▏| 9300/10100 [35:13<03:29,  3.82pipeline/s]

Generation 92 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  93%|█████████▎| 9400/10100 [35:39<03:29,  3.34pipeline/s]

Generation 93 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  94%|█████████▍| 9500/10100 [36:04<02:43,  3.68pipeline/s]

Generation 94 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  95%|█████████▌| 9600/10100 [36:30<02:56,  2.83pipeline/s]

Generation 95 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  96%|█████████▌| 9700/10100 [36:50<01:08,  5.88pipeline/s]

Generation 96 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  97%|█████████▋| 9800/10100 [37:07<01:00,  4.92pipeline/s]

Generation 97 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  98%|█████████▊| 9900/10100 [37:31<00:51,  3.89pipeline/s]

Generation 98 - Current best internal CV score: 0.9192307692307692


Optimization Progress:  99%|█████████▉| 10000/10100 [37:55<00:21,  4.59pipeline/s]

Generation 99 - Current best internal CV score: 0.9192307692307692


                                                                                  

Generation 100 - Current best internal CV score: 0.9192307692307692

Best pipeline: GradientBoostingClassifier(input_matrix, learning_rate=0.001, max_depth=1, max_features=0.7500000000000001, min_samples_leaf=7, min_samples_split=16, n_estimators=100, subsample=0.3)




TPOTClassifier(config_dict={'sklearn.naive_bayes.GaussianNB': {}, 'sklearn.naive_bayes.BernoulliNB': {'alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0], 'fit_prior': [True, False]}, 'sklearn.naive_bayes.MultinomialNB': {'alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0], 'fit_prior': [True, False]}, 'sklearn.tree.DecisionT....3 , 0.35, 0.4 , 0.45, 0.5 , 0.55,
       0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  ])}}}},
        crossover_rate=0.1, cv=5, disable_update_check=False,
        early_stop=None, generations=100, max_eval_time_mins=5,
        max_time_mins=None, memory=None, mutation_rate=0.9, n_jobs=4,
        offspring_size=100, periodic_checkpoint_folder=None,
        population_size=100, random_state=42, scoring=None, subsample=1.0,
        verbosity=2, warm_start=False)

In [9]:
# Score the fitted TPOT pipeline on the held-out test set; the labels are
# flattened to 1-D, matching how they were passed to `fit`.
print(tpot.score(X_test, y_test.ravel()))

0.7867647058823529


In [10]:
# Export the pipeline selected by TPOT to a Python script.
pipeline_script = 'tpot_pipeline.py'
tpot.export(pipeline_script)

True