In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from utils import *
import pickle

""" Sélection des noms de fichiers ne concernant que les runs 3 """


def initFilesNames():
    filesNames = []
    for x in range(100, 1000, 100):
        filesNames.append("B0" + str(x + 3) + "T")
    return filesNames


DATA_FILES = initFilesNames()

""" Application d'un model et alimentation création d'une entrée de rapport """


def apply_model(model, title, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    pickle.dump(model, open(MODELS_PATH + title, "wb"))
    y_pred = model.predict(X_test)
    return {
        "title": title,
        "score": accuracy_score(y_test, y_pred, normalize=True),
        "confusion": confusion_matrix(y_test, y_pred),
        "classification_report": classification_report(y_test, y_pred),
    }


""" Application de différents models et création d'un rapport """


def apply_models(X_train, X_test, y_train, y_test, report_entries):
    report_entries.append(
        apply_model(
            LogisticRegression(max_iter=2500),
            "LogisticRegression",
            X_train,
            y_train,
            X_test,
            y_test,
        )
    )
    abc = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=10))

    parameters = {"n_estimators": [10, 50, 250], "learning_rate": [0.01, 0.1, 1]}

    adaboost = GridSearchCV(abc, parameters, verbose=3, scoring="accuracy", n_jobs=-1)
    adaboost.fit(X_train, y_train)
    bm = adaboost.best_estimator_
    report_entries.append(
        apply_model(
            bm,
            "AdaBoost",
            X_train,
            y_train,
            X_test,
            y_test,
        )
    )


""" Edition du rapport d'exécution des models """


def print_report(report):
    for r in report:
        print(
            "------------------------------------------------------------------------------------------"
        )
        print(
            "min:",
            r["range"]["min"],
            "max: ",
            r["range"]["max"],
            "lowCut: ",
            r["filter"]["low"],
            "hightCut: ",
            r["filter"]["hight"],
            "reference: ",
            r["reference"],
        )
        print(
            "------------------------------------------------------------------------------------------"
        )
        for e in r["entries"]:
            print(e["title"], "score: ", e["score"])
            print(e["confusion"])
            print(e["classification_report"])


""" Création des jeux d'entrainement et de test
    Les données des candidats 1 à 7 servent à l'entrainement,
    celles des candidats 8 et 9 aux tests
"""


def my_train_test_split(df):
    X_test = df[(df["id"] > 6)]
    X_train = df[(df["id"] < 7)]

    y_test = X_test["eventType"]
    y_train = X_train["eventType"]
    X_test.drop(["eventType"], axis=1, inplace=True)
    X_train.drop(["eventType"], axis=1, inplace=True)

    X_test.drop(["id", "index"], axis=1, inplace=True)
    X_train.drop(["id", "index"], axis=1, inplace=True)

    return X_train, X_test, y_train, y_test


ranges = [
    {"min": -0.5, "max": 3.5},
]
report = []
epocks = []

for r in ranges:
    print(
        "***********************************************************************************************"
    )
    for f in FILTERS:
        for ref in REFERENCE:
            for d in DATA_FILES:
                epocks.append(
                    create_epochs(
                        file_preprocessing(
                            d,
                            lFilter=f["l_cut"],
                            hFilter=f["h_cut"],
                        ),
                        tmin=r["min"],
                        tmax=r["max"],
                        reference=ref,
                    )
                )
            df = extract_features(epocks)
            X_train, X_test, y_train, y_test = my_train_test_split(df)
            report_entries = []
            apply_models(X_train, X_test, y_train, y_test, report_entries)
            report.append(
                {
                    "range": {"min": r["min"], "max": r["max"]},
                    "filter": {"low": f["l_cut"], "hight": f["h_cut"]},
                    "reference": r,
                    "entries": report_entries,
                }
            )
print_report(report)

***********************************************************************************************
Creating RawArray with float64 data, n_channels=4, n_times=469011
    Range : 0 ... 469010 =      0.000 ...  1876.040 secs
Ready.
['STI101']
160 events found on stim channel STI101
Event IDs: [1 2]
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 825 samples (3.300 s)

['C3', 'Cz', 'C4', 'STI101']
160 events found on stim channel STI101
Event IDs: [1 2]
Not setting metadata
160 matching events found
Applying base

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfEvent.drop_duplicates(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop(["eventType"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["eventType"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop(["id", "index"], ax

Fitting 5 folds for each of 9 candidates, totalling 45 fits




------------------------------------------------------------------------------------------
min: -0.5 max:  3.5 lowCut:  1 hightCut:  30 reference:  {'min': -0.5, 'max': 3.5}
------------------------------------------------------------------------------------------
LogisticRegression score:  0.80625
[[112  48]
 [ 14 146]]
              precision    recall  f1-score   support

           1       0.89      0.70      0.78       160
           2       0.75      0.91      0.82       160

    accuracy                           0.81       320
   macro avg       0.82      0.81      0.80       320
weighted avg       0.82      0.81      0.80       320

AdaBoost score:  0.790625
[[110  50]
 [ 17 143]]
              precision    recall  f1-score   support

           1       0.87      0.69      0.77       160
           2       0.74      0.89      0.81       160

    accuracy                           0.79       320
   macro avg       0.80      0.79      0.79       320
weighted avg       0.80      