In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from utils import *
import pickle

""" Sélection des noms de fichiers ne concernant que les runs 3 """


def initFilesNames():
    """Return the nine recording names of the form ``B0<d>03T``.

    ``<d>`` runs from 1 to 9 (presumably the subject index -- confirm against
    the dataset naming scheme), and the ``03`` suffix selects run 3 only.

    Returns:
        list[str]: ``["B0103T", "B0203T", ..., "B0903T"]``.
    """
    return [f"B0{leading_digit}03T" for leading_digit in range(1, 10)]

def initUntrainFilesNames():
    """Return the recording names of the form ``B0<d>01T`` and ``B0<d>02T``.

    ``<d>`` runs from 1 to 9 (presumably the subject index -- confirm against
    the dataset naming scheme). For each value the run-1 name comes first,
    immediately followed by the run-2 name.

    Returns:
        list[str]: ``["B0101T", "B0102T", "B0201T", ..., "B0902T"]``.
    """
    return [
        f"B0{leading_digit}0{run}T"
        for leading_digit in range(1, 10)
        for run in (1, 2)
    ]


# Recordings whose names end in "03T"; these feed the fit/evaluate step below.
DATA_FILES = initFilesNames()
# Recordings whose names end in "01T"/"02T"; exported later as the "untrain" set.
UNTRAIN_DATA_FILES = initUntrainFilesNames()

""" Application d'un model et alimentation création d'une entrée de rapport """


def apply_model(model, title, X_train, y_train, X_test, y_test):
    """Fit a model, persist it with pickle, and build its report entry.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        title: Label of the model; also used as the pickle file name, which is
            appended to ``MODELS_PATH`` (expected to come from ``utils``).
        X_train: Training features.
        y_train: Training labels.
        X_test: Evaluation features.
        y_test: Evaluation labels.

    Returns:
        dict: ``title``, ``score`` (fraction of correct predictions on the
        test split), ``confusion`` (confusion matrix) and
        ``classification_report`` (text report).
    """
    model.fit(X_train, y_train)
    # Use a context manager so the pickle file is flushed and closed right
    # away instead of leaking the handle until garbage collection.
    with open(MODELS_PATH + title, "wb") as model_file:
        pickle.dump(model, model_file)
    y_pred = model.predict(X_test)
    return {
        "title": title,
        "score": accuracy_score(y_test, y_pred, normalize=True),
        "confusion": confusion_matrix(y_test, y_pred),
        "classification_report": classification_report(y_test, y_pred),
    }


""" Application de différents models et création d'un rapport """


def apply_models(X_train, X_test, y_train, y_test, report_entries):
    """Run every candidate model and append its report entry.

    Each candidate is fitted, pickled and scored by ``apply_model``; the
    resulting dict is appended to ``report_entries`` in place, in the order
    the candidates are listed. Nothing is returned.

    Args:
        X_train: Training features.
        X_test: Evaluation features.
        y_train: Training labels.
        y_test: Evaluation labels.
        report_entries: List that receives one entry per model.
    """
    # Disabled experiment (was kept as an inert string literal): AdaBoost
    # tuned by grid search, reported between the two active models.
    #
    # abc = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=10))
    # parameters = {"n_estimators": [10, 50, 250], "learning_rate": [0.01, 0.1, 1]}
    # adaboost = GridSearchCV(abc, parameters, verbose=3, scoring="accuracy", n_jobs=-1)
    # adaboost.fit(X_train, y_train)
    # print(adaboost.best_estimator_)
    # bm = adaboost.best_estimator_
    # report_entries.append(
    #     apply_model(bm, "AdaBoost", X_train, y_train, X_test, y_test)
    # )

    # Factories keep construction lazy: each estimator is only built right
    # before it is fitted, exactly as in a sequence of explicit calls.
    candidates = (
        ("LogisticRegression", lambda: LogisticRegression(max_iter=2500)),
        (
            "RandomForestClassifier",
            lambda: RandomForestClassifier(n_estimators=92, criterion="log_loss"),
        ),
    )
    for title, build_estimator in candidates:
        entry = apply_model(
            build_estimator(), title, X_train, y_train, X_test, y_test
        )
        report_entries.append(entry)


""" Edition du rapport d'exécution des models """


def print_report(report):
    """Print every section of the execution report to standard output.

    Args:
        report: Iterable of dicts with the keys ``range`` (``min``/``max``),
            ``filter`` (``low``/``hight``), ``reference`` and ``entries``.
            Each entry is a dict with ``title``, ``score``, ``confusion`` and
            ``classification_report``.

    For each section a header line with the parameters is printed between two
    horizontal rules, followed by the title/score line, the confusion matrix
    and the classification report of every entry.
    """
    rule = "------------------------------------------------------------------------------------------"
    for section in report:
        print(rule)
        window = section["range"]
        band = section["filter"]
        header = (
            "min:",
            window["min"],
            "max: ",
            window["max"],
            "lowCut: ",
            band["low"],
            "hightCut: ",
            band["hight"],
            "reference: ",
            section["reference"],
        )
        print(*header)
        print(rule)
        for entry in section["entries"]:
            print(entry["title"], "score: ", entry["score"])
            for key in ("confusion", "classification_report"):
                print(entry[key])


# ---- Experiment parameters ------------------------------------------------
report = []
scaler = None
l_cut = 1  # lower band-pass edge handed to file_preprocessing (lFilter)
h_cut = 30  # upper band-pass edge handed to file_preprocessing (hFilter)
rmin = -0.5  # epoch window start handed to create_epochs (tmin)
rmax = 3.5  # epoch window end handed to create_epochs (tmax)
reference = ["Cz"]


def _load_epochs(files_names):
    """Preprocess and epoch each named recording with the settings above."""
    return [
        create_epochs(
            file_preprocessing(
                name,
                lFilter=l_cut,
                hFilter=h_cut,
            ),
            tmin=rmin,
            tmax=rmax,
            reference=reference,
        )
        for name in files_names
    ]


print(
    "***********************************************************************************************"
)

# ---- Fit and evaluate on the run-3 recordings -----------------------------
epocks = _load_epochs(DATA_FILES)
X_train, X_test, y_train, y_test, scaler = extract_features(epocks, True, None)
X_test.to_csv(MODELS_TEST_PATH + "Xtest.csv")
y_test.to_csv(MODELS_TEST_PATH + "ytest.csv")
report_entries = []
apply_models(X_train, X_test, y_train, y_test, report_entries)
report.append(
    {
        "range": {"min": rmin, "max": rmax},
        "filter": {"low": l_cut, "hight": h_cut},
        "reference": reference,
        "entries": report_entries,
    }
)
print_report(report)

# ---- Export the recordings that were never used for fitting ---------------
# The held-out recordings go into their own list. Appending them to `epocks`
# would also export the run-3 epochs the models were just fitted on, which
# contaminates the "untrain" set with training data.
untrain_epocks = _load_epochs(UNTRAIN_DATA_FILES)
# The scaler returned by the first extraction is passed back in here.
# NOTE(review): presumably so the same scaling is reused -- confirm in utils.
X_train, X_test, y_train, y_test, scaler = extract_features(
    untrain_epocks, False, scaler
)
X_train.to_csv(MODELS_TEST_PATH + "XUntrainTest.csv")
y_train.to_csv(MODELS_TEST_PATH + "yUntrainTest.csv")




***********************************************************************************************
Creating RawArray with float64 data, n_channels=4, n_times=469011
    Range : 0 ... 469010 =      0.000 ...  1876.040 secs
Ready.
['STI101']
160 events found on stim channel STI101
Event IDs: [1 2]
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 825 samples (3.300 s)

['C3', 'Cz', 'C4', 'STI101']
160 events found on stim channel STI101
Event IDs: [1 2]
Not setting metadata
160 matching events found
Applying base

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfEvent.drop_duplicates(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfEvent.drop_duplicates(inplace=True)


------------------------------------------------------------------------------------------
min: -0.5 max:  3.5 lowCut:  1 hightCut:  30 reference:  ['Cz']
------------------------------------------------------------------------------------------
LogisticRegression score:  0.803125
[[107  53]
 [ 10 150]]
              precision    recall  f1-score   support

           1       0.91      0.67      0.77       160
           2       0.74      0.94      0.83       160

    accuracy                           0.80       320
   macro avg       0.83      0.80      0.80       320
weighted avg       0.83      0.80      0.80       320

RandomForestClassifier score:  0.78125
[[116  44]
 [ 26 134]]
              precision    recall  f1-score   support

           1       0.82      0.72      0.77       160
           2       0.75      0.84      0.79       160

    accuracy                           0.78       320
   macro avg       0.78      0.78      0.78       320
weighted avg       0.78      0.78 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfEvent.drop_duplicates(inplace=True)
