# 前処理

In [28]:
import os
import json

import numpy as np
import pandas as pd

from sklearn.model_selection import KFold
from pycaret.classification import *

In [29]:
# NOTE(review): the star import hides where names come from; prepare_all (used below)
# and event2str3 (used in the voting cell) are presumably provided by eeg_utils — confirm.
from eeg_utils import *

# prepare_all() returns two values; only the second is kept. It is later indexed as
# train_target[sub][isplit] to build the "target" column of the validation frames.
_,train_target = prepare_all()

In [30]:
# 各cvの結果を一つのDataFrameにする
def load_vals(sub,isplit, data_type):
    """Collect the class predictions of every base model for one subject/split.

    For each base model the file ``{model}_{data_type}/{sub}_{isplit}.json`` is
    read, the entry stored under the key ``"{sub}-{isplit}"`` is taken, and the
    arg-max along axis 1 of that entry becomes the model's column.

    Args:
        sub: Subject identifier as it appears in file names and JSON keys.
        isplit: Index of the cross-validation split.
        data_type: Suffix of the per-model directory. When it equals ``"val"``
            a ``"target"`` column is appended from the global ``train_target``.

    Returns:
        pandas.DataFrame with one integer column per base model and, for
        ``data_type == "val"``, a trailing ``"target"`` column.
    """
    model_names = ["conv1d", "conv2d", "lstm", "r3d", "mc3", "r2plus1d"]
    entry_key = f"{sub}-{isplit}"

    predicted = {}
    for name in model_names:
        with open(f"{name}_{data_type}/{sub}_{isplit}.json") as fp:
            scores = json.load(fp)[entry_key]
        # Presumably one row of per-class scores per sample; keep the winning class index.
        predicted[name] = np.argmax(scores, axis=1)

    frame = pd.DataFrame(predicted)
    if data_type == "val":
        # Labels are mapped with "% 10 - 1" exactly as stored in train_target.
        frame["target"] = train_target[f"{sub}"][isplit] % 10 - 1
    return frame

In [31]:
def calc_accuracy(df):
    """Return the accuracy of every prediction column against ``df["target"]``.

    Args:
        df: DataFrame holding a ``"target"`` column plus one column of predicted
            labels per model.

    Returns:
        dict mapping each non-target column name to the fraction of rows whose
        value equals the target.
    """
    n_rows = len(df)
    prediction_cols = [col for col in df.columns if col != "target"]
    return {col: sum(df[col] == df["target"]) / n_rows for col in prediction_cols}

# 学習

In [32]:
# Train one stacking meta-classifier per (subject, split): the validation-set class
# predictions of the base models are the features, the true label is the target.
# The best model of each PyCaret comparison is saved under save_path.
save_path = "stacking_model3"
os.makedirs(save_path,exist_ok=True)
for sub in range(5):
    sub_str = f"{sub:04d}"
    for isplit in range(3):
        df_val = load_vals(sub_str,isplit,"val")
        # Select the feature columns by name rather than by position so the cell
        # does not depend on "target" being the last column.
        feature_cols = [col for col in df_val.columns if col != "target"]
        # NOTE(review): load_vals() produces no "isplit" column, so ignore_features
        # appears to have no effect here — confirm before removing it.
        setup(data=df_val, categorical_features=feature_cols, target="target", fold=10, session_id=1, ignore_features=["isplit"])

        best_model = compare_models()
        # Kept for interactive inspection; presumably the score grid of the comparison above.
        results=pull()
        save_model(best_model, f"{save_path}/{sub_str}-{isplit}")

Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(158, 7)"
4,Transformed data shape,"(158, 17)"
5,Transformed train set shape,"(110, 17)"
6,Transformed test set shape,"(48, 17)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9818,0.0,0.9818,0.9864,0.9801,0.969,0.9715,0.152
svm,SVM - Linear Kernel,0.9818,0.0,0.9818,0.9856,0.9805,0.97,0.9725,0.019
rf,Random Forest Classifier,0.9727,0.9984,0.9727,0.9803,0.9716,0.9553,0.9589,0.054
gbc,Gradient Boosting Classifier,0.9636,0.0,0.9636,0.9735,0.9627,0.9407,0.9454,0.05
et,Extra Trees Classifier,0.9636,0.9979,0.9636,0.9735,0.9623,0.9412,0.9459,0.039
lightgbm,Light Gradient Boosting Machine,0.9636,0.9957,0.9636,0.9682,0.9625,0.9398,0.9422,0.073
ridge,Ridge Classifier,0.9545,0.0,0.9545,0.9652,0.9529,0.9272,0.933,0.023
lda,Linear Discriminant Analysis,0.9545,0.0,0.9545,0.9652,0.9529,0.9272,0.933,0.016
knn,K Neighbors Classifier,0.9455,0.9934,0.9455,0.9545,0.9438,0.9103,0.9151,0.017
nb,Naive Bayes,0.9455,0.9876,0.9455,0.9576,0.9427,0.9113,0.9185,0.016


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9364,0.9614,0.9364,0.9452,0.9358,0.8961,0.9007,0.018
lr,Logistic Regression,0.8833,0.0,0.8833,0.9002,0.8766,0.8107,0.8217,0.018
lightgbm,Light Gradient Boosting Machine,0.8833,0.9622,0.8833,0.9024,0.8771,0.8107,0.8221,0.076
lda,Linear Discriminant Analysis,0.8742,0.0,0.8742,0.8996,0.8706,0.7981,0.8093,0.016
rf,Random Forest Classifier,0.8735,0.9489,0.8735,0.8897,0.8708,0.7972,0.8066,0.047
nb,Naive Bayes,0.8652,0.938,0.8652,0.8926,0.8649,0.7836,0.7943,0.018
ridge,Ridge Classifier,0.8652,0.0,0.8652,0.8918,0.8582,0.7817,0.795,0.017
gbc,Gradient Boosting Classifier,0.8644,0.0,0.8644,0.9036,0.8574,0.7807,0.7996,0.055
et,Extra Trees Classifier,0.8561,0.9374,0.8561,0.8859,0.8536,0.7696,0.7828,0.042
dt,Decision Tree Classifier,0.8553,0.872,0.8553,0.8816,0.8533,0.7654,0.7804,0.015


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(159, 7)"
4,Transformed data shape,"(159, 17)"
5,Transformed train set shape,"(111, 17)"
6,Transformed test set shape,"(48, 17)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.8833,0.9586,0.8833,0.9063,0.8772,0.8105,0.8233,0.019
nb,Naive Bayes,0.8742,0.9605,0.8742,0.9086,0.8704,0.8025,0.8189,0.019
rf,Random Forest Classifier,0.8742,0.987,0.8742,0.8983,0.8688,0.8013,0.8131,0.048
gbc,Gradient Boosting Classifier,0.8652,0.0,0.8652,0.8938,0.8608,0.786,0.7994,0.056
et,Extra Trees Classifier,0.8652,0.9708,0.8652,0.8942,0.8577,0.7871,0.8019,0.041
ridge,Ridge Classifier,0.8561,0.0,0.8561,0.8794,0.8479,0.7649,0.7783,0.018
lr,Logistic Regression,0.847,0.0,0.847,0.8722,0.8376,0.7532,0.7671,0.017
lightgbm,Light Gradient Boosting Machine,0.847,0.9684,0.847,0.8733,0.8422,0.757,0.7686,0.078
dt,Decision Tree Classifier,0.8379,0.8736,0.8379,0.8688,0.8286,0.7376,0.7533,0.018
lda,Linear Discriminant Analysis,0.8379,0.0,0.8379,0.8483,0.8228,0.7367,0.7547,0.018


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.8212,0.0,0.8212,0.8254,0.8078,0.7067,0.7221,0.018
lda,Linear Discriminant Analysis,0.8129,0.0,0.8129,0.8212,0.8008,0.6947,0.7107,0.013
lr,Logistic Regression,0.8121,0.0,0.8121,0.8167,0.7941,0.6912,0.7104,0.018
knn,K Neighbors Classifier,0.803,0.8856,0.803,0.8308,0.7939,0.6766,0.6961,0.018
et,Extra Trees Classifier,0.803,0.8923,0.803,0.8412,0.7931,0.6809,0.7022,0.035
lightgbm,Light Gradient Boosting Machine,0.7856,0.9151,0.7856,0.804,0.7632,0.6494,0.676,0.076
rf,Random Forest Classifier,0.7765,0.9181,0.7765,0.8061,0.7625,0.6357,0.662,0.047
svm,SVM - Linear Kernel,0.7674,0.0,0.7674,0.7773,0.7371,0.6199,0.6516,0.018
ada,Ada Boost Classifier,0.7674,0.0,0.7674,0.7818,0.7603,0.6294,0.6457,0.026
gbc,Gradient Boosting Classifier,0.7591,0.0,0.7591,0.7977,0.7471,0.6084,0.6377,0.053


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9205,0.0,0.9205,0.9428,0.9171,0.8721,0.8837,0.015
knn,K Neighbors Classifier,0.9114,0.9583,0.9114,0.9304,0.9078,0.8577,0.8683,0.015
ridge,Ridge Classifier,0.9023,0.0,0.9023,0.9304,0.8934,0.8407,0.8577,0.017
lda,Linear Discriminant Analysis,0.8932,0.0,0.8932,0.9205,0.884,0.8261,0.8428,0.013
rf,Random Forest Classifier,0.8848,0.9856,0.8848,0.9102,0.8756,0.8101,0.8265,0.046
nb,Naive Bayes,0.875,0.9489,0.875,0.9122,0.876,0.8041,0.8197,0.017
et,Extra Trees Classifier,0.8667,0.9736,0.8667,0.8936,0.8577,0.7825,0.7979,0.038
svm,SVM - Linear Kernel,0.8659,0.0,0.8659,0.8953,0.8573,0.7819,0.7964,0.019
ada,Ada Boost Classifier,0.8659,0.0,0.8659,0.8997,0.8643,0.7858,0.7984,0.029
gbc,Gradient Boosting Classifier,0.8659,0.0,0.8659,0.8953,0.8597,0.781,0.7968,0.051


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(159, 7)"
4,Transformed data shape,"(159, 19)"
5,Transformed train set shape,"(111, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
nb,Naive Bayes,0.8833,0.9342,0.8833,0.9041,0.878,0.8128,0.825,0.017
knn,K Neighbors Classifier,0.875,0.937,0.875,0.8922,0.8681,0.7984,0.8092,0.015
ridge,Ridge Classifier,0.8652,0.0,0.8652,0.8839,0.8604,0.7823,0.7929,0.015
lda,Linear Discriminant Analysis,0.8652,0.0,0.8652,0.8862,0.8607,0.7843,0.7957,0.014
gbc,Gradient Boosting Classifier,0.8568,0.0,0.8568,0.8762,0.8509,0.7706,0.7832,0.05
lr,Logistic Regression,0.8561,0.0,0.8561,0.8703,0.849,0.766,0.7764,0.015
svm,SVM - Linear Kernel,0.8561,0.0,0.8561,0.8821,0.8517,0.7702,0.7838,0.016
ada,Ada Boost Classifier,0.8561,0.0,0.8561,0.8624,0.8437,0.7684,0.7844,0.026
rf,Random Forest Classifier,0.847,0.976,0.847,0.8633,0.8406,0.7515,0.763,0.048
lightgbm,Light Gradient Boosting Machine,0.847,0.9509,0.847,0.8776,0.8404,0.7507,0.7668,0.087


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8591,0.0,0.8591,0.8722,0.8557,0.7746,0.7841,0.018
lightgbm,Light Gradient Boosting Machine,0.85,0.9483,0.85,0.8686,0.8456,0.7594,0.7696,0.061
svm,SVM - Linear Kernel,0.8409,0.0,0.8409,0.8632,0.8371,0.742,0.7575,0.019
rf,Random Forest Classifier,0.8402,0.9514,0.8402,0.8674,0.8349,0.745,0.7611,0.045
ridge,Ridge Classifier,0.8318,0.0,0.8318,0.8539,0.8286,0.7336,0.7461,0.014
lda,Linear Discriminant Analysis,0.8318,0.0,0.8318,0.8539,0.8286,0.7336,0.7461,0.017
nb,Naive Bayes,0.8235,0.9247,0.8235,0.8496,0.8263,0.7209,0.7301,0.015
gbc,Gradient Boosting Classifier,0.8144,0.0,0.8144,0.8368,0.8095,0.7023,0.7136,0.055
ada,Ada Boost Classifier,0.8045,0.0,0.8045,0.8298,0.8022,0.6854,0.7011,0.025
et,Extra Trees Classifier,0.8045,0.9161,0.8045,0.8268,0.7961,0.6861,0.7014,0.038


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(159, 7)"
4,Transformed data shape,"(159, 19)"
5,Transformed train set shape,"(111, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.9008,0.0,0.9008,0.9178,0.8954,0.8377,0.8504,0.018
nb,Naive Bayes,0.9,0.9421,0.9,0.9238,0.9007,0.8404,0.8534,0.018
rf,Random Forest Classifier,0.8818,0.9648,0.8818,0.8891,0.8805,0.8104,0.8156,0.046
lr,Logistic Regression,0.8735,0.0,0.8735,0.8915,0.8689,0.7939,0.8063,0.014
knn,K Neighbors Classifier,0.8735,0.9329,0.8735,0.8885,0.8713,0.7951,0.8041,0.018
ridge,Ridge Classifier,0.8735,0.0,0.8735,0.8929,0.8688,0.7933,0.8066,0.014
lightgbm,Light Gradient Boosting Machine,0.8735,0.9405,0.8735,0.885,0.8726,0.7964,0.8036,0.066
svm,SVM - Linear Kernel,0.8727,0.0,0.8727,0.8904,0.8701,0.7944,0.8052,0.018
gbc,Gradient Boosting Classifier,0.8545,0.0,0.8545,0.8728,0.8517,0.7644,0.7766,0.051
et,Extra Trees Classifier,0.8545,0.9255,0.8545,0.8656,0.8543,0.7668,0.7728,0.039


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(158, 7)"
4,Transformed data shape,"(158, 19)"
5,Transformed train set shape,"(110, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.8727,0.9444,0.8727,0.9023,0.868,0.7977,0.8143,0.018
lr,Logistic Regression,0.8545,0.0,0.8545,0.8793,0.8545,0.7671,0.7784,0.018
ridge,Ridge Classifier,0.8455,0.0,0.8455,0.8672,0.8455,0.7522,0.7624,0.016
rf,Random Forest Classifier,0.8455,0.9513,0.8455,0.8739,0.8434,0.7529,0.7682,0.046
lda,Linear Discriminant Analysis,0.8364,0.0,0.8364,0.8604,0.8352,0.7392,0.751,0.016
et,Extra Trees Classifier,0.8364,0.916,0.8364,0.8627,0.835,0.7392,0.7519,0.038
lightgbm,Light Gradient Boosting Machine,0.8364,0.9458,0.8364,0.8674,0.832,0.737,0.7518,0.06
svm,SVM - Linear Kernel,0.8273,0.0,0.8273,0.8549,0.8253,0.7232,0.7374,0.017
nb,Naive Bayes,0.8091,0.8872,0.8091,0.8476,0.8125,0.7004,0.715,0.018
gbc,Gradient Boosting Classifier,0.8091,0.0,0.8091,0.8446,0.8068,0.6961,0.7111,0.051


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9462,0.9541,0.9462,0.9592,0.9427,0.9123,0.9216,0.015
svm,SVM - Linear Kernel,0.9205,0.0,0.9205,0.9429,0.9155,0.871,0.8842,0.018
lightgbm,Light Gradient Boosting Machine,0.9197,0.9832,0.9197,0.9416,0.9185,0.8698,0.8831,0.06
lr,Logistic Regression,0.9189,0.0,0.9189,0.9422,0.9119,0.8671,0.884,0.018
rf,Random Forest Classifier,0.9182,0.9777,0.9182,0.9425,0.9118,0.8672,0.8841,0.05
gbc,Gradient Boosting Classifier,0.9023,0.0,0.9023,0.9152,0.9031,0.8455,0.8517,0.054
ridge,Ridge Classifier,0.9015,0.0,0.9015,0.9293,0.894,0.8379,0.858,0.015
et,Extra Trees Classifier,0.9008,0.9727,0.9008,0.9263,0.8938,0.8396,0.8575,0.042
nb,Naive Bayes,0.8932,0.9531,0.8932,0.9108,0.8927,0.8325,0.8424,0.016
lda,Linear Discriminant Analysis,0.8841,0.0,0.8841,0.9161,0.8749,0.8083,0.83,0.017


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.947,0.0,0.947,0.9612,0.944,0.917,0.9249,0.015
lightgbm,Light Gradient Boosting Machine,0.9462,0.9962,0.9462,0.9565,0.9446,0.9144,0.9214,0.076
knn,K Neighbors Classifier,0.9386,0.9845,0.9386,0.9487,0.9339,0.9016,0.909,0.015
ridge,Ridge Classifier,0.9379,0.0,0.9379,0.9536,0.9345,0.9016,0.9108,0.017
rf,Random Forest Classifier,0.9379,0.9939,0.9379,0.9511,0.9332,0.8989,0.9087,0.045
lda,Linear Discriminant Analysis,0.9379,0.0,0.9379,0.9544,0.937,0.9029,0.912,0.015
nb,Naive Bayes,0.9288,0.9563,0.9288,0.9462,0.9255,0.8895,0.8987,0.015
gbc,Gradient Boosting Classifier,0.9288,0.0,0.9288,0.9434,0.9244,0.887,0.8969,0.05
dt,Decision Tree Classifier,0.9212,0.9375,0.9212,0.9294,0.9181,0.8739,0.8815,0.016
svm,SVM - Linear Kernel,0.9212,0.0,0.9212,0.9355,0.9166,0.8755,0.8843,0.017


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 19)"
5,Transformed train set shape,"(112, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9197,0.9849,0.9197,0.9394,0.9185,0.8764,0.8868,0.062
dt,Decision Tree Classifier,0.9189,0.9348,0.9189,0.9419,0.9172,0.8746,0.8871,0.015
rf,Random Forest Classifier,0.9189,0.9886,0.9189,0.943,0.9172,0.8756,0.8881,0.048
lr,Logistic Regression,0.9106,0.0,0.9106,0.9263,0.9092,0.8571,0.8658,0.016
svm,SVM - Linear Kernel,0.9106,0.0,0.9106,0.9268,0.9078,0.8569,0.8673,0.033
ada,Ada Boost Classifier,0.9106,0.0,0.9106,0.9347,0.9072,0.8578,0.8728,0.026
gbc,Gradient Boosting Classifier,0.9106,0.0,0.9106,0.9254,0.9095,0.8568,0.8654,0.055
et,Extra Trees Classifier,0.9098,0.9923,0.9098,0.9344,0.9073,0.8593,0.8733,0.038
knn,K Neighbors Classifier,0.9015,0.9605,0.9015,0.9201,0.9,0.8415,0.8524,0.015
ridge,Ridge Classifier,0.8742,0.0,0.8742,0.9005,0.8711,0.8003,0.814,0.014


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(159, 7)"
4,Transformed data shape,"(159, 19)"
5,Transformed train set shape,"(111, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.8386,0.9063,0.8386,0.8754,0.832,0.7394,0.7608,0.015
ridge,Ridge Classifier,0.8212,0.0,0.8212,0.8593,0.8117,0.7102,0.7324,0.017
rf,Random Forest Classifier,0.8212,0.9388,0.8212,0.8585,0.8149,0.716,0.7344,0.047
lda,Linear Discriminant Analysis,0.8121,0.0,0.8121,0.8518,0.8071,0.6978,0.7156,0.017
lr,Logistic Regression,0.8114,0.0,0.8114,0.8302,0.7976,0.6943,0.716,0.015
lightgbm,Light Gradient Boosting Machine,0.8114,0.92,0.8114,0.8459,0.8051,0.6977,0.7162,0.076
et,Extra Trees Classifier,0.803,0.9256,0.803,0.8361,0.8005,0.6889,0.7036,0.036
dt,Decision Tree Classifier,0.7939,0.8336,0.7939,0.8385,0.7869,0.6696,0.6911,0.018
gbc,Gradient Boosting Classifier,0.7939,0.0,0.7939,0.8306,0.7891,0.67,0.687,0.053
nb,Naive Bayes,0.7848,0.9162,0.7848,0.8375,0.7781,0.6624,0.692,0.017


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(159, 7)"
4,Transformed data shape,"(159, 19)"
5,Transformed train set shape,"(111, 19)"
6,Transformed test set shape,"(48, 19)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9545,0.0,0.9545,0.9667,0.9534,0.9235,0.9311,0.021
rf,Random Forest Classifier,0.9455,0.9956,0.9455,0.9589,0.9442,0.907,0.916,0.041
svm,SVM - Linear Kernel,0.9364,0.0,0.9364,0.9513,0.9352,0.8948,0.9018,0.02
ridge,Ridge Classifier,0.9288,0.0,0.9288,0.9441,0.9274,0.8853,0.8936,0.018
lda,Linear Discriminant Analysis,0.9288,0.0,0.9288,0.9441,0.9274,0.8853,0.8936,0.014
et,Extra Trees Classifier,0.928,0.9961,0.928,0.9413,0.9273,0.88,0.8888,0.038
lightgbm,Light Gradient Boosting Machine,0.928,0.9926,0.928,0.9415,0.9269,0.882,0.8906,0.067
knn,K Neighbors Classifier,0.9273,0.9827,0.9273,0.946,0.9258,0.8778,0.8891,0.02
dt,Decision Tree Classifier,0.9189,0.9314,0.9189,0.9399,0.9145,0.8652,0.8795,0.02
gbc,Gradient Boosting Classifier,0.9189,0.0,0.9189,0.9339,0.9178,0.867,0.8769,0.052


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,Session id,1
1,Target,target
2,Target type,Multiclass
3,Original data shape,"(160, 7)"
4,Transformed data shape,"(160, 17)"
5,Transformed train set shape,"(112, 17)"
6,Transformed test set shape,"(48, 17)"
7,Ignore features,1
8,Categorical features,6
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.8492,0.9289,0.8492,0.8587,0.8355,0.7534,0.7762,0.019
lr,Logistic Regression,0.8311,0.0,0.8311,0.8695,0.8237,0.7244,0.7459,0.019
rf,Random Forest Classifier,0.8227,0.9431,0.8227,0.8441,0.8086,0.7168,0.7419,0.053
ridge,Ridge Classifier,0.822,0.0,0.822,0.8673,0.8147,0.7115,0.7358,0.018
nb,Naive Bayes,0.8129,0.9247,0.8129,0.8576,0.8052,0.703,0.7271,0.019
lda,Linear Discriminant Analysis,0.8129,0.0,0.8129,0.8509,0.8044,0.6939,0.7168,0.014
ada,Ada Boost Classifier,0.8121,0.0,0.8121,0.837,0.798,0.6883,0.7129,0.029
svm,SVM - Linear Kernel,0.8038,0.0,0.8038,0.823,0.7986,0.6815,0.6941,0.019
lightgbm,Light Gradient Boosting Machine,0.8038,0.9368,0.8038,0.8526,0.7981,0.6859,0.7114,0.067
gbc,Gradient Boosting Classifier,0.7871,0.0,0.7871,0.8438,0.7829,0.6641,0.6885,0.051


Transformation Pipeline and Model Successfully Saved


# 予測

In [33]:
# Directory the training cell saved the stacking models to (one file per subject/split).
save_path = "stacking_model3"
# predictions[sub_str] is a list with one predict_model() result per split, in split order.
predictions={}
for sub in range(5):
    sub_str = f"{sub:04d}"
    predictions[sub_str]=[]

    for isplit in range(3):
        # Base-model class predictions on the test data for this subject/split.
        df_test = load_vals(sub_str,isplit,"test")

        # Same "{subject}-{split}" naming as used when the model was saved.
        filename=os.path.join(save_path,f"{sub_str}-{isplit}")
        best_model = load_model(filename)
        # The returned frame carries a "prediction_label" column, which the next cell reads.
        pred = predict_model(best_model, data=df_test)
        predictions[sub_str].append(pred)


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


Transformation Pipeline and Model Successfully Loaded


In [34]:
# Gather, per subject, the labels predicted by the three split-specific stacking models
# and stack all subjects into one frame. Each subject keeps its own 0..n-1 row index,
# which the following cell uses to build the per-subject titles.
subject_frames = []
for sub in "0000 0001 0002 0003 0004".split():
    # Give every split its own column label; concatenating the Series as-is would
    # produce three columns that are all named "prediction_label".
    df_pred = pd.concat(
        [predictions[sub][i]["prediction_label"].rename(f"prediction_label_{i}") for i in range(3)],
        axis=1,
    )
    df_pred["sub"]=sub
    subject_frames.append(df_pred)
df_all_pred = pd.concat(subject_frames)
df_all_pred

Unnamed: 0,prediction_label,prediction_label.1,prediction_label.2,sub
0,2,2,2,0000
1,2,2,2,0000
2,2,2,2,0000
3,2,2,2,0000
4,0,0,0,0000
...,...,...,...,...
154,2,2,2,0004
155,2,2,2,0004
156,1,1,1,0004
157,0,0,1,0004


### 3分割(3-fold CV)の予測を多数決する。3つの予測がすべて異なる(同数の)場合はクラス2とする。

In [35]:
# Majority vote over the three split models (the first three columns of df_all_pred).
# Title: "subject<N>_<row index within the subject, zero-padded to 3 digits>".
df_all_pred["title"] = [
    f"subject{int(sub)}_{row_id:03d}"
    for sub, row_id in zip(df_all_pred["sub"], df_all_pred.index)
]

# DataFrame.mode(axis=1) returns only as many columns as the largest number of modes
# found in any row. With three votes that is 1 column when no row is a three-way tie,
# so reindex to exactly three columns (missing ones become NaN) instead of assuming 3.
vote_modes = df_all_pred.iloc[:, :3].mode(axis=1).reindex(columns=range(3)).astype(float)
for mode_rank in range(3):
    # Assign raw arrays: the row index repeats across subjects, so avoid label alignment.
    df_all_pred[f"mode{mode_rank}"] = vote_modes[mode_rank].to_numpy()

# mode2 is only present when all three votes differ; such ties take mode2, every other
# row takes its single mode. Modes are presumably returned in ascending order, so mode2
# is the largest label (class 2 when the labels are 0/1/2).
is_three_way_tie = df_all_pred["mode2"].notna()
df_all_pred["final"] = np.where(is_three_way_tie, df_all_pred["mode2"], df_all_pred["mode0"]).astype(int)

# event2str3 is looked up with the label shifted by +1.
df_all_pred["event"] = [event2str3[label + 1] for label in df_all_pred["final"]]
df_all_pred


Unnamed: 0,prediction_label,prediction_label.1,prediction_label.2,sub,title,mode0,mode1,mode2,final,event
0,2,2,2,0000,subject0_000,2.0,,,2,pumping
1,2,2,2,0000,subject0_001,2.0,,,2,pumping
2,2,2,2,0000,subject0_002,2.0,,,2,pumping
3,2,2,2,0000,subject0_003,2.0,,,2,pumping
4,0,0,0,0000,subject0_004,0.0,,,0,frontside_kickturn
...,...,...,...,...,...,...,...,...,...,...
154,2,2,2,0004,subject4_154,2.0,,,2,pumping
155,2,2,2,0004,subject4_155,2.0,,,2,pumping
156,1,1,1,0004,subject4_156,1.0,,,1,backside_kickturn
157,0,0,1,0004,subject4_157,0.0,,,0,frontside_kickturn


In [36]:
# Export one "title,event" line per row, without a header line or an index column.
df_all_pred[["title","event"]].to_csv("classification1_1028.csv",index=False,header=False)