# ADHD classification

## DT method

### ZigZag trace

In [3]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from matplotlib.colors import ListedColormap
import graphviz
from sklearn.preprocessing import StandardScaler
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [4]:
f=pd.read_csv('Dataset/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

(87, 31)

In [5]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
tuned_parameters = {'max_depth':[1, 2, 3, 4, 5],
         'min_samples_leaf':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
         'min_samples_split':[2, 3, 4, 5]}

#DecisionTreeClassifier(max_depth=4, min_samples_leaf=2, random_state=0)


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

[ 1740.85  1563.31  1466.88  1688.38  1858.82  2091.91  3254.4   4006.28
  2357.96  1828.74  2391.82  2329.72  1622.88  1891.23 13118.    1769.75
  1651.74  2346.78  3485.1   3047.08  3018.19  2176.34  1855.85  2421.99
  2324.16  3061.06  2358.62  1739.48  1817.36  2062.04  2111.    2888.18
  2340.96  1440.    1782.47  1306.    1416.27  1558.54  1444.39  1679.02
  1692.5   1436.61  3078.13  3199.09  2211.81  8520.    2281.85  2475.82
  1256.    1511.    1385.    2230.67  2211.01  2845.28  1836.84  2961.26
  2358.1   1849.72  1558.15  1652.61  2648.88  1487.22  1671.49  2626.81
  1699.02  1636.    4210.43  3946.52  2438.3   1976.15  2247.1   1444.9
  2613.4   2316.59  1937.3   1466.13  3147.4   3192.41  2502.    1293.
  1462.83  2763.39  2531.    1224.    1407.31  1792.25  1425.57]


In [6]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [7]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    DecisionTreeClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    8.6s finished
Features: 1/2[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/2

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.022988505747126436 {'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 1/3[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/3[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/3

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.022988505747126436 {'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.6s finished
Features: 1/4[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/4[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/4[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/4

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.022988505747126436 {'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 1/5[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/5[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/5[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/5[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 con

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.011494252873563218 {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/6[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/6[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/6[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/6[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 con

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.022988505747126436 {'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.6s finished
Features: 1/7[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/7[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/7[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/7[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 con

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.6s finished
Features: 1/8[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/8[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/8[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/8[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 con

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    1.1s finished
Features: 1/9[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/9[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.2s finished
Features: 3/9[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.2s finished
Features: 4/9[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 con

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 1/10[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/10[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.1s finished
Features: 3/10[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/10[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.022988505747126436 {'max_depth': 5, 'min_samples_leaf': 9, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/11[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.8s finished
Features: 2/11[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.1s finished
Features: 3/11[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    1.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.2s finished
Features: 4/11[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/12[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/12[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/12[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/12[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/13[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/13[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/13[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/13[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/14[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/14[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/14[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/14[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.6s finished
Features: 1/15[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/15[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/15[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/15[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/16[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/16[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/16[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/16[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/17[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/17[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/17[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/17[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/18[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/18[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/18[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/18[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.011494252873563218 {'max_depth': 4, 'min_samples_leaf': 10, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/19[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/19[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/19[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/19[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/20[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/20[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/20[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/20[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/21[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/21[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/21[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/21[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/22[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/22[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/22[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/22[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 1/23[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.7s finished
Features: 2/23[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.1s finished
Features: 3/23[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/23[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/24[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/24[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/24[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/24[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.6s finished
Features: 1/25[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/25[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/25[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/25[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/26[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/26[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/26[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/26[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.5s finished
Features: 1/27[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.6s finished
Features: 2/27[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
Features: 3/27[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
Features: 4/27[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 1/28[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.8s finished
Features: 2/28[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.1s finished
Features: 3/28[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/28[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 1/29[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.9s finished
Features: 2/29[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.1s finished
Features: 3/29[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
Features: 4/29[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12

Fitting 87 folds for each of 200 candidates, totalling 17400 fits
0.0 {'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2}


AttributeError: k_features must be between 1 and X.shape[1].

In [None]:
re.to_csv('Dataset/DT_Zigzag_trace_SFFS.csv')

### ZigZag predict

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from matplotlib.colors import ListedColormap
import graphviz
from sklearn.preprocessing import StandardScaler
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [None]:
f=pd.read_csv('Dataset/Task2.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
tuned_parameters = {'max_depth':[1, 2, 3, 4, 5],
         'min_samples_leaf':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
         'min_samples_split':[2, 3, 4, 5]}

#DecisionTreeClassifier(max_depth=4, min_samples_leaf=2, random_state=0)


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    DecisionTreeClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/DT_Task2_SFFS.csv')

### Pl trace

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from matplotlib.colors import ListedColormap
import graphviz
from sklearn.preprocessing import StandardScaler
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [None]:
f=pd.read_csv('Dataset/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
tuned_parameters = {'max_depth':[1, 2, 3, 4, 5],
         'min_samples_leaf':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
         'min_samples_split':[2, 3, 4, 5]}

#DecisionTreeClassifier(max_depth=4, min_samples_leaf=2, random_state=0)


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    DecisionTreeClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/DT_Pl_trace_SFFS.csv')

### Pl predict

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from matplotlib.colors import ListedColormap
import graphviz
from sklearn.preprocessing import StandardScaler
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [None]:
f=pd.read_csv('Dataset/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
tuned_parameters = {'max_depth':[1, 2, 3, 4, 5],
         'min_samples_leaf':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
         'min_samples_split':[2, 3, 4, 5]}

#DecisionTreeClassifier(max_depth=4, min_samples_leaf=2, random_state=0)


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    DecisionTreeClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(DecisionTreeClassifier(random_state=0),   # グリッドサーチで決定木を定義
                   tuned_parameters,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/DT_Pl_predict_SFFS.csv')

## ET method

### ZigZag trace

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'max_depth':[3, 4, 5],
    'min_samples_leaf':[1, 4, 7],
    'min_samples_split':[2]
}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    ExtraTreesClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/ET_Zigzag_trace_SFFS.csv')

### ZigZag predict

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'max_depth':[3, 4, 5],
    'min_samples_leaf':[1, 4, 7],
    'min_samples_split':[2]
}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    ExtraTreesClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/ET_Zigzag_predict_SFFS.csv')

### Pl trace

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'max_depth':[3, 4, 5],
    'min_samples_leaf':[1, 4, 7],
    'min_samples_split':[2]
}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    ExtraTreesClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/ET_Pl_trace_SFFS.csv')

### Pl predict

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'max_depth':[3, 4, 5],
    'min_samples_leaf':[1, 4, 7],
    'min_samples_split':[2]
}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    ExtraTreesClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(ExtraTreesClassifier(random_state=0),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/ET_Pl_predict_SFFS.csv')

## GNB method

### ZigZag trace

In [None]:
from sklearn.naive_bayes import GaussianNB
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'var_smoothing': np.logspace(0,-9, num=100)
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    GaussianNB(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/GNB_Zigzag_trace_SFFS.csv')

### ZigZag predict

In [None]:
from sklearn.naive_bayes import GaussianNB
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'var_smoothing': np.logspace(0,-9, num=100)
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    GaussianNB(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/GNB_Zigzag_predict_SFFS.csv')

### Pl trace

In [None]:
from sklearn.naive_bayes import GaussianNB
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'var_smoothing': np.logspace(0,-9, num=100)
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    GaussianNB(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/GNB_Pl_trace_SFFS.csv')

### Pl predict

In [None]:
from sklearn.naive_bayes import GaussianNB
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'var_smoothing': np.logspace(0,-9, num=100)
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    GaussianNB(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(GaussianNB(),
                   params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/GNB_Pl_predict_SFFS.csv')

## KNN method

### ZigZag trace

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'n_neighbors': np.arange(1, 20),
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    KNeighborsClassifier(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/KNN_Zigzag_trace_SFFS.csv')

### ZigZag predict

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'n_neighbors': np.arange(1, 20),
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    KNeighborsClassifier(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/KNN_Zigzag_predict_SFFS.csv')

### Pl trace

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'n_neighbors': np.arange(1, 20),
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    KNeighborsClassifier(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/KNN_Pl_trace_SFFS.csv')

### Pl predict

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import csv
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
f=pd.read_csv('Dataset/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    'n_neighbors': np.arange(1, 20),
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    KNeighborsClassifier(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(KNeighborsClassifier(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/KNN_Pl_predict_SFFS.csv')

## LOG method

### ZigZag trace

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [None]:
f=pd.read_csv('Dataset/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    "C": [10**i for i in range(-4,4)],
    "penalty": ["l1", "l2"],
    "random_state": [1],
    "solver":['liblinear']
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    LogisticRegression(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/LOG_Zigzag_trace_SFFS.csv')

### ZigZag predict

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [None]:
f=pd.read_csv('Dataset/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    "C": [10**i for i in range(-4,4)],
    "penalty": ["l1", "l2"],
    "random_state": [1],
    "solver":['liblinear']
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    LogisticRegression(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/LOG_Zigzag_predict_SFFS.csv')

### Pl trace

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [None]:
f=pd.read_csv('Dataset/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    "C": [10**i for i in range(-4,4)],
    "penalty": ["l1", "l2"],
    "random_state": [1],
    "solver":['liblinear']
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    LogisticRegression(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/LOG_Pl_trace_SFFS.csv')

### Pl predict

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [None]:
f=pd.read_csv('Dataset/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')

#データチューニング
params = {
    "C": [10**i for i in range(-4,4)],
    "penalty": ["l1", "l2"],
    "random_state": [1],
    "solver":['liblinear']
}


from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )

In [None]:
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(2,31):
    sffs = SFS(
    LogisticRegression(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(LogisticRegression(),
                   param_grid=params,
                   cv=cv,
                   n_jobs=-1,
                   verbose=2
                  )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/RF_Zigzag_trace.csv')'''

In [None]:
re.to_csv('Dataset/LOG_Pl_predict_SFFS.csv')

## RF method

### ZigZag trace

#### ML Code

In [None]:
%pwd

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
#pl trace 16 30 50
#pl forecast 20 40 60
#zig trace 12 31 51
#zig forecast 20 40 60

In [None]:
path="C:\\Users\\shinlab\\Desktop\\ChildOrAdult-20211214T064035Z-001\\ChildOrAdult\\result"
f=pd.read_csv('Dataset/DD_pentablet/result/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)


#データチューニング
tuned_parameters = {"max_depth": [5, None],
                     "n_estimators":[50,100,200,300],
                     "min_samples_split": [2, 3],
                     "min_samples_leaf": [1, 3],
                     "bootstrap": [True],
                     "criterion": ["gini", "entropy"]}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
)
"""
{"max_depth": [2,3,5,10, None],
 "n_estimators":[50,100,200,300,400],
 "min_samples_split": [2, 3, 10],
 "min_samples_leaf": [1, 3, 10],
 "bootstrap": [True, False],
 "criterion": ["gini", "entropy"]}
 """

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(1,31):
    sffs = SFS(
    RandomForestClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=1, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(
    RandomForestClassifier(random_state=0), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=2,
    cv=cv, # 交差検定の回数
    )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_trace.csv')'''

In [None]:
re

In [None]:
re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_trace.csv')

#### CM and result detail Code

In [None]:
from sklearn.metrics import confusion_matrix
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
#change feature shape
flist = [6, 7, 20, 28]
changed_list = [n+3 for n in flist]
changed_list

In [None]:
#data separation
#(3, 4, 5, 6, 7, 20, 22, 23, 25, 29)

X = df[:,changed_list]
y =f.iloc[:,2]

y=y.astype('int')
print(y)
y.value_counts()
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
stdsc = StandardScaler()

In [None]:
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
X_std = stdsc.fit_transform(X)

In [None]:
tuned_parameters = {'bootstrap': [True],
                    'criterion': ['gini'],
                    'max_depth': [5],
                    'min_samples_leaf': [1],
                    'min_samples_split': [3],
                    'n_estimators': [50]
                   }
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 50}

In [None]:
#GridSearch
clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1
)

clf.fit(X_std, y)

#result(score + param)
print(clf.best_score_, clf.best_params_)

In [None]:
def generate_param(param):
    new_param = ""
    count = 0
    for i in param:
        if i == '{' or i == '}':
            i = ''
        elif i == ':':
            i = '='
            count += 1
        elif (count == 2 or count == 3) and i == '\'':
            count += 1
        elif (count != 2 and count != 3) and i == '\'':
            i = ''
        new_param += i
    return new_param
        

In [None]:
sample = "{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 50}"
tuned_param = generate_param(sample)

In [None]:
tuned_param

In [None]:
model=RandomForestClassifier(bootstrap= True, criterion= 'gini', max_depth= 5, min_samples_leaf= 1, min_samples_split= 3, n_estimators= 50,
                            random_state=0)
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}

In [None]:
# loocv to manually evaluate the performance of a random forest classifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# create loocv procedure
cv = LeaveOneOut()
# enumerate splits
y_true, y_pred = list(), list()
for train_ix, test_ix in cv.split(X_std):
    # split data
    X_train, X_test = X_std[train_ix, :], X_std[test_ix, :]
    y_train, y_test = y[train_ix], y[test_ix]
    model.fit(X_train, y_train)
    # evaluate model
    yhat = model.predict(X_test)
    # store
    #y_true.append(y_test[0])
    y_pred.append(yhat[0])
# calculate accuracy
acc = accuracy_score(y, y_pred)
print('Accuracy: %.3f' % acc)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(y, y_pred)

sns.heatmap(cm, annot=True)
#plt.savefig('Dataset/DD_pentablet/result/Zigzag_trace_RF_heatmap.png')

In [None]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

acc = accuracy_score(y, y_pred)
pre = precision_score(y, y_pred)
rec = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)
auc = roc_auc_score(y, y_pred)


re = pd.DataFrame()
re_i = pd.DataFrame([[flist,acc,clf.best_params_,pre,rec,f1,auc]], columns=['Feature','accuracy','param','precision','recall','f1','AUC'])
re = pd.concat([re,re_i])
#re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_trace_29_rep.csv', index = False)

In [None]:
re

### ZigZag predict

#### ML Code

In [None]:
%pwd

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
#pl trace 16 30 50
#pl forecast 20 40 60
#zig trace 12 31 51
#zig forecast 20 40 60

In [None]:
path="C:\\Users\\shinlab\\Desktop\\ChildOrAdult-20211214T064035Z-001\\ChildOrAdult\\result"
f=pd.read_csv('Dataset/DD_pentablet/result/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)


#データチューニング
tuned_parameters = {"max_depth": [2,3,5, None],
                     "n_estimators":[50,100,200,300],
                     "min_samples_split": [2, 3, 10],
                     "min_samples_leaf": [1, 3, 10],
                     "bootstrap": [True, False],
                     "criterion": ["gini", "entropy"]}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
)
"""
{"max_depth": [5, None],
 "n_estimators":[50,100,200,300],
 "min_samples_split": [2, 3],
 "min_samples_leaf": [1, 3],
 "bootstrap": [True],
 "criterion": ["gini", "entropy"]}
 
 {"max_depth": [2,3,5,10, None],
 "n_estimators":[50,100,200,300,400],
 "min_samples_split": [2, 3, 10],
 "min_samples_leaf": [1, 3, 10],
 "bootstrap": [True, False],
 "criterion": ["gini", "entropy"]}
 """

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(1,31):
    sffs = SFS(
    RandomForestClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(
    RandomForestClassifier(random_state=0), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=2,
    cv=cv, # 交差検定の回数
    )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_trace.csv')'''

In [None]:
re

In [None]:
re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_predict.csv')

#### CM and result detail Code

In [None]:
from sklearn.metrics import confusion_matrix
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
#change feature shape
flist = [2, 6, 8, 20, 25]
changed_list = [n+3 for n in flist]
changed_list

In [None]:
#data separation
#(3, 4, 5, 6, 7, 20, 22, 23, 25, 29)

X = df[:,changed_list]
y =f.iloc[:,2]

y=y.astype('int')
print(y)
y.value_counts()
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
stdsc = StandardScaler()

In [None]:
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
X_std = stdsc.fit_transform(X)

In [None]:
tuned_parameters = {'bootstrap': [True],
                    'criterion': ['gini'],
                    'max_depth': [None],
                    'min_samples_leaf': [1],
                    'min_samples_split': [2],
                    'n_estimators': [100]
                   }
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

In [None]:
#GridSearch
clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1
)

clf.fit(X_std, y)

#result(score + param)
print(clf.best_score_, clf.best_params_)

In [None]:
def generate_param(param):
    new_param = ""
    count = 0
    for i in param:
        if i == '{' or i == '}':
            i = ''
        elif i == ':':
            i = '='
            count += 1
        elif (count == 2 or count == 3) and i == '\'':
            count += 1
        elif (count != 2 and count != 3) and i == '\'':
            i = ''
        new_param += i
    return new_param
        

In [None]:
sample = "{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}"
tuned_param = generate_param(sample)

In [None]:
tuned_param

In [None]:
model=RandomForestClassifier(bootstrap= True, criterion= 'gini', max_depth= None, min_samples_leaf= 1, min_samples_split= 2, n_estimators= 100,
                            random_state=0)
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}

In [None]:
# loocv to manually evaluate the performance of a random forest classifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# create loocv procedure
cv = LeaveOneOut()
# enumerate splits
y_true, y_pred = list(), list()
for train_ix, test_ix in cv.split(X_std):
    # split data
    X_train, X_test = X_std[train_ix, :], X_std[test_ix, :]
    y_train, y_test = y[train_ix], y[test_ix]
    model.fit(X_train, y_train)
    # evaluate model
    yhat = model.predict(X_test)
    # store
    #y_true.append(y_test[0])
    y_pred.append(yhat[0])
# calculate accuracy
acc = accuracy_score(y, y_pred)
print('Accuracy: %.3f' % acc)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(y, y_pred)

sns.heatmap(cm, annot=True)
#plt.savefig('Dataset/DD_pentablet/result/Zigzag_predict_RF_heatmap.png')

In [None]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

acc = accuracy_score(y, y_pred)
pre = precision_score(y, y_pred)
rec = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)
auc = roc_auc_score(y, y_pred)


re = pd.DataFrame()
re_i = pd.DataFrame([[flist,acc,clf.best_params_,pre,rec,f1,auc]], columns=['Feature','accuracy','param','precision','recall','f1','AUC'])
re = pd.concat([re,re_i])
#re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_predict_29_rep.csv', index = False)

In [None]:
re

### Pl trace

#### ML Code

In [None]:
%pwd

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
#pl trace 16 30 50
#pl forecast 20 40 60
#zig trace 12 31 51
#zig forecast 20 40 60

In [None]:
path="C:\\Users\\shinlab\\Desktop\\ChildOrAdult-20211214T064035Z-001\\ChildOrAdult\\result"
f=pd.read_csv('Dataset/DD_pentablet/result/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)


#データチューニング
tuned_parameters = {"max_depth": [5, None],
                     "n_estimators":[50,100,200,300],
                     "min_samples_split": [2, 3],
                     "min_samples_leaf": [1, 3],
                     "bootstrap": [True],
                     "criterion": ["gini", "entropy"]}
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
)
"""
{"max_depth": [2,3,5,10, None],
 "n_estimators":[50,100,200,300,400],
 "min_samples_split": [2, 3, 10],
 "min_samples_leaf": [1, 3, 10],
 "bootstrap": [True, False],
 "criterion": ["gini", "entropy"]}
 """

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(1,31):
    sffs = SFS(
    RandomForestClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(
    RandomForestClassifier(random_state=0), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=1,
    cv=cv, # 交差検定の回数
    )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_trace.csv')'''

In [None]:
re

In [None]:
re.to_csv('Dataset/DD_pentablet/result/RF_Pl_trace.csv')

#### CM and result detail Code

In [None]:
from sklearn.metrics import confusion_matrix
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
#change feature shape
flist = [4, 6, 10, 15, 22, 26, 27]
changed_list = [n+3 for n in flist]
changed_list

In [None]:
#data separation
#(3, 4, 5, 6, 7, 20, 22, 23, 25, 29)

X = df[:,changed_list]
y =f.iloc[:,2]

y=y.astype('int')
print(y)
y.value_counts()
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
stdsc = StandardScaler()

In [None]:
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
X_std = stdsc.fit_transform(X)

In [None]:
tuned_parameters = {'bootstrap': [True],
                    'criterion': ['gini'],
                    'max_depth': [5],
                    'min_samples_leaf': [1],
                    'min_samples_split': [3],
                    'n_estimators': [100]
                   }
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}

In [None]:
#GridSearch
clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1
)

clf.fit(X_std, y)

#result(score + param)
print(clf.best_score_, clf.best_params_)

In [None]:
def generate_param(param):
    new_param = ""
    count = 0
    for i in param:
        if i == '{' or i == '}':
            i = ''
        elif i == ':':
            i = '='
            count += 1
        elif (count == 2 or count == 3) and i == '\'':
            count += 1
        elif (count != 2 and count != 3) and i == '\'':
            i = ''
        new_param += i
    return new_param
        

In [None]:
sample = "{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}"
tuned_param = generate_param(sample)

In [None]:
tuned_param

In [None]:
model=RandomForestClassifier(bootstrap= True, criterion= 'gini', max_depth= 5, min_samples_leaf= 1, min_samples_split= 3, n_estimators= 100,
                            random_state=0)
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}

In [None]:
# loocv to manually evaluate the performance of a random forest classifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# create loocv procedure
cv = LeaveOneOut()
# enumerate splits
y_true, y_pred = list(), list()
for train_ix, test_ix in cv.split(X_std):
    # split data
    X_train, X_test = X_std[train_ix, :], X_std[test_ix, :]
    y_train, y_test = y[train_ix], y[test_ix]
    model.fit(X_train, y_train)
    # evaluate model
    yhat = model.predict(X_test)
    # store
    #y_true.append(y_test[0])
    y_pred.append(yhat[0])
# calculate accuracy
acc = accuracy_score(y, y_pred)
print('Accuracy: %.3f' % acc)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(y, y_pred)

sns.heatmap(cm, annot=True)
#plt.savefig('Dataset/DD_pentablet/result/Pl_trace_RF_heatmap.png')

In [None]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

acc = accuracy_score(y, y_pred)
pre = precision_score(y, y_pred)
rec = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)
auc = roc_auc_score(y, y_pred)


re = pd.DataFrame()
re_i = pd.DataFrame([[flist,acc,clf.best_params_,pre,rec,f1,auc]], columns=['Feature','accuracy','param','precision','recall','f1','AUC'])
re = pd.concat([re,re_i])
#re.to_csv('Dataset/DD_pentablet/result/RF_Pl_trace_29_rep.csv', index = False)

In [None]:
re

### Pl predict

#### ML Code

In [None]:
%pwd

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
#pl trace 16 30 50
#pl forecast 20 40 60
#zig trace 12 31 51
#zig forecast 20 40 60

In [None]:
# path="C:\\Users\\shinlab\\Desktop\\ChildOrAdult-20211214T064035Z-001\\ChildOrAdult\\result"
f=pd.read_csv('Dataset/DD_pentablet/result/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
X = df[:,2:]
y = df[:,1]
print(y)
y=y.astype('int')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)


#データチューニング
tuned_parameters = {"max_depth": [5, None],
                     "n_estimators":[50,100,200,300],
                     "min_samples_split": [2, 3],
                     "min_samples_leaf": [1, 3],
                     "bootstrap": [True],
                     "criterion": ["gini", "entropy"]}

from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
)
"""
{"max_depth": [2,3,5,10, None],
 "n_estimators":[50,100,200,300,400],
 "min_samples_split": [2, 3, 10],
 "min_samples_leaf": [1, 3, 10],
 "bootstrap": [True, False],
 "criterion": ["gini", "entropy"]}
 """

In [None]:
best_acc = 0
re = pd.DataFrame()
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)
for i in range(1,31):
    sffs = SFS(
    RandomForestClassifier(random_state=0), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]

    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)
    
    clf = GridSearchCV(
    RandomForestClassifier(random_state=0), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=1,
    cv=cv, # 交差検定の回数
    )
    
    clf.fit(X_train_sffs_std, y)

    print(clf.best_score_, clf.best_params_)
    
    if clf.best_score_ < best_acc - 0.05:
        break
    elif clf.best_score_ > best_acc:
        best_acc = clf.best_score_
    
    re_i = pd.DataFrame([[sffs.k_feature_idx_,clf.best_score_,clf.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    '''# トレーニングデータに対する精度
    pred_train = random_forest.predict(X_train_sffs)
    accuracy_train = accuracy_score(y_train, pred_train)

    # テストデータに対する精度
    pred_test = random_forest.predict(X_test_sffs)
    accuracy_test = accuracy_score(y_test, pred_test)

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy_train,accuracy_test]], columns=['Feature','TrainigScore','TestScore'])
    re = pd.concat([re,re_i])

re.to_csv('Dataset/DD_pentablet/result/RF_Zigzag_trace.csv')'''

In [None]:
re

In [None]:
re.to_csv('Dataset/DD_pentablet/result/RF_Pl_predict.csv')

#### CM and result detail Code

In [None]:
from sklearn.metrics import confusion_matrix
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
#change feature shape
flist = [2, 3, 4, 6, 7, 8, 9, 14, 24]
changed_list = [n+3 for n in flist]
changed_list

In [None]:
#data separation
#(3, 4, 5, 6, 7, 20, 22, 23, 25, 29)

X = df[:,changed_list]
y =f.iloc[:,2]

y=y.astype('int')
print(y)
y.value_counts()
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
stdsc = StandardScaler()

In [None]:
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
X_std = stdsc.fit_transform(X)

In [None]:
tuned_parameters = {'bootstrap': [True],
                    'criterion': ['gini'],
                    'max_depth': [None],
                    'min_samples_leaf': [1],
                    'min_samples_split': [2],
                    'n_estimators': [100]
                   }
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

In [None]:
#GridSearch
clf = GridSearchCV(
RandomForestClassifier(random_state=0), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1
)

clf.fit(X_std, y)

#result(score + param)
print(clf.best_score_, clf.best_params_)

In [None]:
def generate_param(param):
    new_param = ""
    count = 0
    for i in param:
        if i == '{' or i == '}':
            i = ''
        elif i == ':':
            i = '='
            count += 1
        elif (count == 2 or count == 3) and i == '\'':
            count += 1
        elif (count != 2 and count != 3) and i == '\'':
            i = ''
        new_param += i
    return new_param
        

In [None]:
sample = "{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}"
tuned_param = generate_param(sample)

In [None]:
tuned_param

In [None]:
model=RandomForestClassifier(bootstrap= True, criterion= 'gini', max_depth= None, min_samples_leaf= 1, min_samples_split= 2, n_estimators= 100,
                            random_state=0)
#{'bootstrap': True, 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}

In [None]:
# loocv to manually evaluate the performance of a random forest classifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# create loocv procedure
cv = LeaveOneOut()
# enumerate splits
y_true, y_pred = list(), list()
for train_ix, test_ix in cv.split(X_std):
    # split data
    X_train, X_test = X_std[train_ix, :], X_std[test_ix, :]
    y_train, y_test = y[train_ix], y[test_ix]
    model.fit(X_train, y_train)
    # evaluate model
    yhat = model.predict(X_test)
    # store
    #y_true.append(y_test[0])
    y_pred.append(yhat[0])
# calculate accuracy
acc = accuracy_score(y, y_pred)
print('Accuracy: %.3f' % acc)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(y, y_pred)

sns.heatmap(cm, annot=True)
#plt.savefig('Dataset/DD_pentablet/result/Pl_predict_RF_heatmap.png')

In [None]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

acc = accuracy_score(y, y_pred)
pre = precision_score(y, y_pred)
rec = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)
auc = roc_auc_score(y, y_pred)


re = pd.DataFrame()
re_i = pd.DataFrame([[flist,acc,clf.best_params_,pre,rec,f1,auc]], columns=['Feature','accuracy','param','precision','recall','f1','AUC'])
re = pd.concat([re,re_i])
#re.to_csv('Dataset/DD_pentablet/result/RF_Pl_predict_29_rep.csv', index = False)

In [None]:
re

## SVM method

### ZigZag trace

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Zigzag_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
i = 0
for column in f:
    print(column + str(i))
    i += 1

In [None]:
df

In [None]:
#data separation
X = df[:,3:]
y =f.iloc[:,2]

y=y.astype('int')
print(y)
y.value_counts()
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
stdsc = StandardScaler()

In [None]:
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
X_std = stdsc.fit_transform(X)
#X_test_std = stdsc.transform(X_test)

In [None]:
tuned_parameters = {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]}
"""
{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
{'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.001, 0.0001]}, 75%
{'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3, 4], 'gamma': [0.001, 0.0001]},
"""

In [None]:
gs = GridSearchCV(
SVC(), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1
)

In [None]:
best_acc = 0
re = pd.DataFrame()
for i in range(2,31):
    sffs = SFS(SVC(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,	#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]
    # 標準化
    stdsc = StandardScaler()
    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)

    #GridSearch
    gs = GridSearchCV(
    SVC(), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=2,
    cv=cv, # 交差検定の回数
    n_jobs=-1
    )
    
    gs.fit(X_train_sffs_std, y)
    
    #result(score + param)
    print(gs.best_score_, gs.best_params_)
    re_i = pd.DataFrame([[sffs.k_feature_idx_,gs.best_score_,gs.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    
    '''if gs.best_score_ < best_acc:
        break
    else:
        best_acc = gs.best_score_'''
    
'''
    svm.fit(X_train_sffs_std, y)

    # トレーニングデータに対する精度
    scores = cross_val_score(svm, X_std, y, scoring='accuracy', cv=LeaveOneOut(), n_jobs=-1)
    accuracy=scores.mean()

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy]], columns=['Feature','accuracy'])
    re = pd.concat([re,re_i])
    '''

In [None]:
print(re)

In [None]:
re.to_csv('Dataset/DD_pentablet/result/SVM_Zigzag_trace_29.csv')

In [None]:
re.max(axis=0)

### ZigZag predict

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Zigzag_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
i = 0
for column in f:
    print(column + str(i))
    i += 1

In [None]:
df

In [None]:
#data separation
X = df[:,3:]
y = f.iloc[:,2]
y=y.astype('int')
print(y)
y.value_counts()
#X = np.delete(X, 1, 1)_
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
# loocv to automatically evaluate the performance of a random forest classifier
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
stdsc = StandardScaler()

In [None]:
X_std = stdsc.fit_transform(X)

In [None]:
tuned_parameters = {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]}
"""
'kernel': ['rbf','linear','poly','sigmoid']
{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
{'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.001, 0.0001]}, 75%
{'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3, 4], 'gamma': [0.001, 0.0001]},
"""

In [None]:
gs = GridSearchCV(
SVC(), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1,
scoring='accuracy'
)

In [None]:
best_acc = 0
re = pd.DataFrame()
for i in range(2,31):
    sffs = SFS(SVC(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,	#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]
    # 標準化
    stdsc = StandardScaler()
    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)

    #GridSearch
    gs = GridSearchCV(
    SVC(), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=2,
    cv=cv, # 交差検定の回数
    n_jobs=-1
    )
    
    gs.fit(X_train_sffs_std, y)
    
    #result(score + param)
    print(gs.best_score_, gs.best_params_)
    re_i = pd.DataFrame([[sffs.k_feature_idx_,gs.best_score_,gs.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    
    '''if gs.best_score_ < best_acc:
        break
    else:
        best_acc = gs.best_score_'''
    
'''
    svm.fit(X_train_sffs_std, y)

    # トレーニングデータに対する精度
    scores = cross_val_score(svm, X_std, y, scoring='accuracy', cv=LeaveOneOut(), n_jobs=-1)
    accuracy=scores.mean()

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy]], columns=['Feature','accuracy'])
    re = pd.concat([re,re_i])
    '''

In [None]:
print(re)

In [None]:
re.to_csv('Dataset/DD_pentablet/result/SVM_Zigzag_predict_29.csv')

In [None]:
re.max(axis=0)

### Pl trace

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Pl_trace.csv')
df = f.to_numpy()
df.shape

In [None]:
i = 0
for column in f:
    print(column + str(i))
    i += 1

In [None]:
df

In [None]:
#data separation
X = df[:,3:]
y = df[:,2]
y=y.astype('int')
print(y)
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
from sklearn.model_selection import LeaveOneOut

# create loocv procedure
cv = LeaveOneOut()

In [None]:
stdsc = StandardScaler()

In [None]:
X_std = stdsc.fit_transform(X)
#X_test_std = stdsc.transform(X_test)

In [None]:
tuned_parameters = {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]}
"""
{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
{'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.001, 0.0001]}, 75%
{'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3, 4], 'gamma': [0.001, 0.0001]},
"""

In [None]:
gs = GridSearchCV(
SVC(), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
#cv=5, # 交差検定の回数
cv=cv,
n_jobs=-1
)

In [None]:
best_acc = 0
re = pd.DataFrame()
for i in range(2,31):
    sffs = SFS(SVC(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,	#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]
    # 標準化
    stdsc = StandardScaler()
    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)

    #GridSearch
    gs = GridSearchCV(
    SVC(), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=2,
    cv=cv, # 交差検定の回数
    n_jobs=-1
    )
    
    gs.fit(X_train_sffs_std, y)
    
    #result(score + param)
    print(gs.best_score_, gs.best_params_)
    re_i = pd.DataFrame([[sffs.k_feature_idx_,gs.best_score_,gs.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    
    '''if gs.best_score_ < best_acc:
        break
    else:
        best_acc = gs.best_score_'''
    
'''
    svm.fit(X_train_sffs_std, y)

    # トレーニングデータに対する精度
    scores = cross_val_score(svm, X_std, y, scoring='accuracy', cv=LeaveOneOut(), n_jobs=-1)
    accuracy=scores.mean()

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy]], columns=['Feature','accuracy'])
    re = pd.concat([re,re_i])
    '''

In [None]:
print(re)

In [None]:
re.to_csv('Dataset/DD_pentablet/result/SVM_Pl_trace_29.csv')

In [None]:
re.max(axis=0)

### Pl predict

In [None]:
import csv
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
%pwd

In [None]:
f=pd.read_csv('Dataset/DD_pentablet/result/Pl_predict.csv')
df = f.to_numpy()
df.shape

In [None]:
i = 0
for column in f:
    print(column + str(i))
    i += 1

In [None]:
df

In [None]:
#data separation
X = df[:,3:]
y = df[:,2]
y=y.astype('int')
print(y)
#X = np.delete(X, 1, 1)
#X = np.delete(X, 0, 1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
from sklearn.model_selection import LeaveOneOut
# create loocv procedure
cv = LeaveOneOut()

In [None]:
stdsc = StandardScaler()

In [None]:
X_std = stdsc.fit_transform(X)
#X_test_std = stdsc.transform(X_test)

In [None]:
tuned_parameters = {'C': [1, 10, 100, 1000], 'kernel': ['rbf','sigmoid','poly'], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]}
"""
{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
{'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.001, 0.0001]}, 75%
{'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3, 4], 'gamma': [0.001, 0.0001]},
"""

In [None]:
gs = GridSearchCV(
SVC(), # 識別器
tuned_parameters, # 最適化したいパラメータセット
verbose=2,
cv=cv, # 交差検定の回数
n_jobs=-1
)

In [None]:
best_acc = 0
re = pd.DataFrame()
for i in range(2,31):
    sffs = SFS(SVC(), # 使う学習器
    k_features=i, #特徴をいくつまで選択するか
    forward=True, #Trueでforward selectionになる。Falseでback
    floating=True, #sffsを行うためのもの。
    verbose=2, #実行時のlogをどれだけ詳しく表示するか
    scoring='accuracy', # 評価指標
    cv=cv,	#クロスバリデーション
    n_jobs=-1
    )

    sffs = sffs.fit(X_std, y)
    #選ばれた特徴
    X_train_sffs = X_std[:,list(sffs.k_feature_idx_)]
    # 標準化
    stdsc = StandardScaler()
    X_train_sffs_std = stdsc.fit_transform(X_train_sffs)

    #GridSearch
    gs = GridSearchCV(
    SVC(), # 識別器
    tuned_parameters, # 最適化したいパラメータセット
    verbose=2,
    cv=cv, # 交差検定の回数
    n_jobs=-1
    )
    
    gs.fit(X_train_sffs_std, y)
    
    svm = gs.best_estimator_
    scores = cross_val_score(svm, X_train_sffs_std, y, scoring='accuracy', cv=LeaveOneOut(), n_jobs=-1)
    accuracy=scores.mean()
    
    
    #result(score + param)
    print(accuracy, gs.best_params_)
    re_i = pd.DataFrame([[sffs.k_feature_idx_,gs.best_score_,gs.best_params_]], columns=['Feature','accuracy','param'])
    re = pd.concat([re,re_i])
    
'''    if gs.best_score_ < best_acc:
        break
    else:
        best_acc = gs.best_score_'''
    
'''
    svm.fit(X_train_sffs_std, y)

    # トレーニングデータに対する精度
    scores = cross_val_score(svm, X_std, y, scoring='accuracy', cv=LeaveOneOut(), n_jobs=-1)
    accuracy=scores.mean()

    re_i = pd.DataFrame([[sffs.k_feature_idx_,accuracy]], columns=['Feature','accuracy'])
    re = pd.concat([re,re_i])
    '''

In [None]:
print(re)

In [None]:
re.to_csv('Dataset/DD_pentablet/result/SVM_Pl_predict_29.csv')

In [None]:
re.max(axis=0)