In [None]:
from sklearn.pipeline import Pipeline

def create_pipelines(estimators, scaler=None, reductor=None):
    '''
    Build one Pipeline per estimator.

    Each pipeline is assembled in this order: optional scaler ('scl'),
    optional dimensionality reducer ('rdt'), then the estimator ('est').

    estimators: iterable of (name, estimator) pairs
    scaler: optional scaling transformer; omitted from the steps when None
    reductor: optional dimensionality-reduction transformer; omitted when None
    returns: dict mapping each name to its Pipeline

    The scaler and reductor are cloned for every pipeline, so fitting one
    pipeline cannot overwrite the fitted transformer state of another.
    '''
    # Local import: keeps this cell runnable without touching the import cell.
    from sklearn.base import clone

    pipelines = {}
    for name, estimator in estimators:
        steps = []
        if scaler is not None:
            steps.append(('scl', clone(scaler)))
        if reductor is not None:
            steps.append(('rdt', clone(reductor)))
        steps.append(('est', estimator))
        pipelines[name] = Pipeline(steps)
    return pipelines

In [None]:
def get_params(pipeline, upd=None):
    '''
    Build a grid-search parameter grid from a pipeline's current settings.

    Every parameter whose name contains '__' starts as a one-element list
    holding its current value; entries in ``upd`` then replace the candidate
    values of the parameters they name.

    pipeline: object whose get_params() returns a dict of parameters
    upd: optional dict of {parameter name: candidate values}; keys that are
         not among the pipeline's '__' parameters are ignored.
         None (the default) or an empty dict means no overrides.
    returns: dict of {parameter name: candidate values}
    '''
    params = {key: [value] for key, value in pipeline.get_params().items() if '__' in key}
    if upd:
        # Only override parameters the pipeline actually exposes.
        params.update({key: values for key, values in upd.items() if key in params})
    return params

In [None]:
from sklearn.model_selection import GridSearchCV

def grid_search(pipeline, params, cv=3, scoring='f1'):
    '''
    Create a GridSearchCV object for a pipeline.

    pipeline: pipeline to search over
    params: parameter grid
    cv: cross-validation setting passed to GridSearchCV (default 3)
    scoring: scoring setting passed to GridSearchCV (default 'f1')
    returns: the GridSearchCV object; fit is not called here
    '''
    # Fixed options: train scores are not collected, and n_jobs is -1.
    search_options = {
        'cv': cv,
        'scoring': scoring,
        'return_train_score': False,
        'n_jobs': -1,
    }
    return GridSearchCV(pipeline, params, **search_options)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

# (name, estimator) pairs to compare.
# Estimators whose fitting involves randomness get a fixed random_state so
# that a fresh "Restart & Run All" reproduces the same scores.
estimators = [
    ('KNC', KNeighborsClassifier()),
    ('LRG', LogisticRegression()),
    ('SVC', SVC()),
    ('LSV', LinearSVC(random_state=0)),
    ('DTC', DecisionTreeClassifier(random_state=0)),
    ('RFC', RandomForestClassifier(random_state=0)),
    ('GBC', GradientBoostingClassifier(random_state=0)),
    ('MLP', MLPClassifier(random_state=0)),
]

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Scaler handed to create_pipelines as the first step of every pipeline.
scaler = StandardScaler()
# Alternative scaler: uncomment to use MinMaxScaler instead.
#scaler = MinMaxScaler()

In [None]:
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFE

# Dimensionality reducer handed to create_pipelines: PCA with n_components=10.
reductor = PCA(n_components=10)
# Alternative reducer: RFE driven by a random forest.
# NOTE(review): `k` is not defined in the visible cells; define it before enabling this line.
#reductor = RFE(estimator=RandomForestClassifier(n_estimators=100, random_state=0), n_features_to_select=k, step=.05)

In [None]:
# Grid-search overrides, looked up by pipeline name (e.g. 'KNC') in the training loop.
# Expected shape: {name: {'<step>__<param>': [candidate values, ...]}}
grid_params = {}

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Results keyed by (pipeline name, metric), where metric is one of
# 'train', 'test', 'train_matrix', 'test_matrix'.
scores = {}
for name, pipeline in create_pipelines(estimators, scaler, reductor).items():
    # Training: a pipeline with no entry in grid_params is searched over its
    # current parameter values only, instead of raising KeyError.
    est = grid_search(pipeline, get_params(pipeline, grid_params.get(name, {})))
    est.fit(X_train, y_train)
    # Accuracy on the training data
    train_pred = est.predict(X_train)
    scores[(name, 'train')] = accuracy_score(y_train, train_pred)
    # Accuracy on the test data
    test_pred = est.predict(X_test)
    scores[(name, 'test')] = accuracy_score(y_test, test_pred)
    # Confusion matrices flattened row by row into plain lists; ravel() works
    # for any matrix size, not only 2x2.
    scores[(name, 'train_matrix')] = confusion_matrix(y_train, train_pred).ravel().tolist()
    scores[(name, 'test_matrix')] = confusion_matrix(y_test, test_pred).ravel().tolist()

#スコア表示
