In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

# Fixed seed for every estimator that draws random numbers while fitting, so a
# fresh "Restart & Run All" reproduces the same scores.
SEED = 0

# (short name, unfitted estimator) pairs. The short names match the keys of
# `grid_params`. KNeighborsClassifier takes no random_state; LogisticRegression
# and SVC are left at their defaults.
estimators = [
    ('KNC', KNeighborsClassifier()),
    ('LRG', LogisticRegression()),
    ('SVC', SVC()),
    ('LSV', LinearSVC(random_state=SEED)),
    ('DTC', DecisionTreeClassifier(random_state=SEED)),
    ('RFC', RandomForestClassifier(random_state=SEED)),
    ('GBC', GradientBoostingClassifier(random_state=SEED)),
    ('MLP', MLPClassifier(random_state=SEED))
]

In [None]:
# Per-model hyper-parameter grids, keyed by the estimator's short name.
# Every model starts with an empty grid (i.e. its defaults only); the few
# explicit settings are filled in below. Parameter names carry an 'est__'
# prefix -- presumably the name of the estimator step in the pipeline
# (TODO confirm against MLUtils).
_model_names = ('KNC', 'LRG', 'SVC', 'LSV', 'DTC', 'RFC', 'GBC', 'MLP')
grid_params = {model_name: {} for model_name in _model_names}

grid_params['LRG']['est__solver'] = ['lbfgs']
grid_params['RFC']['est__n_estimators'] = [100]
grid_params['MLP']['est__max_iter'] = [500]

In [1]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset and pull out the feature matrix and labels.
data = load_breast_cancer()
x, y = data.data, data.target

In [2]:
from sklearn.model_selection import StratifiedShuffleSplit

# One stratified hold-out split: 70% of the samples for training, the rest for
# testing. `split` yields one (train_indices, test_indices) pair per split, so
# with n_splits=1 we simply take the first (and only) pair.
ss = StratifiedShuffleSplit(n_splits=1, train_size=0.7, random_state=0)
train_idx, test_idx = next(ss.split(x, y))

x_train, x_test = x[train_idx], x[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [None]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import MLUtils as ut

# Preprocessing steps shared by every pipeline. Both come from the project's
# MLUtils helpers; what the `0` argument selects is not visible here
# (TODO confirm against MLUtils).
scaler = ut.scaler(0)
reductor = ut.reductor()

# Results keyed by (model name, metric):
#   'train' / 'test'               -> accuracy on that subset
#   'train_matrix' / 'test_matrix' -> 2x2 confusion matrix flattened row by row
scores = {}
pipelines = ut.create_pipelines(estimators, scaler, reductor)
for name, pipeline in pipelines.items():
    # Training: 3-fold grid search over this model's grid, selected by F1.
    param_grid = ut.get_params(pipeline, grid_params[name])
    est = GridSearchCV(
        pipeline,
        param_grid,
        cv=3,
        scoring='f1',
        return_train_score=False,
        n_jobs=-1,
    )
    est.fit(x_train, y_train)

    # Predictions on the training and the held-out data.
    train_pred = est.predict(x_train)
    test_pred = est.predict(x_test)

    # Accuracy scores first, then the confusion matrices. reshape(4) flattens
    # the 2x2 matrix and fails loudly if it is ever not 2x2.
    scores.update({
        (name, 'train'): accuracy_score(y_train, train_pred),
        (name, 'test'): accuracy_score(y_test, test_pred),
        (name, 'train_matrix'): confusion_matrix(y_train, train_pred).reshape(4).tolist(),
        (name, 'test_matrix'): confusion_matrix(y_test, test_pred).reshape(4).tolist(),
    })

# Show the scores
for entry in scores.items():
    print(*entry)

In [5]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Sanity check: fit a default k-NN classifier directly on the training features
# (no pipeline) and report accuracy and confusion matrix on that same data.
a = KNeighborsClassifier().fit(x_train, y_train)
pred = a.predict(x_train)
for metric in (accuracy_score, confusion_matrix):
    print(metric(y_train, pred))

0.9447236180904522
[[134  14]
 [  8 242]]


In [26]:
# Recount the confusion-matrix cells by hand, in row-major order:
# (actual 0, predicted 0), (actual 0, predicted 1),
# (actual 1, predicted 0), (actual 1, predicted 1).
for actual, predicted in ((0, 0), (0, 1), (1, 0), (1, 1)):
    print(np.count_nonzero((y_train == actual) & (pred == predicted)))

134
14
8
242
