In [3]:
# import basic APIs
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

# Evaluate classifier models with a metric selected interactively.
def _continuous_scores(pipe, X):
    """Return non-thresholded scores of a fitted pipeline for ``X``.

    ROC AUC ranks samples by score, so it needs continuous values rather
    than the hard labels returned by ``predict``.  The positive-class
    column of ``predict_proba`` is used when the pipeline exposes it;
    otherwise (e.g. LinearSVC) ``decision_function`` is used.
    Assumes a binary classification target.
    """
    if hasattr(pipe, 'predict_proba'):
        return pipe.predict_proba(X)[:, 1]
    return pipe.decision_function(X)


def main():
    """Fit six classifier pipelines, score them, and pickle the best result.

    Reads the module-level ``load_dataset`` object (expects ``data``,
    ``feature_names`` and ``target`` attributes), holds out 20% of the rows
    as a test set, prompts on stdin for a metric key, fits every pipeline on
    the training split and scores it on the test split.  Scores are printed
    in descending order and the top (name, score) pairs, ties included, are
    pickled to ``best_models.pickle``.

    NOTE: despite the file name, the pickle contains only the pipeline names
    and scores, not the fitted estimators.

    If the entered key is not one of '1'..'5', a message is printed and the
    function returns without fitting anything or touching the pickle file.
    """
    dataset = load_dataset
    X = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    y = pd.DataFrame(dataset.target, columns=['y'])

    # cross-validation by holdout
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=1)

    # scikit-learn expects 1-D label arrays.  ``.values`` replaces
    # ``DataFrame.as_matrix()``, which no longer exists in pandas >= 1.0.
    y_train_arr = y_train.values.ravel()
    y_test_arr = y_test.values.ravel()

    # set pipelines for algorithms (every estimator is preceded by scaling)
    pipe_knn = Pipeline([('scl', StandardScaler()),
                         ('est', KNeighborsClassifier())])
    pipe_logistic = Pipeline([('scl', StandardScaler()),
                              ('est', LogisticRegression(random_state=1))])
    pipe_rf = Pipeline([('scl', StandardScaler()),
                        ('est', RandomForestClassifier(random_state=1))])
    pipe_gb = Pipeline([('scl', StandardScaler()),
                        ('est', GradientBoostingClassifier(random_state=1))])
    pipe_mlp = Pipeline([('scl', StandardScaler()),
                         ('est', MLPClassifier(max_iter=1000,
                                               hidden_layer_sizes=(5, 3),
                                               random_state=1))])
    pipe_svc = Pipeline([('scl', StandardScaler()),
                         ('est', LinearSVC(random_state=1))])

    # evaluate following metrics
    # - 1 : accuracy
    # - 2 : precision
    # - 3 : recall
    # - 4 : f1 score
    # - 5 : auc (area under the ROC curve)
    print('Enter one of the following metrics key')
    input_metrics = input('1:accuracy, 2:precision, 3:recall, 4:f1, 5:auc  ').strip()

    # Metrics computed from predicted labels; AUC ('5') is handled separately
    # because it needs continuous scores.
    label_metrics = {
        '1': accuracy_score,
        '2': precision_score,
        '3': recall_score,
        '4': f1_score,
    }

    # Validate before any model is trained so a typo costs nothing and does
    # not overwrite an existing pickle with an empty result.
    if input_metrics != '5' and input_metrics not in label_metrics:
        print('invalid input_metrics')
        return

    pipe_names = ['KNN', 'Logistic', 'RandomForest', 'GradientBoosting', 'MLP', 'LinerSVC']
    pipe_lines = [pipe_knn, pipe_logistic, pipe_rf, pipe_gb, pipe_mlp, pipe_svc]
    pipe_scores = {}

    # exec evaluate
    for name, pipe in zip(pipe_names, pipe_lines):
        pipe.fit(X_train, y_train_arr)
        if input_metrics == '5':
            pipe_scores[name] = roc_auc_score(
                y_test_arr, _continuous_scores(pipe, X_test))
        else:
            metric = label_metrics[input_metrics]
            pipe_scores[name] = metric(y_test_arr, pipe.predict(X_test))

    # print scores sorted descend
    for name, score in sorted(pipe_scores.items(), key=lambda kv: kv[1], reverse=True):
        print('%s: %.3f' % (name, score))

    # output best model(s): every pipeline tied for the top score is kept
    top_score = max(pipe_scores.values())
    best_scores = [kv for kv in pipe_scores.items() if kv[1] == top_score]

    # dump pkl
    with open('best_models.pickle', mode='wb') as f:
        pickle.dump(best_scores, f)


if __name__ == '__main__':
    # Sample data used to train and score the models.  It is bound to the
    # module-level name ``load_dataset`` because main() reads it from there.
    load_dataset = datasets.load_breast_cancer()

    # Run the evaluation.
    main()


Enter one of the following metrics key
1:accuracy, 2:precision, 3:recall, 4:f1, 5:auc  2
Logistic: 0.973
LinerSVC: 0.973
GradientBoosting: 0.947
RandomForest: 0.946
KNN: 0.935
MLP: 0.935
