# HyperOpt実行用プログラム

In [1]:
from hyperopt import fmin, tpe, hp, rand , space_eval
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import pickle
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
# warning無視
import warnings


SCORE_TYPE_F1 = 1
SCORE_TYPE_AUC = 2

RANDOM_STATE_VAL = 1

# うざいからwarningは無視
warnings.simplefilter('ignore')

############################
#### ★要カスタマイズ★ ####
TRAIN_PKLFILE_NAME = 'trainFeatureDf_s_90.pkl'
MAX_EVALS_VALUE = 100
score_type = SCORE_TYPE_AUC
algoIndex = 1   # 1: ロジスティック回帰、2:多層パーセプトロン、3:XGBoost

## 1: ロジスティック回帰
parameterDicLog = {
    'C':hp.uniform('C', 1e-4, 25.0),
    'penalty':hp.choice("penalty",["l1","l2"]),
    'max_iter':hp.randint('max_iter',700)
}
pipeLog = Pipeline([('scl',StandardScaler()),
                 ('est',LogisticRegression(random_state=RANDOM_STATE_VAL))])
## 2:多層パーセプトロン
parameterDicMlp = {
    'alpha':hp.uniform('alpha', 0.0001, 0.5),
    'max_iter':hp.randint('max_iter',1300),
    'learning_rate_init':hp.uniform('learning_rate_init', 0.0005, 1.0)
}
# hidden_layer_sizesは、
# Machine_learging_Classification.ipynb の getPipelines で定義している値に合わせること。
pipeMlp = Pipeline([('scl',StandardScaler()),
                 ('est',MLPClassifier(hidden_layer_sizes=(100,30,10),random_state=RANDOM_STATE_VAL))])
## 3:XGBoost
parameterDicXg = {
    'learning_rate':hp.uniform('learning_rate', 1e-3, 1.0),
    'max_depth':hp.randint('max_depth',10),
    'min_child_weight':hp.randint('min_child_weight',21),
    'subsample':hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree':hp.uniform('colsample_bytree', 0.5, 1.0),
    'gamma':hp.uniform('gamma', 0, 0.5),
    'reg_lambda':hp.uniform('reg_lambda', 0.5, 1.0),
    'reg_alpha':hp.uniform('reg_alpha', 0, 0.5)
}
# pipeXg = Pipeline([('scl',StandardScaler()),
#                  ('est',xgb.XGBClassifier(random_state=RANDOM_STATE_VAL))])
pipeXg = Pipeline([('est',xgb.XGBClassifier(random_state=RANDOM_STATE_VAL))])
############################

algoDic={1: "ロジスティック回帰", 2:"多層パーセプトロン", 3:"XGBoost"}

# 変数のセット
pipelines = [pipeLog, pipeMlp, pipeXg]
pipe = pipelines[algoIndex - 1]
parameterDics = [parameterDicLog, parameterDicMlp, parameterDicXg]
parameterDic = parameterDics[algoIndex - 1]

# 訓練用データの特徴量をロード
train_s_pkl_file = './data/pkl/' + TRAIN_PKLFILE_NAME
with open(train_s_pkl_file, 'rb') as DFfile:
    trainFeatureDf_s = pickle.load(DFfile)
# 訓練用データの正解ラベルをロード
y_train_pkl_file = './data/pkl/y_train.pkl'
with open(y_train_pkl_file, 'rb') as DFfile:
    y_train = pickle.load(DFfile)

    
# 元の訓練データを訓練用と検証用に分ける
train_data, test_data, y_train, y_test = train_test_split(trainFeatureDf_s,     # 特徴量
                                             y_train,    # 正解データ
                                             test_size=0.3 ,
                                             random_state=RANDOM_STATE_VAL)
train_target = y_train.values.ravel()
test_target = y_test.values.ravel()
    
    
count = 0
def function(args):
    pipe.fit(train_data,
                train_target)
    global count
    count = count + 1
    
    if(score_type == SCORE_TYPE_F1):
        if(count == 1):
            print("# f1")
        prediction = pipe.predict(test_data)
        #score = f1_score(test_target,prediction)
        #print("%s回目の推測" % str(count),score)
        # なお、hyperoptは最小化しか行なってくれないためにここでは戻り値の符号を逆転させています。 
        return -f1_score(test_target,prediction)
    else:
        if(count == 1):
            print("# AUC")
        proba = pipe.predict_proba(test_data)[:,1]
        #score = roc_auc_score(test_target, proba)
        #print("%s回目の推測" % str(count),score)
        # なお、hyperoptは最小化しか行なってくれないためにここでは戻り値の符号を逆転させています。 
        return -roc_auc_score(test_target, proba)

## 実行
print(algoDic.get(algoIndex))

bestPara = fmin(function,parameterDic,algo=tpe.suggest,max_evals=MAX_EVALS_VALUE)
print("best estimate parameters = ",space_eval(parameterDic, bestPara))

ロジスティック回帰
# AUC
best estimate parameters =  {'C': 10.26405036766393, 'max_iter': 316, 'penalty': 'l2'}
