# 调参

In [1]:
from hyperopt import fmin, tpe, Trials, hp,STATUS_OK
import numpy as np
import pandas as pd
import lightgbm as lgb
import os
import time
import csv

In [2]:
#读取训练集
train=pd.read_csv('data/train1.csv')

In [3]:
features=list(train.columns)
features.remove('flag')
X_train, y_train=train[features],train['flag']
##数据转换
lgb_train = lgb.Dataset(X_train, y_train, free_raw_data=False)

In [4]:
#%%参数空间
space = {
    # 'class_weight': hp.choice('class_weight', [None, 'balanced']),
    'boosting_type': hp.choice('boosting_type', [{'boosting_type': 'gbdt'}]),
    'num_leaves': hp.quniform('num_leaves', 10, 200, 5),
    'learning_rate': hp.uniform('learning_rate', 1e-3, 5e-1),
    'min_child_samples': hp.quniform('min_child_samples', 40, 401, 20),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 10),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 10),
    'subsample_freq':hp.uniform("subsample", 0.5, 0.9),
    'colsample_bytree':hp.quniform('colsample_bytree',0.7, 1.0, 0.1),
    'bagging_fraction':hp.quniform('bagging_fraction', 0.7, 1.0, 0.1),
    'max_depth': hp.randint("max_depth", 8),
    'objective': hp.choice('objective', ['binary']), 
    'n_estimators': hp.quniform('n_estimators',500,5000,200)
}


In [5]:
#%%目标函数
def objective(params, n_flods=5):  
    params['boosting_type'] = params['boosting_type']['boosting_type']
    for param in ['n_estimators','max_depth','num_leaves','min_child_samples','subsample_freq']:
        params[param] = int( params[param])
    
    start = time.clock()
    
    cv_results = lgb.cv(params=params,train_set=lgb_train, nfold=n_flods,
                        early_stopping_rounds=100, metrics='auc', seed=50,verbose_eval=False)
# =============================================================================
#     model_lgb = lgb.LGBMClassifier(boosting_type=params['boosting_type'],
#                     objective = params['objective'],
#                     metric = 'auc', 
#                     subsample_freq=params['subsample_freq'],
#                     learning_rate= params['learning_rate'],
#                     n_estimators = params['n_estimators'],
#                     max_depth = params['max_depth'],
#                     bagging_fraction = params['bagging_fraction'],
#                     colsample_bytree = params['colsample_bytree'],
#                     min_child_samples = params['min_child_samples'],
#                     reg_alpha = params['reg_alpha'],
#                     reg_lambda = params['reg_lambda'],
#                     seed=5000,
#                     n_jobs=4)
# =============================================================================
    best_score = np.max(cv_results['auc-mean'])
    loss = 1 - best_score
    
    run_time = (time.clock() - start)
    
    
    # 最大平均auc对应的提升术的迭代次数
    n_estimators = int(params['n_estimators'])
    
    # 把关心的结果输出到csv文件
    of_connection = open('result_lgb.csv', 'a')
    writer = csv.writer(of_connection)
    writer.writerow([loss,params,n_estimators,run_time])
    
    return_dict = {'loss':loss, 'params':params,'estimators':n_estimators,'train_time':run_time,'status':STATUS_OK}
    return return_dict


In [6]:
#%%优化算法algo
tpe_algorithm = tpe.suggest
bayes_trials = Trials()
best = fmin(fn = objective, space = space, algo = tpe_algorithm, 
                 max_evals = 100, trials = bayes_trials, rstate = np.random.RandomState(50),verbose=1)

columns=['loss', 'params', 'iteration', 'estimators', 'train_time']

best

  0%|                                    | 0/100 [00:00<?, ?it/s, best loss: ?]




  1%|           | 1/100 [00:18<29:59, 18.18s/it, best loss: 0.0744022723877169]




  2%|▏         | 2/100 [00:46<34:42, 21.25s/it, best loss: 0.07387033020904421]




  3%|▎         | 3/100 [01:11<36:13, 22.41s/it, best loss: 0.07387033020904421]




  4%|▍         | 4/100 [01:26<32:21, 20.23s/it, best loss: 0.07387033020904421]




  5%|▌         | 5/100 [01:38<28:07, 17.76s/it, best loss: 0.07387033020904421]




  6%|▌         | 6/100 [02:21<39:25, 25.16s/it, best loss: 0.07387033020904421]




  7%|▋         | 7/100 [02:35<33:44, 21.77s/it, best loss: 0.07387033020904421]




  8%|▋       | 8/100 [04:43<1:22:23, 53.73s/it, best loss: 0.07332884303025755]




  9%|▋       | 9/100 [06:05<1:34:16, 62.16s/it, best loss: 0.07332884303025755]




 10%|▋      | 10/100 [08:57<2:22:50, 95.22s/it, best loss: 0.07330354434890629]




 11%|▊      | 11/100 [10:34<2:21:55, 95.68s/it, best loss: 0.07330354434890629]




 12%|▊      | 12/100 [12:14<2:22:13, 96.97s/it, best loss: 0.07330354434890629]




 13%|▉      | 13/100 [12:27<1:44:13, 71.88s/it, best loss: 0.07330354434890629]




 14%|▉      | 14/100 [13:47<1:46:20, 74.19s/it, best loss: 0.07330354434890629]




 15%|█      | 15/100 [14:09<1:23:03, 58.63s/it, best loss: 0.07330354434890629]




 16%|█      | 16/100 [14:32<1:07:01, 47.87s/it, best loss: 0.07330354434890629]




 17%|█▌       | 17/100 [14:45<51:41, 37.36s/it, best loss: 0.07330354434890629]




 18%|█▎     | 18/100 [16:21<1:15:06, 54.96s/it, best loss: 0.07330354434890629]




 19%|█▎     | 19/100 [17:52<1:28:44, 65.74s/it, best loss: 0.07330354434890629]




 20%|█▍     | 20/100 [18:10<1:08:36, 51.46s/it, best loss: 0.07330354434890629]




 21%|█▉       | 21/100 [18:36<57:38, 43.78s/it, best loss: 0.07330354434890629]




 22%|█▉       | 22/100 [19:26<59:18, 45.63s/it, best loss: 0.07330354434890629]




 23%|█▌     | 23/100 [20:19<1:01:40, 48.06s/it, best loss: 0.07330354434890629]




 24%|█▋     | 24/100 [21:11<1:02:05, 49.02s/it, best loss: 0.07330354434890629]




 25%|█▊     | 25/100 [24:37<2:00:08, 96.12s/it, best loss: 0.07330354434890629]




 26%|█▊     | 26/100 [25:05<1:33:28, 75.79s/it, best loss: 0.07330354434890629]




 27%|█▉     | 27/100 [25:49<1:20:43, 66.35s/it, best loss: 0.07328610686917847]




 28%|█▉     | 28/100 [26:34<1:11:51, 59.88s/it, best loss: 0.07328610686917847]




 29%|██▌      | 29/100 [27:02<59:22, 50.17s/it, best loss: 0.07328610686917847]




 30%|██▋      | 30/100 [27:20<47:17, 40.53s/it, best loss: 0.07328610686917847]




 31%|██▊      | 31/100 [28:00<46:43, 40.63s/it, best loss: 0.07328610686917847]




 32%|██▉      | 32/100 [28:40<45:45, 40.38s/it, best loss: 0.07325324412711409]




 33%|██▉      | 33/100 [29:07<40:38, 36.39s/it, best loss: 0.07325324412711409]




 34%|███      | 34/100 [29:33<36:27, 33.14s/it, best loss: 0.07325324412711409]




 35%|███▏     | 35/100 [30:00<33:58, 31.37s/it, best loss: 0.07325324412711409]




 36%|███▏     | 36/100 [30:42<36:53, 34.58s/it, best loss: 0.07325324412711409]




 37%|███▎     | 37/100 [31:08<33:38, 32.04s/it, best loss: 0.07325324412711409]




 38%|███▍     | 38/100 [31:34<31:08, 30.13s/it, best loss: 0.07325324412711409]




 39%|███▌     | 39/100 [31:48<25:40, 25.26s/it, best loss: 0.07325324412711409]




 40%|███▌     | 40/100 [32:47<35:30, 35.51s/it, best loss: 0.07325324412711409]




 41%|███▋     | 41/100 [33:07<30:16, 30.78s/it, best loss: 0.07325324412711409]




 42%|███▊     | 42/100 [33:31<27:44, 28.69s/it, best loss: 0.07325324412711409]




 43%|███▊     | 43/100 [34:31<36:14, 38.15s/it, best loss: 0.07325324412711409]




 44%|███▉     | 44/100 [35:19<38:24, 41.15s/it, best loss: 0.07325324412711409]




 45%|███▏   | 45/100 [37:32<1:02:56, 68.66s/it, best loss: 0.07325324412711409]




 46%|████▏    | 46/100 [37:52<48:38, 54.05s/it, best loss: 0.07325324412711409]




 47%|████▏    | 47/100 [38:06<37:03, 41.96s/it, best loss: 0.07325324412711409]




 48%|████▎    | 48/100 [38:39<34:08, 39.39s/it, best loss: 0.07325324412711409]




 49%|████▍    | 49/100 [39:01<29:01, 34.14s/it, best loss: 0.07325324412711409]




 50%|████▌    | 50/100 [41:28<56:42, 68.05s/it, best loss: 0.07325324412711409]




 51%|████▌    | 51/100 [41:54<45:16, 55.44s/it, best loss: 0.07325324412711409]




 52%|████▋    | 52/100 [42:07<34:03, 42.57s/it, best loss: 0.07325324412711409]




 53%|████▊    | 53/100 [42:51<33:40, 42.98s/it, best loss: 0.07325324412711409]




 54%|████▊    | 54/100 [43:04<26:12, 34.18s/it, best loss: 0.07325324412711409]




 55%|████▉    | 55/100 [43:31<23:52, 31.82s/it, best loss: 0.07325324412711409]




 56%|█████    | 56/100 [43:49<20:26, 27.87s/it, best loss: 0.07325324412711409]




 57%|█████▏   | 57/100 [44:48<26:40, 37.21s/it, best loss: 0.07292900344464515]




 58%|█████▏   | 58/100 [47:44<55:03, 78.65s/it, best loss: 0.07292900344464515]




 59%|█████▎   | 59/100 [48:56<52:21, 76.62s/it, best loss: 0.07292900344464515]




 60%|█████▍   | 60/100 [49:50<46:41, 70.04s/it, best loss: 0.07292900344464515]




 61%|█████▍   | 61/100 [50:03<34:25, 52.97s/it, best loss: 0.07292900344464515]




 62%|█████▌   | 62/100 [51:40<41:44, 65.92s/it, best loss: 0.07292900344464515]




 63%|█████▋   | 63/100 [52:19<35:43, 57.94s/it, best loss: 0.07292900344464515]




 64%|█████▊   | 64/100 [54:42<50:03, 83.44s/it, best loss: 0.07292900344464515]




 65%|█████▊   | 65/100 [54:59<37:01, 63.48s/it, best loss: 0.07292900344464515]




 66%|█████▉   | 66/100 [55:21<28:56, 51.08s/it, best loss: 0.07292900344464515]




 67%|██████   | 67/100 [56:40<32:42, 59.48s/it, best loss: 0.07292900344464515]




 68%|██████   | 68/100 [57:15<27:51, 52.24s/it, best loss: 0.07292900344464515]




 69%|██████▏  | 69/100 [58:44<32:39, 63.20s/it, best loss: 0.07292900344464515]




 70%|██████▎  | 70/100 [59:20<27:30, 55.02s/it, best loss: 0.07292900344464515]




 71%|████▉  | 71/100 [1:01:50<40:25, 83.63s/it, best loss: 0.07292900344464515]




 72%|█████  | 72/100 [1:02:09<29:56, 64.15s/it, best loss: 0.07292900344464515]




 73%|█████  | 73/100 [1:02:43<24:45, 55.00s/it, best loss: 0.07292900344464515]




 74%|█████▏ | 74/100 [1:03:38<23:52, 55.09s/it, best loss: 0.07292900344464515]




 75%|█████▎ | 75/100 [1:03:52<17:47, 42.68s/it, best loss: 0.07292900344464515]




 76%|█████▎ | 76/100 [1:04:14<14:38, 36.60s/it, best loss: 0.07292900344464515]




 77%|█████▍ | 77/100 [1:04:39<12:42, 33.17s/it, best loss: 0.07292900344464515]




 78%|█████▍ | 78/100 [1:04:52<09:54, 27.03s/it, best loss: 0.07292900344464515]




 79%|█████▌ | 79/100 [1:05:33<10:54, 31.17s/it, best loss: 0.07292900344464515]




 80%|█████▌ | 80/100 [1:05:59<09:53, 29.70s/it, best loss: 0.07292900344464515]




 81%|█████▋ | 81/100 [1:06:19<08:30, 26.87s/it, best loss: 0.07292900344464515]




 82%|█████▋ | 82/100 [1:07:12<10:21, 34.54s/it, best loss: 0.07292900344464515]




 83%|█████▊ | 83/100 [1:08:01<11:01, 38.92s/it, best loss: 0.07292900344464515]




 84%|█████▉ | 84/100 [1:09:11<12:49, 48.10s/it, best loss: 0.07292900344464515]




 85%|█████▉ | 85/100 [1:09:32<10:01, 40.11s/it, best loss: 0.07292900344464515]




 86%|██████ | 86/100 [1:09:58<08:21, 35.83s/it, best loss: 0.07292900344464515]




 87%|██████ | 87/100 [1:10:22<06:58, 32.20s/it, best loss: 0.07292900344464515]




 88%|██████▏| 88/100 [1:10:55<06:31, 32.61s/it, best loss: 0.07292900344464515]




 89%|██████▏| 89/100 [1:11:10<05:00, 27.33s/it, best loss: 0.07292900344464515]




 90%|██████▎| 90/100 [1:12:15<06:24, 38.49s/it, best loss: 0.07292900344464515]




 91%|██████▎| 91/100 [1:12:28<04:39, 31.02s/it, best loss: 0.07292900344464515]




 92%|██████▍| 92/100 [1:13:10<04:34, 34.32s/it, best loss: 0.07292900344464515]




 93%|██████▌| 93/100 [1:13:32<03:33, 30.51s/it, best loss: 0.07292900344464515]




 94%|██████▌| 94/100 [1:16:35<07:38, 76.42s/it, best loss: 0.07292900344464515]




 95%|██████▋| 95/100 [1:16:49<04:47, 57.49s/it, best loss: 0.07292900344464515]




 96%|██████▋| 96/100 [1:18:26<04:37, 69.34s/it, best loss: 0.07292900344464515]




 97%|██████▊| 97/100 [1:18:45<02:42, 54.17s/it, best loss: 0.07292900344464515]




 98%|██████▊| 98/100 [1:19:29<01:42, 51.39s/it, best loss: 0.07292900344464515]




 99%|██████▉| 99/100 [1:19:42<00:39, 39.69s/it, best loss: 0.07292900344464515]




100%|██████| 100/100 [1:21:03<00:00, 48.64s/it, best loss: 0.07292900344464515]


{'bagging_fraction': 0.8,
 'boosting_type': 0,
 'colsample_bytree': 0.7000000000000001,
 'learning_rate': 0.0314557433111514,
 'max_depth': 7,
 'min_child_samples': 120.0,
 'n_estimators': 1200.0,
 'num_leaves': 170.0,
 'objective': 0,
 'reg_alpha': 7.348811880509189,
 'reg_lambda': 9.92159011937742,
 'subsample': 0.7226186898190853}