In [2]:
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

import numpy as np
from sqlalchemy import create_engine
import sqlalchemy as db
import pandas as pd

from mylib.featurenames import *
from mylib.cleaner import *
from mylib.randomer import *
from mylib.server_info import *


In [None]:

# One default-configured estimator per model family. They are created in the
# same order as before and bound to the same module-level names that the
# grid-search cell refers to.
rf, xgb, lgb, gbm, cat = (
    RandomForestClassifier(),
    XGBClassifier(),
    LGBMClassifier(),
    GradientBoostingClassifier(),
    CatBoostClassifier(),
)

In [None]:
# Build the SQLAlchemy engine and open a connection for the data-loading cell.
# `mysql_server_uri` is not defined in the visible cells; presumably it comes
# from one of the wildcard `mylib` imports (likely mylib.server_info) — confirm.
engine = create_engine(mysql_server_uri)
# NOTE(review): no visible cell closes `conn`; consider conn.close() or a
# `with engine.connect() as conn:` block once the data has been read.
conn = engine.connect()


In [None]:
# Read the whole `buyreports` table into a DataFrame over the open connection.
br_full_data = pd.read_sql_table(table_name='buyreports', con=conn)

In [None]:
# Keep only the rows whose `isAllBuyed` and `isAllSelled` flags are both 1.
get_filter = br_full_data['isAllBuyed'].eq(1) & br_full_data['isAllSelled'].eq(1)
br = br_full_data.loc[get_filter]

In [None]:
# Feature columns to train on. `f_name_102` is not defined in the visible
# cells; presumably it comes from the wildcard `mylib.featurenames` import —
# confirm.
feature_names = f_name_102
feature_size = len(feature_names)

# Design matrix: the selected feature columns as a float64 NumPy array.
X = br[feature_names].to_numpy(dtype=np.float64)

# Binary target: 1 where `fMaxPowerAfterBuyWhile30` is below 0.03, otherwise 0.
y_condition = br['fMaxPowerAfterBuyWhile30'].lt(0.03)
y = np.where(y_condition, 1, 0)

In [None]:
# Seed for the split, taken from the project helper getRandomSeed().
# NOTE(review): whether this seed is stable between runs depends on that
# helper, which is not visible here — confirm if reproducibility matters.
random_seed = getRandomSeed()

# Hold out 20% of the rows as a test set. This is a train/test split only;
# no separate validation set is produced here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_seed,
)

In [3]:
# Hyperparameter search spaces for GridSearchCV, one dict per model family.
# Each key is an estimator parameter name and each value is the list of
# candidate settings to try.

# RandomForest: deliberately small grid. The wider candidates that were tried
# or considered are kept in the trailing comments.
param_rf = {
    'n_estimators': [100, 200],  # also considered: 400, 600
    'max_depth': [6],            # also considered: 8, 10, 12
    # 'min_samples_leaf': [8, 12, 18],
    # 'min_samples_split': [8, 16, 20],
}

# XGBoost
param_xgb = {
    "max_depth": [10, 30, 50],
    "min_child_weight": [1, 3, 6, 10],
    "n_estimators": [200, 300, 500, 1000],
}

# LightGBM
# "learning_rate" used to be listed twice with identical values. A duplicate
# key in a dict literal silently overwrites the earlier entry, so it is
# declared once here; the resulting dict is unchanged.
param_lgb = {
    "learning_rate": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5],
    "max_depth": [25, 50, 75],
    "num_leaves": [100, 300, 500, 900, 1200],
    "n_estimators": [100, 200, 300, 500, 800, 1000],
}

# scikit-learn GradientBoosting
param_gbm = {
    "max_depth": [4, 5, 6, 7, 8, 9, 10],
    "learning_rate": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5],
    "n_estimators": [100, 200, 300, 500],
}

# CatBoost
param_cat = {
    "depth": [6, 4, 5, 7, 8, 9, 10],
    "iterations": [250, 100, 500, 1000],
    "learning_rate": [0.001, 0.01, 0.1, 0.2, 0.3],
    "l2_leaf_reg": [2, 5, 10, 20, 30],
    "border_count": [254],
}


In [None]:
# Metrics computed for every grid-search candidate; passed as `scoring=` to
# GridSearchCV. NOTE: because this is a list (multi-metric scoring),
# scikit-learn requires GridSearchCV's `refit` to be one of these metric names
# (or False / a callable) rather than True.
scoring = ['accuracy','f1_macro']

In [None]:
# Settings shared by every grid search in this cell.
# `scoring` is a multi-metric list, so scikit-learn requires `refit` to name
# the single metric used to choose best_params_ / best_estimator_; with
# `refit=True`, fit() raises ValueError. 'f1_macro' is one of the names in
# `scoring`; change it to 'accuracy' to select on accuracy instead.
gscv_options = dict(scoring=scoring, cv=3, refit='f1_macro', n_jobs=1, verbose=2)

gscv_rf = GridSearchCV(estimator=rf, param_grid=param_rf, **gscv_options)
# Uncomment to search the other model families (these grids are much larger):
# gscv_xgb = GridSearchCV(estimator=xgb, param_grid=param_xgb, **gscv_options)
# gscv_lgb = GridSearchCV(estimator=lgb, param_grid=param_lgb, **gscv_options)
# gscv_gbm = GridSearchCV(estimator=gbm, param_grid=param_gbm, **gscv_options)
# gscv_cat = GridSearchCV(estimator=cat, param_grid=param_cat, **gscv_options)

# Fit on the training split. The split cell names its outputs X_train / y_train;
# the previous `trainX` / `trainY` were never defined and raised NameError on a
# fresh kernel.
gscv_rf.fit(X_train, y_train)
# gscv_xgb.fit(X_train, y_train)
# gscv_lgb.fit(X_train, y_train)
# gscv_gbm.fit(X_train, y_train)
# gscv_cat.fit(X_train, y_train)
