In [3]:
import lightgbm as lgb
import pandas as pd
import numpy as np
import os
import glob
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV

In [4]:
# Base directory for the data that is read and the model that is written below.
# Taken from the AIRFLOW_HOME environment variable; a KeyError is raised here
# when that variable is not set.
airflow_home = os.environ['AIRFLOW_HOME']

In [5]:
# Read the preprocessed dataset that every later cell works from.
preprocessed_csv = f'{airflow_home}/data/processed/preprocessed_full.csv'
df_raw = pd.read_csv(preprocessed_csv)

In [6]:
# Largest OrderDate value in the loaded data; it becomes part of the saved
# model's file name further down.
order_dates = df_raw['OrderDate']
max_date = order_dates.max()

In [7]:
# Predictor columns, in the order they are handed to the model.
feature_cols = [
    'ChannelID',
    'Cluster',
    'prepay',
    'count_edit',
    'interval_time',
    'order_weekday',
    'weekday',
    'interval_high',
]
# Label column; it is kept as the last entry of `cols`.
target_col = 'CancelFlag'

# Every column the notebook keeps from the raw frame: features, then target.
cols = [*feature_cols, target_col]

In [8]:
# Narrow the raw frame to the modelling columns listed in `cols`
# (the features plus the CancelFlag label).
data = df_raw[cols]

In [9]:
# Separate the modelling frame into the feature matrix and the label vector.
X = data.drop('CancelFlag', axis=1)
y = data['CancelFlag']

# Relabel the feature columns positionally (0..n-1).
X.columns = range(len(X.columns))
# No relabelling is done for `y`: it is a Series, which has no column axis,
# so assigning to `y.columns` would only attach an unused attribute.

In [10]:
# Hold out 30% of the rows for validation. The split is stratified on the
# label and uses a fixed seed so it is the same on every run.
splits = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = splits

# LightGBM wrappers for the full data and for each part of the split.
# free_raw_data=False asks LightGBM to keep the raw data on the Dataset.
dataset = lgb.Dataset(X, label=y, free_raw_data=False)
train_data = lgb.Dataset(X_train, label=y_train, free_raw_data=False)
# The validation Dataset is tied to the training Dataset through `reference`.
test_data = lgb.Dataset(
    X_test,
    label=y_test,
    free_raw_data=False,
    reference=train_data,
)

In [11]:
# LightGBM training parameters: a binary classifier evaluated with AUC.
# Not set here (library defaults apply): class weighting, bagging, max_bin,
# min_data_in_leaf, min_gain_to_split, L2 / categorical regularisation,
# top_k and dart boosting.
param = dict(
    num_leaves=40,
    objective='binary',
    max_depth=-1,  # LightGBM treats values <= 0 as "no depth limit"
    learning_rate=0.1,
    tree_learner='data',
    metric='auc',
)

In [12]:
# Upper bound on boosting iterations; early stopping may end training sooner.
num_round = 100

# Train on the training split and score each iteration on the hold-out split
# (reported under the default name 'valid_0'). Training stops once the
# validation metric has not improved for 5 consecutive rounds.
# Early stopping is configured through a callback because the
# `early_stopping_rounds` / `verbose_eval` keyword arguments were removed from
# `lgb.train` in LightGBM 4.0; verbose=False keeps the callback silent.
bst = lgb.train(
    param,
    train_data,
    num_boost_round=num_round,
    valid_sets=[test_data],
    callbacks=[lgb.early_stopping(stopping_rounds=5, verbose=False)],
)

In [13]:
# Best validation AUC recorded during training.
auc = bst.best_score['valid_0']['auc']
# File-name tag: the four digits after the decimal point of the AUC.
# Fixed-width formatting keeps trailing zeros (0.85 -> '8500', not '85') and
# avoids scientific notation for very small values.
# NOTE(review): an AUC that rounds to 1.0 yields '0000'; confirm whether that
# case needs a distinct tag.
score = f'{auc:.4f}'[2:]

# Make sure the output directory exists before LightGBM tries to write to it.
model_dir = f'{airflow_home}/model'
os.makedirs(model_dir, exist_ok=True)

# Persist only the iterations up to the best one found by early stopping.
bst.save_model(f'{model_dir}/model_{max_date}_{score}.txt', num_iteration=bst.best_iteration)

<lightgbm.basic.Booster at 0x7fa12b812240>