In [1]:
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow.keras as keras
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings('ignore')
import import_ipynb
from Preprocessing import pre_buy_num, pre_buy_amount_round, data_split
    
def RF_model(x_train, y_train, max_depth, n_estimators, min_samples_split):
    model = RandomForestRegressor(max_depth = max_depth, n_estimators = n_estimators, min_samples_split = min_samples_split, random_state=2022)
    model.fit(x_train,y_train)
    return model

def XGB_model(x_train, y_train, max_depth, learning_rate):
    model=XGBRegressor(max_depth = max_depth, learning_rate = learning_rate, random_state=2022)
    model.fit(x_train,y_train)
    return model

def LGB_model(x_train, y_train, max_depth, learning_rate):
    model=LGBMRegressor(max_depth = max_depth, learning_rate = learning_rate, random_state=2022)
    model.fit(x_train,y_train)
    return model

def CAT_model(x_train, y_train, max_depth, learning_rate):
    model=CatBoostRegressor(max_depth = max_depth, learning_rate = learning_rate, random_state=2022, silent=True)
    model.fit(x_train,y_train)
    return model

def DNN_model(x_train, y_train, hidden, units):
    np.random.seed(2022)
    tf.random.set_seed(2022)
    initializer = tf.keras.initializers.GlorotUniform(seed=2022)
    
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=5)
    check_point = ModelCheckpoint('temp/DNN_temp.h5',monitor='val_loss',mode='min',save_best_only=True)
    
    model = Sequential()
    model.add(keras.layers.Dense(units=units, activation='relu', input_shape=(x_train.shape[1],), kernel_initializer=initializer))
    for i in range(hidden):
        model.add(keras.layers.Dense(units=units, activation='relu', kernel_initializer=initializer))
    model.add(keras.layers.Dense(units=1, activation='linear', kernel_initializer=initializer))
    model.compile(optimizer='adam', loss='mse', metrics='mae')
    model.fit(x_train, y_train, epochs=50, batch_size=128, validation_split=0.2, callbacks=[early_stopping,check_point], verbose=0)
    return model

def model_evaluate(model, x_test, y_test):
    pred = model.predict(x_test)
    print("MSE: ", mean_squared_error(y_test, pred))
    print("RMSE: ", np.sqrt(mean_squared_error(y_test, pred)))
    print("MAE: ", mean_absolute_error(y_test, pred))
    print("R2_score: ", r2_score(y_test, pred))

importing Jupyter notebook from Preprocessing.ipynb


# 1번 주제: 총 구매횟수

In [11]:
data02_offline, data02_online, data03_offline, data03_online = pre_buy_num()

data02_offline

In [12]:
x_train, x_test, y_train, y_test = data_split(data02_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 44, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 3, units = 300), x_test, y_test)

-----RF_model-----
MSE:  13.825715086623482
RMSE:  3.7182946476339773
MAE:  1.8821173642530065
R2_score:  0.943926525023831
-----XGB_model-----
MSE:  29.0609207235331
RMSE:  5.39081818683705
MAE:  3.3657653763749265
R2_score:  0.8821365259760006
-----LGB_model-----
MSE:  63.10166936270983
RMSE:  7.9436559192043195
MAE:  4.7570279034979555
R2_score:  0.744076175749653
-----CAT_model-----
MSE:  27.540089635302632
RMSE:  5.24786524553581
MAE:  3.3641174677513015
R2_score:  0.8883046180735571
-----DNN_model-----
MSE:  57.652041695393855
RMSE:  7.5928941580529
MAE:  4.506258856695938
R2_score:  0.7661784365526644


data02_online

In [3]:
x_train, x_test, y_train, y_test = data_split(data02_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 45, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 4, units = 300), x_test, y_test)

-----RF_model-----
MSE:  7.6723592022630225
RMSE:  2.7699023813598598
MAE:  1.366625534426022
R2_score:  0.8900722076220451
-----XGB_model-----
MSE:  9.48515378658477
RMSE:  3.079797685982761
MAE:  1.7748147565596528
R2_score:  0.8640989050907424
-----LGB_model-----
MSE:  19.710278760732855
RMSE:  4.439625970814755
MAE:  2.741841197125275
R2_score:  0.7175956737423911
-----CAT_model-----
MSE:  9.949124258715731
RMSE:  3.154223241737295
MAE:  1.865262517478538
R2_score:  0.8574512432196884
-----DNN_model-----
MSE:  23.80589813143926
RMSE:  4.879128829149653
MAE:  2.8565496234196224
R2_score:  0.6589145843964508


data03_offline

In [4]:
x_train, x_test, y_train, y_test = data_split(data03_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 44, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.1), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 3, units = 300), x_test, y_test)

-----RF_model-----
MSE:  20.317747135546394
RMSE:  4.507521174165063
MAE:  2.3440311014307085
R2_score:  0.906086845701141
-----XGB_model-----
MSE:  27.94368348115595
RMSE:  5.286178532849222
MAE:  3.25581305500181
R2_score:  0.870838068761421
-----LGB_model-----
MSE:  46.1101184502623
RMSE:  6.790443170387504
MAE:  4.779878302225002
R2_score:  0.786868758634066
-----CAT_model-----
MSE:  26.01372822640551
RMSE:  5.100365499295664
MAE:  3.389803629488527
R2_score:  0.8797587519661142
-----DNN_model-----
MSE:  64.66578527092405
RMSE:  8.04150391847968
MAE:  5.318389244975679
R2_score:  0.7011003321632853


data03_online

In [5]:
x_train, x_test, y_train, y_test = data_split(data03_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 35, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.1), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.1), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 4, units = 200), x_test, y_test)

-----RF_model-----
MSE:  12.28855545907168
RMSE:  3.50550359564381
MAE:  2.015202506061392
R2_score:  0.9119370787000942
-----XGB_model-----
MSE:  12.431996601838208
RMSE:  3.5259036574810447
MAE:  2.157047173804479
R2_score:  0.9109091429017256
-----LGB_model-----
MSE:  18.895961225021296
RMSE:  4.34694849578659
MAE:  2.97938748583994
R2_score:  0.8645867244699867
-----CAT_model-----
MSE:  11.911666452296796
RMSE:  3.4513282156724525
MAE:  2.23701604440326
R2_score:  0.9146379614078266
-----DNN_model-----
MSE:  50.26939277141776
RMSE:  7.0900911680610825
MAE:  4.919763518142984
R2_score:  0.6397567155742946


# 2번 주제: 총 구매금액

In [6]:
data02_offline, data02_online, data03_offline, data03_online = pre_buy_amount_round()

data02_offline

In [10]:
x_train, x_test, y_train, y_test = data_split(data02_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 44, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 3, units = 300), x_test, y_test)

-----RF_model-----
MSE:  0.03414273023157042
RMSE:  0.18477751549247115
MAE:  0.10336543873874311
R2_score:  0.9749956549996245
-----XGB_model-----
MSE:  0.05054178351746251
RMSE:  0.22481499842640063
MAE:  0.1463730486391746
R2_score:  0.9629858484241436
-----LGB_model-----
MSE:  0.07683551018218308
RMSE:  0.27719218997328027
MAE:  0.18499609896692046
R2_score:  0.9437297019938173
-----CAT_model-----
MSE:  0.04708176177250162
RMSE:  0.2169833214154987
MAE:  0.14395205806129727
R2_score:  0.9655197868887309
-----DNN_model-----
MSE:  0.09799024717893276
RMSE:  0.31303393934034174
MAE:  0.21863694194459599
R2_score:  0.928237082081136


data02_online

In [7]:
x_train, x_test, y_train, y_test = data_split(data02_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 44, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 4, units = 400), x_test, y_test)

-----RF_model-----
MSE:  0.029092117296441636
RMSE:  0.1705641149141332
MAE:  0.0977723216886449
R2_score:  0.9772388971111204
-----XGB_model-----
MSE:  0.03584549738491247
RMSE:  0.18932907168449453
MAE:  0.12439446003441203
R2_score:  0.9719551847750576
-----LGB_model-----
MSE:  0.069011418835615
RMSE:  0.262700245214227
MAE:  0.18356988091689577
R2_score:  0.9460068172893995
-----CAT_model-----
MSE:  0.03504321062247813
RMSE:  0.18719831896274639
MAE:  0.1263140168193499
R2_score:  0.9725828782275513
-----DNN_model-----
MSE:  0.10576919285727693
RMSE:  0.3252217595076887
MAE:  0.24214649511911399
R2_score:  0.9172482546881288


data03_offline

In [8]:
x_train, x_test, y_train, y_test = data_split(data03_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 41, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.1), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 4, units = 300), x_test, y_test)

-----RF_model-----
MSE:  0.04136030320994245
RMSE:  0.20337232655880802
MAE:  0.11574836018971259
R2_score:  0.9705311200220896
-----XGB_model-----
MSE:  0.04788547869668117
RMSE:  0.21882750900350983
MAE:  0.14086004648878991
R2_score:  0.9658819855058978
-----LGB_model-----
MSE:  0.07248814428754634
RMSE:  0.2692362239512847
MAE:  0.18731542415857147
R2_score:  0.9483527861730556
-----CAT_model-----
MSE:  0.04508811587978421
RMSE:  0.21233962390421673
MAE:  0.1414326029499509
R2_score:  0.9678750837839101
-----DNN_model-----
MSE:  0.09953898434055505
RMSE:  0.31549799419418667
MAE:  0.22399157434455913
R2_score:  0.9290792824277508


data03_online

In [9]:
x_train, x_test, y_train, y_test = data_split(data03_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train, max_depth = 31, n_estimators = 110, min_samples_split = 2), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.1), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train, max_depth = 11, learning_rate = 0.5), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train, max_depth = 11, learning_rate = 0.1), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train, hidden = 4, units = 200), x_test, y_test)

-----RF_model-----
MSE:  0.04344909562690574
RMSE:  0.20844446652983079
MAE:  0.12730458724187615
R2_score:  0.9498476956864529
-----XGB_model-----
MSE:  0.03962267811430214
RMSE:  0.19905446017183875
MAE:  0.1265351981973035
R2_score:  0.9542644425198196
-----LGB_model-----
MSE:  0.0530548146577692
RMSE:  0.2303363077280028
MAE:  0.16611832333383686
R2_score:  0.9387600323637674
-----CAT_model-----
MSE:  0.039111508211721074
RMSE:  0.19776629695608167
MAE:  0.13529552347000315
R2_score:  0.9548544743292341
-----DNN_model-----
MSE:  0.11798729968601715
RMSE:  0.343492794227211
MAE:  0.2594516051947586
R2_score:  0.8638099395716186
