In [1]:
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow.keras as keras
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings('ignore')
import import_ipynb
from Preprocessing import pre_buy_num, pre_buy_amount_round, data_split
    
def RF_model(x_train, y_train):
    model = RandomForestRegressor(random_state=2022)
    model.fit(x_train,y_train)
    return model

def XGB_model(x_train, y_train):
    model=XGBRegressor(random_state=2022)
    model.fit(x_train,y_train)
    return model

def LGB_model(x_train, y_train):
    model=LGBMRegressor(random_state=2022)
    model.fit(x_train,y_train)
    return model

def CAT_model(x_train, y_train):
    model=CatBoostRegressor(random_state=2022, silent=True)
    model.fit(x_train,y_train)
    return model

def DNN_model(x_train, y_train):
    np.random.seed(2022)
    tf.random.set_seed(2022)
    initializer = tf.keras.initializers.GlorotUniform(seed=2022)
    
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=5)
    check_point = ModelCheckpoint('temp/DNN_temp.h5',monitor='val_loss',mode='min',save_best_only=True)
    
    model = Sequential()
    model.add(keras.layers.Dense(units=100, activation='relu', input_shape=(x_train.shape[1],), kernel_initializer=initializer))
    model.add(keras.layers.Dense(units=1, activation='linear', kernel_initializer=initializer))
    model.compile(optimizer='adam', loss='mse', metrics='mae')
    model.fit(x_train, y_train, epochs=50, batch_size=128, validation_split=0.2, callbacks=[early_stopping,check_point], verbose=0)
    return model

def model_evaluate(model, x_test, y_test):
    pred = model.predict(x_test)
    print("MSE: ", mean_squared_error(y_test, pred))
    print("RMSE: ", np.sqrt(mean_squared_error(y_test, pred)))
    print("MAE: ", mean_absolute_error(y_test, pred))
    print("R2_score: ", r2_score(y_test, pred))

importing Jupyter notebook from Preprocessing.ipynb


# 1번 주제: 총 구매횟수

In [2]:
data02_offline, data02_online, data03_offline, data03_online = pre_buy_num()

data02_offline

In [3]:
x_train, x_test, y_train, y_test = data_split(data02_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  13.84251716770187
RMSE:  3.7205533416014704
MAE:  1.8821490894656185
R2_score:  0.9438583801888627
-----XGB_model-----
MSE:  68.98854621729146
RMSE:  8.305934397603407
MAE:  4.869756863244332
R2_score:  0.7202005468997179
-----LGB_model-----
MSE:  82.77025808346383
RMSE:  9.097816116160176
MAE:  5.202362240739461
R2_score:  0.664305537447059
-----CAT_model-----
MSE:  65.53041609791191
RMSE:  8.095085922824532
MAE:  4.796084014309135
R2_score:  0.7342258158639955
-----DNN_model-----
MSE:  102.51207514445832
RMSE:  10.124824696974182
MAE:  5.669729792974035
R2_score:  0.5842379041985766


data02_online

In [4]:
x_train, x_test, y_train, y_test = data_split(data02_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  7.718150226692536
RMSE:  2.778155903957252
MAE:  1.3683880263965649
R2_score:  0.889416124389553
-----XGB_model-----
MSE:  21.599691805026044
RMSE:  4.6475468588305855
MAE:  2.817989779436137
R2_score:  0.6905245995950806
-----LGB_model-----
MSE:  26.20689425242496
RMSE:  5.119266964363644
MAE:  3.1828480956385854
R2_score:  0.6245136659657624
-----CAT_model-----
MSE:  22.15271541851609
RMSE:  4.706667124252159
MAE:  2.872127324734051
R2_score:  0.6826010048621973
-----DNN_model-----
MSE:  39.87115160831099
RMSE:  6.314360744233021
MAE:  3.7613334698409657
R2_score:  0.42873533937571817


data03_offline

In [5]:
x_train, x_test, y_train, y_test = data_split(data03_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  20.369943549641523
RMSE:  4.513307384794605
MAE:  2.3467338172449868
R2_score:  0.9058455822452045
-----XGB_model-----
MSE:  51.52549761561063
RMSE:  7.178126330429873
MAE:  5.0401541945332555
R2_score:  0.7618376695202331
-----LGB_model-----
MSE:  71.31736573470523
RMSE:  8.444960967032662
MAE:  6.017845106948321
R2_score:  0.6703552452075823
-----CAT_model-----
MSE:  54.52944580634833
RMSE:  7.384405582465546
MAE:  5.266995898928726
R2_score:  0.7479527516668669
-----DNN_model-----
MSE:  110.74395040322132
RMSE:  10.523495160982463
MAE:  6.868243323111162
R2_score:  0.48811678615256904


data03_online

In [6]:
x_train, x_test, y_train, y_test = data_split(data03_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  12.288876450949953
RMSE:  3.505549379334137
MAE:  2.0159946109755094
R2_score:  0.9119347783904599
-----XGB_model-----
MSE:  19.96086615467022
RMSE:  4.4677585157067545
MAE:  3.0495849689300676
R2_score:  0.8569553442541515
-----LGB_model-----
MSE:  26.830723654546265
RMSE:  5.1798381880659425
MAE:  3.6901901874038554
R2_score:  0.8077241939885161
-----CAT_model-----
MSE:  22.022623433237207
RMSE:  4.692826806226415
MAE:  3.3278004153012706
R2_score:  0.8421802659655218
-----DNN_model-----
MSE:  77.59755198168739
RMSE:  8.808947268640413
MAE:  6.110999892519587
R2_score:  0.4439161595528246


# 2번 주제: 총 구매금액

In [7]:
data02_offline, data02_online, data03_offline, data03_online = pre_buy_amount_round()

data02_offline

In [8]:
x_train, x_test, y_train, y_test = data_split(data02_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  0.03417455169779966
RMSE:  0.18486360295579998
MAE:  0.10342610783022109
R2_score:  0.9749723506266403
-----XGB_model-----
MSE:  0.07666811258547172
RMSE:  0.27689007310749103
MAE:  0.18482191768882014
R2_score:  0.9438522952144537
-----LGB_model-----
MSE:  0.08585170199434505
RMSE:  0.29300461087557145
MAE:  0.19854330547529336
R2_score:  0.9371267159662852
-----CAT_model-----
MSE:  0.07192825329195242
RMSE:  0.26819443188096287
MAE:  0.1777120175543252
R2_score:  0.9473235195783097
-----DNN_model-----
MSE:  0.11236926630130228
RMSE:  0.3352152536823202
MAE:  0.22165985367622887
R2_score:  0.9177066425859877


data02_online

In [9]:
x_train, x_test, y_train, y_test = data_split(data02_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  0.029136521432258353
RMSE:  0.17069423374050557
MAE:  0.09778133112706358
R2_score:  0.9772041561847823
-----XGB_model-----
MSE:  0.06937625044634269
RMSE:  0.2633937175529111
MAE:  0.18286543434348498
R2_score:  0.9457213801813241
-----LGB_model-----
MSE:  0.09137199953157964
RMSE:  0.3022780169505875
MAE:  0.2106288301721149
R2_score:  0.9285123368193171
-----CAT_model-----
MSE:  0.07158790518766865
RMSE:  0.2675591620327524
MAE:  0.1849497134883815
R2_score:  0.943991024820487
-----DNN_model-----
MSE:  0.17635764220062639
RMSE:  0.41994957102088626
MAE:  0.3231935364735282
R2_score:  0.8620212341897975


data03_offline

In [10]:
x_train, x_test, y_train, y_test = data_split(data03_offline)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  0.04148668014558896
RMSE:  0.2036827929541152
MAE:  0.1159164525613243
R2_score:  0.9704410774822746
-----XGB_model-----
MSE:  0.07391037057959446
RMSE:  0.2718646181090773
MAE:  0.1877802721598611
R2_score:  0.9473394614957906
-----LGB_model-----
MSE:  0.0854264254548271
RMSE:  0.2922779934494335
MAE:  0.2039549452816859
R2_score:  0.9391343659670015
-----CAT_model-----
MSE:  0.07459379843154292
RMSE:  0.27311865266133495
MAE:  0.18744332589571633
R2_score:  0.9468525247042395
-----DNN_model-----
MSE:  0.16311948120762243
RMSE:  0.40388052838385563
MAE:  0.310407626745243
R2_score:  0.8837786950118173


data03_online

In [11]:
x_train, x_test, y_train, y_test = data_split(data03_online)
print('-----RF_model-----')
model_evaluate(RF_model(x_train, y_train), x_test, y_test)
print('-----XGB_model-----')
model_evaluate(XGB_model(x_train, y_train), x_test, y_test)
print('-----LGB_model-----')
model_evaluate(LGB_model(x_train, y_train), x_test, y_test)
print('-----CAT_model-----')
model_evaluate(CAT_model(x_train, y_train), x_test, y_test)
print('-----DNN_model-----')
model_evaluate(DNN_model(x_train, y_train), x_test, y_test)

-----RF_model-----
MSE:  0.043625323171527106
RMSE:  0.20886675937431284
MAE:  0.12762037531738935
R2_score:  0.9496442802339846
-----XGB_model-----
MSE:  0.050951204548767226
RMSE:  0.22572373501421428
MAE:  0.16247841421055353
R2_score:  0.9411881817376841
-----LGB_model-----
MSE:  0.061264849520428294
RMSE:  0.24751737215886138
MAE:  0.18213283305315356
R2_score:  0.9292833755791798
-----CAT_model-----
MSE:  0.054699727887532724
RMSE:  0.2338797295353591
MAE:  0.17043000180565634
R2_score:  0.9368613463801311
-----DNN_model-----
MSE:  0.12040291365607986
RMSE:  0.34699122994116127
MAE:  0.26486079226854214
R2_score:  0.8610216512267715
