In [1]:
from xgboost.sklearn import XGBRegressor
import pandas as pd

In [2]:
import os    
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort that can
# occur when several native libraries each bundle their own OpenMP — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

In [3]:
import torch
from model.model_load import load_model
from configs.setting import global_setting
from libs.common.project_paths import GetPaths

import pandas as pd
import matplotlib.pyplot as plt
from libs import train_utils

import joblib

In [4]:
import yaml

def load_config(cfg_path):
    """Parse a YAML configuration file and return its contents.

    Args:
        cfg_path: Path to the YAML file. It is opened as ``utf-8-sig`` so a
            leading byte-order mark, if present, is skipped.

    Returns:
        Whatever ``yaml.full_load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Use a context manager so the handle is closed deterministically instead
    # of being left to the garbage collector.
    # NOTE(review): yaml.full_load can build more than plain scalars/containers;
    # if the config is plain data, consider yaml.safe_load.
    with open(cfg_path, 'r', encoding='utf-8-sig') as cfg_file:
        return yaml.full_load(cfg_file)


In [5]:
# Read the dataset workbook into a DataFrame; it is split into train/valid/test further down.
df = pd.read_excel('res/total_set.xlsx')

In [6]:
# Locate the config file via the project's path helper and parse it.
# NOTE(review): presumably get_configs_folder returns the path of `config_file`
# inside the configs directory — confirm against GetPaths.
config_file = 'cfg.yaml'
config_path = GetPaths().get_configs_folder(config_file)

config = load_config(config_path)

# Dataset

In [19]:
from sklearn.preprocessing import MinMaxScaler
from libs.common.common import scaler_save

In [16]:
# Partition the full frame into train / valid / test subsets using the project helper.
# NOTE(review): the split rule (ratios, ordering, shuffling) lives in train_utils and is
# driven by `config`; it is not visible from this notebook.
train, valid, test = train_utils.split_dataset(df, config)

In [20]:
# Feature / target column names are taken from the DATA section of the config.
data_cfg = config['DATA']
weather_cols = data_cfg['X_COLS']
target_cols = data_cfg['Y_TARGET']

# Select the feature columns, then the target columns, from every split.
train_weather_x, valid_weather_x, test_weather_x = (
    subset[weather_cols] for subset in (train, valid, test)
)
train_weather_y, valid_weather_y, test_weather_y = (
    subset[target_cols] for subset in (train, valid, test)
)

# Fit the min-max scalers on the training split only, then persist both
# through the project's scaler_save helper.
sc_x = MinMaxScaler().fit(train_weather_x.values)
sc_y = MinMaxScaler().fit(train_weather_y.values)
scaler_save(sc_x, name='x_scaler')
scaler_save(sc_y, name='y_scaler')

In [23]:
# Scale every split with the scalers fitted on the training data.
# Both scalers were fitted on plain ndarrays (`.values`), so ndarrays are passed
# here as well: handing a DataFrame to a scaler fitted without feature names
# makes scikit-learn emit a feature-name mismatch warning on each call.
train_x = sc_x.transform(train_weather_x.values)
valid_x = sc_x.transform(valid_weather_x.values)
test_x = sc_x.transform(test_weather_x.values)

# The *_weather_y frames already hold only `target_cols`, so no re-selection is needed.
train_y = sc_y.transform(train_weather_y.values)
valid_y = sc_y.transform(valid_weather_y.values)
test_y = sc_y.transform(test_weather_y.values)

# Validation pair monitored by XGBoost while fitting.
eval_set = [(valid_x, valid_y)]



In [35]:
# Hyper-parameters of the baseline XGBoost regressor, collected in one place.
xgb_params = dict(
    n_estimators=50,          # number of boosting rounds (trees)
    max_depth=5,              # maximum depth of each individual tree
    gamma=0,                  # gamma
    importance_type='gain',   # one of: gain, weight, cover, total_gain, total_cover
    reg_lambda=1,             # tuning parameter of the L2 penalty
    random_state=100,
)

# Passing eval_set makes the validation RMSE get logged after every round.
# NOTE(review): in the recorded log the validation RMSE bottoms out around
# round 15 and drifts upward afterwards — early stopping may be worth adding.
reg = XGBRegressor(**xgb_params).fit(train_x, train_y, eval_set=eval_set)

[0]	validation_0-rmse:0.31864
[1]	validation_0-rmse:0.22864
[2]	validation_0-rmse:0.16735
[3]	validation_0-rmse:0.12697
[4]	validation_0-rmse:0.10127
[5]	validation_0-rmse:0.08597
[6]	validation_0-rmse:0.07742
[7]	validation_0-rmse:0.07266
[8]	validation_0-rmse:0.07010
[9]	validation_0-rmse:0.06891
[10]	validation_0-rmse:0.06837
[11]	validation_0-rmse:0.06804
[12]	validation_0-rmse:0.06788
[13]	validation_0-rmse:0.06778
[14]	validation_0-rmse:0.06766
[15]	validation_0-rmse:0.06752
[16]	validation_0-rmse:0.06762
[17]	validation_0-rmse:0.06760
[18]	validation_0-rmse:0.06761
[19]	validation_0-rmse:0.06774
[20]	validation_0-rmse:0.06800
[21]	validation_0-rmse:0.06797
[22]	validation_0-rmse:0.06801
[23]	validation_0-rmse:0.06795
[24]	validation_0-rmse:0.06795
[25]	validation_0-rmse:0.06797
[26]	validation_0-rmse:0.06818
[27]	validation_0-rmse:0.06820
[28]	validation_0-rmse:0.06821
[29]	validation_0-rmse:0.06831
[30]	validation_0-rmse:0.06826
[31]	validation_0-rmse:0.06805
[32]	validation_0-

# Save Model

In [40]:
# Checkpoint location for the fitted baseline; the parent directory is derived
# from the file path so the two cannot drift apart.
filename = 'ckpt/ml/xgb_baseline.model'
os.makedirs(os.path.dirname(filename), exist_ok=True)

reg.save_model(filename)

# Model Test

In [42]:
# Build a fresh estimator and restore the checkpoint written to `filename`.
model = XGBRegressor() # initialize the model
model.load_model(filename)

In [48]:
# Predict on the scaled test features. The model was trained on sc_y-scaled targets,
# so `pred` is in that scaled space as well (comparable to test_y, not to raw targets).
pred = model.predict(test_x)

In [49]:
# Imported in this cell so it runs on a fresh kernel (Restart & Run All);
# previously the name was only imported by a cell further down.
from sklearn.metrics import mean_squared_error

# Root-mean-squared error on the test split; test_y and pred are both in
# sc_y-scaled target units.
RMSE = mean_squared_error(test_y, pred) ** 0.5

# Get Accuracy

In [65]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
def get_accuracy(y_hat, target):
    """Compute regression error metrics for a set of predictions.

    Args:
        y_hat: Predicted values.
        target: Ground-truth values aligned with ``y_hat``.

    Returns:
        list: ``[mae, mse, rmse]``, each rounded to 4 decimal places.
    """
    mae = mean_absolute_error(target, y_hat)
    mse = mean_squared_error(target, y_hat)
    # Plain exponentiation instead of np.sqrt: numpy is not imported in the
    # visible cells, so np.sqrt raised NameError on a fresh kernel.
    rmse = mse ** 0.5

    # MAPE is not reported; an earlier torch-based draft of it was disabled.
    return [round(mae, 4), round(mse, 4), round(rmse, 4)]

In [66]:
# Displays [MAE, MSE, RMSE] (rounded to 4 decimals) for the test split,
# measured on the sc_y-scaled targets rather than in original units.
get_accuracy(pred, test_y)

[0.0488, 0.0038, 0.0619]

0.06194925823000602