In [3]:
from xgboost.sklearn import XGBRegressor
import pandas as pd

In [4]:
import os    
# Set before the heavy numeric libraries below are imported.
# NOTE(review): presumably a workaround for the "OMP: Error #15" abort that
# occurs when more than one OpenMP runtime is loaded into the process —
# confirm it is still needed, since it silences a real library conflict.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

In [5]:
import torch
from model.model_load import load_model
from configs.setting import global_setting
from libs.common.project_paths import GetPaths

import pandas as pd
import matplotlib.pyplot as plt
from libs import train_utils

import joblib

In [6]:
import yaml

def load_config(cfg_path):
    """Parse the YAML file at ``cfg_path`` and return its contents.

    Args:
        cfg_path: Path of the YAML file. It is decoded as ``utf-8-sig``, so a
            leading UTF-8 byte-order mark is dropped before parsing.

    Returns:
        The object produced by ``yaml.full_load`` for the document.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle as soon as parsing ends (also when
    # parsing raises) instead of leaving an open file to the garbage collector.
    with open(cfg_path, 'r', encoding='utf-8-sig') as cfg_file:
        # NOTE(review): full_load resolves more tags than plain configuration
        # needs; if the file only holds scalars, lists and mappings,
        # yaml.safe_load is the safer loader. Kept as-is to preserve behaviour.
        return yaml.full_load(cfg_file)


In [7]:
# Read the complete dataset from the Excel workbook; it is partitioned into
# train / valid / test further down.
df = pd.read_excel('res/total_set.xlsx')

In [16]:
# Locate the YAML config through the project's path helper, then parse it.
# NOTE(review): get_configs_folder() receives a file name and its result is
# used as a file path — presumably it returns <configs dir>/<name>; confirm.
config_file = 'cfg.yaml'
config_path = GetPaths().get_configs_folder(config_file)

config = load_config(config_path)

In [17]:
# Display the column names the config selects as model inputs.
config['DATA']['X_COLS']

['Vertical', 'Horizontal', 'Module', 'Outdoor']

# Dataset

In [19]:
from sklearn.preprocessing import MinMaxScaler
from libs.common.common import scaler_save

In [20]:
# Partition the dataframe into train / validation / test subsets.
# NOTE(review): the split rule (ratios, shuffling, ordering) lives in
# train_utils.split_dataset and is driven by `config`; it is not visible here.
train, valid, test = train_utils.split_dataset(df, config)

In [21]:
# Input and target column names are taken from the config.
weather_cols = config['DATA']['X_COLS']
target_cols = config['DATA']['Y_TARGET']

# Cut every split into its input frame and its target frame.
train_weather_x, valid_weather_x, test_weather_x = (
    subset[weather_cols] for subset in (train, valid, test)
)
train_weather_y, valid_weather_y, test_weather_y = (
    subset[target_cols] for subset in (train, valid, test)
)

# Fit one min-max scaler per side on the training split only, then persist
# both through the project's scaler_save helper.
sc_x = MinMaxScaler().fit(train_weather_x.values)
sc_y = MinMaxScaler().fit(train_weather_y.values)
scaler_save(sc_x, name='x_scaler')
scaler_save(sc_y, name='y_scaler')

In [22]:
# Scale the inputs of every split with the scaler fitted on the training data.
# Both scalers were fitted on bare ndarrays (`.values`), so ndarrays are passed
# here as well: handing a DataFrame to a scaler fitted without column names
# makes scikit-learn warn about mismatching feature names on every call.
train_x = sc_x.transform(train_weather_x.values)
valid_x = sc_x.transform(valid_weather_x.values)
test_x = sc_x.transform(test_weather_x.values)

# Targets are scaled too. The *_weather_y frames already contain exactly the
# target columns, so no further column selection is needed.
train_y = sc_y.transform(train_weather_y.values)
valid_y = sc_y.transform(valid_weather_y.values)
test_y = sc_y.transform(test_weather_y.values)

# Validation pair handed to the regressor's fit() for per-round evaluation.
eval_set = [(valid_x, valid_y)]



In [23]:
# Train the gradient-boosted regressor on the scaled training split.
# eval_set is only scored and logged after each round; no early stopping is
# requested, so all 50 rounds are always trained.
reg = (
    XGBRegressor(
        n_estimators=50,         # number of boosting rounds (trees)
        max_depth=5,             # maximum depth of each individual tree
        gamma=0,                 # minimum loss reduction required for a split
        importance_type='gain',  # gain, weight, cover, total_gain, total_cover
        reg_lambda=1,            # strength of the L2 penalty
        random_state=100,
    )
    .fit(train_x, train_y, eval_set=eval_set)
)

[0]	validation_0-rmse:0.31524
[1]	validation_0-rmse:0.22198
[2]	validation_0-rmse:0.15736
[3]	validation_0-rmse:0.11294
[4]	validation_0-rmse:0.08316
[5]	validation_0-rmse:0.06375
[6]	validation_0-rmse:0.05174
[7]	validation_0-rmse:0.04474
[8]	validation_0-rmse:0.04102
[9]	validation_0-rmse:0.03909
[10]	validation_0-rmse:0.03815
[11]	validation_0-rmse:0.03770
[12]	validation_0-rmse:0.03755
[13]	validation_0-rmse:0.03747
[14]	validation_0-rmse:0.03744
[15]	validation_0-rmse:0.03742
[16]	validation_0-rmse:0.03740
[17]	validation_0-rmse:0.03740
[18]	validation_0-rmse:0.03741
[19]	validation_0-rmse:0.03738
[20]	validation_0-rmse:0.03742
[21]	validation_0-rmse:0.03743
[22]	validation_0-rmse:0.03739
[23]	validation_0-rmse:0.03739
[24]	validation_0-rmse:0.03741
[25]	validation_0-rmse:0.03741
[26]	validation_0-rmse:0.03744
[27]	validation_0-rmse:0.03744
[28]	validation_0-rmse:0.03745
[29]	validation_0-rmse:0.03745
[30]	validation_0-rmse:0.03746
[31]	validation_0-rmse:0.03747
[32]	validation_0-

# Save Model

In [24]:
# Make sure the checkpoint directory exists, then persist the fitted regressor.
# NOTE(review): the "Model Test" section loads
# 'ckpt/ml/2stage-1/xgb_2nd.model', not the path written here — confirm the
# file is moved there deliberately, otherwise the test evaluates a stale model.
os.makedirs('ckpt/ml', exist_ok=True)

filename = 'ckpt/ml/xgb_2nd.model'
reg.save_model(filename)

# Model Test

In [25]:
# Checkpoint files of the two regressors that are chained in the prediction
# cell below (model1 feeds model2).
model1_path = 'ckpt/ml/2stage-1/xgb_1st.model'
model2_path = 'ckpt/ml/2stage-1/xgb_2nd.model'

In [26]:
# Restore each stage from disk into a freshly constructed estimator.
model1 = XGBRegressor()
model1.load_model(model1_path)

model2 = XGBRegressor()
model2.load_model(model2_path)

In [29]:
# Columns used for the two-stage evaluation. These literals rebind the
# config-driven weather_cols / target_cols from the Dataset section, and the
# two frames below rebind test_weather_x / test_weather_y to match.
# NOTE(review): hard-coded rather than read from config — presumably the
# inputs/target the first-stage model was trained with; confirm against the
# notebook that trained xgb_1st.model.
weather_cols = ['rain', 'wind', 'humidity', 'pressure', 'snow', 'cloud', 'visibility']
target_cols = ['total']

test_weather_x = test[weather_cols]
test_weather_y = test[target_cols]

In [31]:
# Scaler files saved alongside the two-stage checkpoints.
# NOTE(review): the x-scaler comes from SCALER_1 and the y-scaler from
# SCALER_2 — presumably first-stage inputs and second-stage outputs; confirm
# the mixed folders are intentional.
s_x = 'ckpt/ml/2stage-1/SCALER_1/x_scaler.pkl'
s_y = 'ckpt/ml/2stage-1/SCALER_2/y_scaler.pkl'

# joblib.load unpickles the file, so only load scaler files you trust.
# This rebinds sc_x / sc_y, replacing the scalers fitted in the Dataset section.
sc_x = joblib.load(s_x)
sc_y = joblib.load(s_y)

# Scale the test inputs and targets with the loaded scalers (rebinds
# test_x / test_y from the Dataset section).
test_x = sc_x.transform(test_weather_x)
test_y = sc_y.transform(test_weather_y[target_cols].values)



In [32]:
# Two-stage inference: the first model's predictions become the input of the
# second model.
# NOTE(review): assumes model1's output has the same column layout model2 was
# trained on — confirm.
pred_ = model1.predict(test_x)
pred = model2.predict(pred_)



# Get Accuracy

In [35]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
def get_accuracy(y_hat, target):
    """Score predictions against reference values.

    Despite the name, the result is a set of error metrics (lower is better).

    Args:
        y_hat: Predicted values.
        target: Reference values to compare against.

    Returns:
        A list ``[mae, mse, rmse]`` with every entry rounded to four decimals.
    """
    mse = mean_squared_error(target, y_hat)
    metrics = (
        mean_absolute_error(target, y_hat),
        mse,
        np.sqrt(mse),  # RMSE is derived from the MSE computed above
    )
    return [round(value, 4) for value in metrics]

In [36]:
# Report [MAE, MSE, RMSE] of the two-stage prediction on the test split.
# test_y was transformed with sc_y above, so these errors are in scaled units
# rather than in the target's original units.
# NOTE(review): assumes `pred` lives in that same scaled space — confirm.
get_accuracy(pred, test_y)

[0.0491, 0.0039, 0.0627]

0.06194925823000602