## Hyper parameter tuning for 7 ML Models
- tuner: keras-tuner / hyperopt  
- ML Models:
    * RF (Random Forest)
    * XGBoost
    * LightGBM
    * LR (Linear Regression)
    * SVR (Support Vector Regression)
    * DNN (Deep Neural Network)
    * AE (Auto Encoder)

---

### 0-1. Library Import

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import (
    mean_absolute_error, 
    mean_squared_error, 
    mean_absolute_percentage_error, 
    root_mean_squared_error
)

import xgboost as xgb
from xgboost import XGBRegressor, plot_importance
import lightgbm as lgb

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.python.client import device_lib

import keras_tuner as kt
from keras_tuner import Hyperband

from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from hyperopt.pyll.base import scope

In [3]:
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False

In [4]:
def print_test_results(y_t, pred):
    print(f'[Test results]\n'
          f'MAPE: {round(mean_absolute_percentage_error(y_t, pred), 2)}\n'
          f'MAE: {round(mean_absolute_error(y_t, pred), 2)}\n'
          f'MSE: {round(mean_squared_error(y_t, pred), 2)}\n'
          f'RMSE: {round(root_mean_squared_error(y_t, pred), 2)}\n')

def print_test_results2(y_t, pred):
    print(f'[Test results]\n'
          f'MAPE: {round(mean_absolute_percentage_error(y_t, pred), 4)}\n'
          f'MAE: {round(mean_absolute_error(y_t, pred), 4)}\n'
          f'MSE: {"%.4e"%mean_squared_error(y_t, pred)}\n'
          f'MSE: {round(mean_squared_error(y_t, pred), 4)}\n'
          f'RMSE: {round(root_mean_squared_error(y_t, pred), 4)}\n')

### 0-2. Get GPU

In [5]:
# GPU Test
print(tf.__version__)
print("==========================================")
print(tf.test.is_gpu_available())
print("==========================================")
print(tf.test.is_built_with_cuda())
print("==========================================")
print(tf.test.gpu_device_name())
print("==========================================")
device_lib.list_local_devices()

2.9.3
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True
True
/device:GPU:0


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3719748589297895349
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 10057940992
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 6475095686526821415
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:01:00.0, compute capability: 8.6"
 xla_global_id: 416903419]

### 0-3. Data import

In [6]:
raw_df = pd.read_excel('Data_평가만료일_특허유지일.xlsx')
df = raw_df.iloc[:,2:-1]

In [7]:
X_train, X_test, y_train,y_test = train_test_split(df, raw_df.iloc[:,-1], test_size = 0.2, random_state=7)
train_cnt = y_train.count()
test_cnt = y_test.count()

# scaler = StandardScaler()
# scaler = MinMaxScaler()
scaler = RobustScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(188621, 27)
(47156, 27)
(188621,)
(47156,)


---

## 1. RF - hyperopt

In [11]:
# 목적 함수 정의
def objective(params):
    # Random Forest 모델 생성
    model = RandomForestRegressor(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        min_samples_split=int(params['min_samples_split']),
        min_samples_leaf=int(params['min_samples_leaf']),
        random_state=7,
        n_jobs=-1
    )
    
    # 모델 학습
    model.fit(X_train, y_train)
    
    # 예측 및 평가
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    
    return {'loss': mae, 'status': STATUS_OK}

# 하이퍼파라미터 공간 정의
param_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 50, 300, 10)),
    'max_depth': scope.int(hp.quniform('max_depth', 5, 30, 1)),
    'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)),
    'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 5, 1))
}

# 하이퍼파라미터 튜닝 실행
trials = Trials()
best_rf_params = fmin(
    fn=objective,
    space=param_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

print("Best parameters:", best_rf_params)


100%|██████████| 50/50 [21:34<00:00, 25.89s/trial, best loss: 863.8608398361515]
Best parameters: {'max_depth': 30.0, 'min_samples_leaf': 1.0, 'min_samples_split': 2.0, 'n_estimators': 280.0}


## 2. XGBoost - hyperopt

In [12]:
print(f"XGBoost Version: {xgb.__version__}")
try:
    dtrain = QuantileDMatrix(data=[[1, 2], [3, 4]], label=[0, 1])
    print("GPU support is enabled.")
except Exception as e:
    print("GPU support is not enabled:", e)

XGBoost Version: 2.1.1
GPU support is not enabled: name 'QuantileDMatrix' is not defined


In [13]:
# 목적 함수 정의
def objective(params):
    # XGBoost 모델 생성
    model = xgb.XGBRegressor(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        # tree_method='gpu_hist',  # GPU 사용을 위한 옵션
        random_state=7
    )
    
    # 모델 학습
    model.fit(X_train, y_train)
    
    # 예측 및 평가
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    
    return {'loss': mae, 'status': STATUS_OK}

# 하이퍼파라미터 공간 정의
param_space = {
    'n_estimators': hp.quniform('n_estimators', 50, 300, 10),
    'max_depth': hp.quniform('max_depth', 3, 15, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0)
}

# 하이퍼파라미터 튜닝 실행
trials = Trials()
best_xgb_params = fmin(
    fn=objective,
    space=param_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

# 최적의 파라미터 출력
print("Best parameters:", best_xgb_params)

100%|██████████| 50/50 [03:34<00:00,  4.29s/trial, best loss: 864.7019704648927]
Best parameters: {'colsample_bytree': 0.8780425750115004, 'learning_rate': 0.03259162240984821, 'max_depth': 14.0, 'n_estimators': 290.0, 'subsample': 0.9229737811346409}


## 3. LightGBM - hyperopt

In [14]:
# 목적 함수 정의
def objective(params):
    # LightGBM 모델 생성
    model = lgb.LGBMRegressor(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        device='gpu',  # GPU 사용을 위한 옵션
        random_state=7
    )
    
    # 모델 학습
    model.fit(X_train, y_train)
    
    # 예측 및 평가
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    
    return {'loss': mae, 'status': STATUS_OK}

# 하이퍼파라미터 공간 정의
param_space = {
    'n_estimators': hp.quniform('n_estimators', 50, 300, 10),
    'max_depth': hp.quniform('max_depth', 3, 15, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0)
}

# 하이퍼파라미터 튜닝 실행
trials = Trials()
best_lgb_params = fmin(
    fn=objective,
    space=param_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

# 최적의 파라미터 출력
print("Best parameters:", best_lgb_params)

[LightGBM] [Info] This is the GPU trainer!!           
[LightGBM] [Info] Total Bins 3454                     
[LightGBM] [Info] Number of data points in the train set: 188621, number of used features: 27
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built        
[LightGBM] [Info] Size of histogram bin entry: 8      
[LightGBM] [Info] 17 dense feature groups (3.60 MB) transferred to GPU in 0.002705 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 2810.457325
[LightGBM] [Info] This is the GPU trainer!!                                    
[LightGBM] [Info] Total Bins 3454                                              
[LightGBM] [Info] Number of data points in the train set: 188621, number of used features: 27
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling 

## 4. Linear Regression

In [15]:
lr_model = LinearRegression()
lr_model.fit(X_train,y_train)

lr_predict=lr_model.predict(X_test)

In [16]:
print_test_results2(y_test,lr_predict)

[Test results]
MAPE: 0.4056
MAE: 956.044
MSE: 1.3530e+06
MSE: 1352985.6445
RMSE: 1163.1791



## 5. SVR - hyperopt

In [7]:
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# 목적 함수 정의
def objective(params):
    # SVR 모델 생성 (스케일링 포함)
    model = make_pipeline(
        StandardScaler(),  # 데이터를 스케일링
        SVR(
            C=params['C'],
            epsilon=params['epsilon'],
            kernel=params['kernel']
        )
    )
    
    # 모델 학습
    model.fit(X_train, y_train)
    
    # 예측 및 평가
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    
    return {'loss': mae, 'status': STATUS_OK}

# 하이퍼파라미터 공간 정의
param_space = {
    'C': hp.loguniform('C', -3, 3),  # 0.05부터 약 20까지
    'epsilon': hp.uniform('epsilon', 0.01, 1.0),
    'kernel': hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
}

# 하이퍼파라미터 튜닝 실행
trials = Trials()
best_svr_params = fmin(
    fn=objective,
    space=param_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

# 최적의 파라미터 출력
print("Best parameters:", best_svr_params)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

## 6. DNN - keras tuner

In [17]:
# 하이퍼모델 정의
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units_1', min_value=64, max_value=256, step=32),
                    input_dim=X_train.shape[1], activation='relu'))
    model.add(Dropout(rate=hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)))

    model.add(Dense(units=hp.Int('units_2', min_value=32, max_value=128, step=16), activation='relu'))
    model.add(Dropout(rate=hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))

    model.add(Dense(units=hp.Int('units_3', min_value=16, max_value=64, step=8), activation='relu'))
    model.add(Dropout(rate=hp.Float('dropout_3', min_value=0.1, max_value=0.5, step=0.1)))

    model.add(Dense(units=hp.Int('units_4', min_value=8, max_value=32, step=4), activation='relu'))
    model.add(Dropout(rate=hp.Float('dropout_4', min_value=0.1, max_value=0.5, step=0.1)))

    model.add(Dense(1, activation='relu'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
        ),
        loss='mae',
        metrics=['mae']
    )
    return model

# 하이퍼파라미터 탐색 설정
dnn_tuner = Hyperband(
    build_model,
    objective='val_mae',
    max_epochs=200,
    factor=3,
    directory='tuner_results',
    project_name='DNN_hyperparameter_tuning'
)

# 콜백 정의
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
checkpoint = ModelCheckpoint("DNN_model_best.h5", monitor='val_loss', save_best_only=True, mode='min')

# 튜너 검색 수행
dnn_tuner.search(
    X_train, y_train,
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, checkpoint]
)

# 최적 하이퍼파라미터로 모델 학습
best_dnn_hps = dnn_tuner.get_best_hyperparameters(num_trials=1)[0]
dnn_model = dnn_tuner.hypermodel.build(best_dnn_hps)
history = dnn_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, checkpoint]
)
print(best_dnn_hps.values)
# 모델 예측
y_pred = dnn_model.predict(X_test)
print_test_results(y_test, y_pred)

Trial 254 Complete [00h 03m 21s]
val_mae: 898.6165161132812

Best val_mae So Far: 883.7008056640625
Total elapsed time: 05h 39m 57s
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
{'units_1': 128, 'dropout_1': 0.30000000000000004, 'units_2': 96, 'dropout_2': 0.2, 'units_3': 56, 'dropout_3': 0.5, 'units_4': 24, 'dropout_4': 0.2, 'learning_rate': 0.0012536297097257307, 'tuner/epochs': 200, 'tuner/initial_epoch': 67, 'tuner/bracket': 4, 'tuner/round': 4, 'tuner/trial_id': '0145'}
[Test results]
MAPE: 0.34
MAE: 969.75
MSE: 1557023.28
RMSE: 1247.81



In [20]:
dnn_model = dnn_tuner.hypermodel.build(best_dnn_hps)
history = dnn_model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint]
)

# 모델 예측
y_pred = dnn_model.predict(X_test)
print_test_results(y_test, y_pred)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

## 7. Auto Encoder - keras tuner

In [None]:
# EarlyStopping 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Autoencoder 모델 정의 함수
def build_autoencoder(hp):
    encoding_dim = hp.Choice('encoding_dim', [16, 32, 64, 128])  # 튜닝 가능한 encoding_dim
    input_layer = Input(shape=(X_train.shape[1],))  # 입력 크기 설정
    encoded = Dense(encoding_dim, activation='relu')(input_layer)  # 인코더 레이어
    decoded = Dense(X_train.shape[1], activation=hp.Choice('decoder_activation', ['sigmoid', 'relu']))(encoded)  # 디코더 레이어
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),  # 튜닝 가능한 학습률
        loss=MeanAbsoluteError()
    )
    return autoencoder

# Regressor 모델 정의 함수
def build_regressor(hp):
    model = Sequential()
    model.add(Dense(
        hp.Int('units_1', min_value=32, max_value=128, step=32),
        activation='relu',
        input_dim=fixed_encoding_dim  # Autoencoder의 최적 encoding_dim을 사용
    ))
    for i in range(hp.Int('num_layers', 1, 3)):  # 튜닝 가능한 히든 레이어 개수
        model.add(Dense(
            hp.Int(f'units_{i+2}', min_value=16, max_value=64, step=16),
            activation='relu'
        ))
    model.add(Dense(1, activation='linear'))  # 출력 레이어
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),  # 튜닝 가능한 학습률
        loss=MeanAbsoluteError(),
        metrics=['mae']
    )
    return model

# Autoencoder 튜닝
autoencoder_tuner = kt.Hyperband(
    build_autoencoder,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='my_dir',
    project_name='autoencoder_tuning'
)

# Autoencoder 튜닝 수행
autoencoder_tuner.search(X_train, X_train, validation_split=0.3, epochs=50, batch_size=256)

# Autoencoder 최적 하이퍼파라미터 획득 및 모델 생성
best_hp_autoencoder = autoencoder_tuner.get_best_hyperparameters(1)[0]
autoencoder = autoencoder_tuner.hypermodel.build(best_hp_autoencoder)

# 인코더 추출
encoder = Model(autoencoder.input, autoencoder.layers[1].output)

# 인코딩된 데이터 생성
encoded_X_train = encoder.predict(X_train)
encoded_X_test = encoder.predict(X_test)

# Autoencoder의 최적 encoding_dim 사용
fixed_encoding_dim = best_hp_autoencoder.get('encoding_dim')

# Regressor 모델 정의 함수 (encoding_dim 고정)
def build_regressor(hp):
    model = Sequential()
    model.add(Dense(
        hp.Int('units_1', min_value=32, max_value=128, step=32),
        activation='relu',
        input_dim=fixed_encoding_dim  # Autoencoder의 encoding_dim과 동일하게 고정
    ))
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(
            hp.Int(f'units_{i+2}', min_value=16, max_value=64, step=16),
            activation='relu'
        ))
    model.add(Dense(1, activation='linear'))
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss=MeanAbsoluteError(),
        metrics=['mae']
    )
    return model

# Regressor 튜닝 수행
regressor_tuner = kt.Hyperband(
    build_regressor,
    objective='val_mae',
    max_epochs=50,
    factor=3,
    directory='my_dir',
    project_name='regressor_tuning'
)

# Regressor 튜닝 및 학습
regressor_tuner.search(encoded_X_train, y_train, validation_split=0.3, epochs=50, batch_size=32)

# 최적 하이퍼파라미터 적용 및 학습
best_hp_regressor = regressor_tuner.get_best_hyperparameters(1)[0]
regressor = regressor_tuner.hypermodel.build(best_hp_regressor)
regressor.fit(encoded_X_train, y_train, epochs=100, batch_size=32, validation_split=0.3, callbacks=[early_stopping])

# 예측
auto_encoder_predictions = regressor.predict(encoded_X_test)
print("Best parameters:", best_hp_autoencoder.values)
print_test_results(y_test, auto_encoder_predictions)

Trial 90 Complete [00h 05m 47s]
val_mae: 934.9107666015625

Best val_mae So Far: 928.7645874023438
Total elapsed time: 01h 43m 55s
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
[Test results]
MAPE: 0.38
MAE: 928.25
MSE: 1394592.92
RMSE: 1180.93



## 8. Model Test

In [None]:
rf_model = RandomForestRegressor(n_estimators=280, 
                                 max_depth=30, 
                                 min_samples_split=2, 
                                 min_samples_leaf=1)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
print("Random Forest Results:")
print_test_results(y_test, rf_pred)

In [27]:
xgb_model = xgb.XGBRegressor(n_estimators=290, 
                             max_depth=14, 
                             learning_rate=0.03259162240984821, 
                             subsample=0.7206064233722951, 
                             colsample_bytree=0.9229737811346409)
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)
print("XGBoost Results:")
print_test_results(y_test, xgb_pred)

XGBoost Results:
[Test results]
MAPE: 0.36
MAE: 864.87
MSE: 1203907.86
RMSE: 1097.23



In [28]:
lgb_model = lgb.LGBMRegressor(n_estimators=280,
                              max_depth=15,
                              learning_rate=0.23326321741873107,
                              subsample=0.7771220043606767,
                              colsample_bytree=0.8787077808247973,
                              device='gpu')
lgb_model.fit(X_train, y_train)
lgb_pred = lgb_model.predict(X_test)
print("LightGBM Results:")
print_test_results(y_test, lgb_pred)

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3454
[LightGBM] [Info] Number of data points in the train set: 188621, number of used features: 27
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3060, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 17 dense feature groups (3.60 MB) transferred to GPU in 0.004665 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 2810.457325
LightGBM Results:
[Test results]
MAPE: 0.38
MAE: 892.75
MSE: 1247610.01
RMSE: 1116.96



In [8]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# SVR 모델 생성 (고정된 하이퍼파라미터 사용)
model = make_pipeline(
    StandardScaler(),  # 데이터를 스케일링
    SVR(
        C=19.62581040283463,        
        epsilon=0.6967828360688615,  
        kernel='rbf'                 
    )
)

# 모델 학습
model.fit(X_train, y_train)

# 예측 및 평가
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

# 결과 출력
print("Mean Absolute Error (MAE):", mae)


Mean Absolute Error (MAE): 932.1085014427887


In [9]:
print_test_results2(y_test, y_pred)

[Test results]
MAPE: 0.3763
MAE: 932.1085
MSE: 1.3722e+06
MSE: 1372223.5726
RMSE: 1171.4195

