In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored in Drive can be opened by path from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def evaluate_model(y_true, y_pred):
  """
  Print standard regression error metrics for a set of predictions.

  Computes MSE, MAE, RMSE, R^2 and MAPE and prints them between two
  separator lines. Nothing is returned.

  Args:
    y_true : array-like of actual target values
    y_pred : array-like of predicted values, same length as y_true

  Notes:
    Both inputs are converted to float NumPy arrays, so values are compared
    by position and any pandas index labels are ignored.
    R^2 is undefined when y_true is constant and MAPE is undefined when
    y_true contains zeros; NumPy yields nan/inf (with a RuntimeWarning) in
    those cases.
  """

  # Positional comparison: avoids label-based alignment between pandas objects.
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  residual = y_true - y_pred

  # Mean squared error (MSE)
  mse = np.mean(residual ** 2)

  # Mean absolute error (MAE)
  mae = np.mean(np.abs(residual))

  # Root mean squared error (RMSE)
  rmse = np.sqrt(mse)

  # Coefficient of determination (R^2)
  y_bar = np.mean(y_true)
  ss_tot = np.sum((y_true - y_bar) ** 2)
  ss_res = np.sum(residual ** 2)
  r2 = 1 - ss_res / ss_tot

  # Mean absolute percentage error (MAPE). The absolute value must cover the
  # whole ratio; dividing by a signed y_true lets negative targets cancel
  # positive terms and can even produce a negative "error".
  mape = 100 * np.mean(np.abs(residual / y_true))
  print('-------------------------------------')
  print("평균 제곱 오차(MSE) : {0}\n평균 절대 오차(MAE) : {1}\n평균 제곱근 오차(RMSE) : {2}\n결정계수(R^2) : {3}\n평균 절대 백분율 오차(MAPE) : {4}".format(mse, mae, rmse, r2, mape))
  print('-------------------------------------')

# LightGBM

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
import lightgbm as lgb

# Load the dataset from the CSV file on the mounted Drive
data = pd.read_csv('/content/drive/MyDrive/semi_project/data_APT.csv', encoding='utf-8')

# Prepare input features (X) and target variable (y)
X = data.drop(columns=['JS_Price'])
y = data['JS_Price']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameter search space.
# The previous mapping was invalid: n_estimators (0.4) must be a positive
# integer, and max_depth=1300 / learning_rate=1.8 were far outside usable
# ranges. The same six values are re-assigned to the parameters whose valid
# ranges they fit.
# NOTE(review): the reg_alpha / reg_lambda assignment (0.4 vs 1.8) is a best
# guess - confirm against the tuning run these values came from.
hyperparameter_grid = {
    'n_estimators': [1300],
    'learning_rate': [0.05],
    'num_leaves': [64],
    'max_depth': [10],
    'reg_alpha': [0.4],
    'reg_lambda': [1.8]
}

# Number of distinct combinations in the grid; n_iter is capped to it so the
# search does not request more candidates than exist.
n_candidates = int(np.prod([len(values) for values in hyperparameter_grid.values()]))

# Initialize a RandomizedSearchCV object
lgb_model_rscv = RandomizedSearchCV(
    lgb.LGBMRegressor(random_state=1000),
    hyperparameter_grid,
    cv=5,
    n_iter=min(1000, n_candidates),
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    random_state=42,  # reproducible candidate sampling once the grid grows
    verbose=1
)

# Fit the model to the training data
lgb_model_rscv.fit(X_train.values, y_train)

# Make predictions on the test data with the refit best estimator
y_pred_lgb_model_rscv = lgb_model_rscv.best_estimator_.predict(X_test.values)


# Report regression metrics on the held-out test set
evaluate_model(y_test, y_pred_lgb_model_rscv)