In [None]:
# 필요한 라이브러리 불러오기
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.linear_model import Lasso
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the merged housing table; the CSV is read with cp949 encoding.
data = pd.read_csv('/content/realmerge_level.csv', encoding='cp949')

# Model inputs (X): area, one floor column per year (2019-2023),
# construction year, and the X / Y columns.
feature_columns = [
    '전용면적(㎡)',
    '층_2019', '층_2020', '층_2021', '층_2022', '층_2023',
    '건축년도',
    'X',
    'Y',
]
# Model targets (y): one final-price column per year (2019-2023).
target_columns = [
    '최종 집값(만원)_2019',
    '최종 집값(만원)_2020',
    '최종 집값(만원)_2021',
    '최종 집값(만원)_2022',
    '최종 집값(만원)_2023',
]
X = data[feature_columns]
y = data[target_columns]

# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# XGBoost hyperparameter tuning with RandomizedSearchCV.
# Candidate values the search samples from.
param_dist = {
    'learning_rate': [0.1, 0.01, 0.05],
    'max_depth': [3, 5, 7],
    'n_estimators': [50, 100, 200, 300],
}

# Only the first target column (the 2019 price) is modelled here.
y_train_2019 = y_train.values[:, 0]
y_test_2019 = y_test.values[:, 0]

# Try 10 sampled settings, each scored with 3-fold cross-validation.
random_search = RandomizedSearchCV(
    estimator=XGBRegressor(),
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    random_state=42,
)
random_search.fit(X_train, y_train_2019)

best_params_xgb_random = random_search.best_params_
print(f'Best Hyperparameters - XGBoost (Random Search): {best_params_xgb_random}')

# Train a fresh model with the winning settings on the training split.
xgb_model_tuned_random = XGBRegressor(random_state=42, **best_params_xgb_random)
xgb_model_tuned_random.fit(X_train, y_train_2019)

# Predict the held-out split and report its mean squared error.
xgb_predictions_tuned_random = xgb_model_tuned_random.predict(X_test)
xgb_mse_tuned_random = mean_squared_error(
    y_true=y_test_2019,
    y_pred=xgb_predictions_tuned_random,
)
print(f'Tuned XGBoost Mean Squared Error (Random Search): {xgb_mse_tuned_random}')

# XGBoost hyperparameter tuning with GridSearchCV (exhaustive over the grid).
param_grid = {
    'learning_rate': [0.1, 0.01, 0.05],
    'max_depth': [3, 5, 7],
    'n_estimators': [50, 100, 200, 300],
}

# Only the first target column (the 2019 price) is modelled here.
y_train_2019 = y_train.values[:, 0]
y_test_2019 = y_test.values[:, 0]

# Every combination in the grid is scored with 3-fold cross-validation.
grid_search = GridSearchCV(estimator=XGBRegressor(), param_grid=param_grid, cv=3)
grid_search.fit(X_train, y_train_2019)

best_params_xgb = grid_search.best_params_
print(f'Best Hyperparameters - XGBoost (Grid Search): {best_params_xgb}')

# Train a fresh model with the winning settings on the training split.
xgb_model_tuned = XGBRegressor(**best_params_xgb)
xgb_model_tuned.fit(X_train, y_train_2019)

# Predict the held-out split and report its mean squared error.
xgb_predictions_tuned = xgb_model_tuned.predict(X_test)
xgb_mse_tuned = mean_squared_error(
    y_true=y_test_2019,
    y_pred=xgb_predictions_tuned,
)
print(f'Tuned XGBoost Mean Squared Error (Grid Search): {xgb_mse_tuned}')

# Standardize the features first. The scaler is fitted on the training split
# only and then applied to both splits, so held-out rows do not influence the
# scaling statistics.
ss = StandardScaler()
ss.fit(X_train)
X_train_scaled = ss.transform(X_train)
X_test_scaled = ss.transform(X_test)

# Train the Lasso model on the *scaled* training features, using the first
# target column (the 2019 price) as the label. The model has to be fitted in
# the same feature space it later predicts in: fitting on raw features and then
# predicting on standardized ones produces meaningless predictions.
lasso_model_23 = Lasso(alpha=10)  # alpha is the L1 penalty strength; adjustable
lasso_model_23.fit(X_train_scaled, y_train.values[:,0])

# Evaluate on the held-out split (mean squared error).
lasso_predictions = lasso_model_23.predict(X_test_scaled)
lasso_mse = mean_squared_error(y_test.values[:,0], lasso_predictions)
print(f"Lasso Regression Mean Squared Error: {lasso_mse}")

# `y_pred` is kept as a separate name for any later code that refers to it;
# it holds the same test-set predictions, so the model is not run twice.
y_pred = lasso_predictions

# Coefficient of determination (R2) on the held-out split.
r2_lasso = r2_score(y_test.values[:,0], y_pred)
print(f'R-squared (R2) - Lasso: {r2_lasso}')

# Recap of the tuning results computed earlier in this cell.
print('Best Hyperparameters - XGBoost (Grid Search):', best_params_xgb)
print('Best Hyperparameters - XGBoost (Random Search):', best_params_xgb_random)
print(f'Grid Search XGBoost Mean Squared Error: {xgb_mse_tuned}')
print(f'Random Search XGBoost Mean Squared Error: {xgb_mse_tuned_random}')

# Actual values for every row: the first target column (the 2019 price).
y_actual = y.values[:,0]

# Predictions over the full feature table X.
# NOTE(review): X also contains the rows the models were trained on, so the
# scores below are not held-out estimates.
xgb_predictions_grid = xgb_model_tuned.predict(X)
xgb_predictions_random = xgb_model_tuned_random.predict(X)

# Compute all metrics up front, then report them model by model.
r2_xgb_grid = r2_score(y_true=y_actual, y_pred=xgb_predictions_grid)
mse_xgb_grid = mean_squared_error(y_true=y_actual, y_pred=xgb_predictions_grid)
r2_xgb_random = r2_score(y_true=y_actual, y_pred=xgb_predictions_random)
mse_xgb_random = mean_squared_error(y_true=y_actual, y_pred=xgb_predictions_random)

# Grid-search model.
print(f'Grid Search XGBoost R-squared (R2): {r2_xgb_grid}')
print(f'Grid Search XGBoost Mean Squared Error: {mse_xgb_grid}')

# Random-search model.
print(f'Random Search XGBoost R-squared (R2): {r2_xgb_random}')
print(f'Random Search XGBoost Mean Squared Error: {mse_xgb_random}')



Best Hyperparameters - XGBoost (Random Search): {'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.05}
Tuned XGBoost Mean Squared Error (Random Search): 43370293.38270316
Best Hyperparameters - XGBoost (Grid Search): {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Tuned XGBoost Mean Squared Error (Grid Search): 35474497.11612108
Lasso Regression Mean Squared Error: 1552941352461.3267
R-squared (R2) - Lasso: -9378.010421216499
Best Hyperparameters - XGBoost (Grid Search): {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Best Hyperparameters - XGBoost (Random Search): {'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.05}
Grid Search XGBoost Mean Squared Error: 35474497.11612108
Random Search XGBoost Mean Squared Error: 43370293.38270316
Grid Search XGBoost R-squared (R2): 0.8766976799867677
Grid Search XGBoost Mean Squared Error: 14204180.222256044
Random Search XGBoost R-squared (R2): 0.921767530505486
Random Search XGBoost Mean Squared Error: 9012223



In [None]:
# Read the listing's attributes from the user. The prompts and their order are
# unchanged: area, floor, construction year, X, Y.
area = float(input('전용면적(㎡)을 입력하세요: '))
floor = float(input('층_2023을 입력하세요: '))
built_year = int(input('건축년도를 입력하세요: '))
x_value = float(input('X 값을 입력하세요: '))
y_value = float(input('Y 값을 입력하세요: '))

user_input = {
    '전용면적(㎡)': area,
    '층_2023': floor,
    '건축년도': built_year,
    'X': x_value,
    'Y': y_value,
}

# The model was trained on every column of X, including one floor column per
# year. Only a single floor is asked for, so reuse it for each floor column;
# otherwise those features would silently be NaN in the frame built below.
# NOTE(review): assumes the unit's floor is the same for every year column -
# confirm against how the dataset was assembled.
user_input.update({
    column: floor for column in X.columns if column.startswith('층_')
})

# Fail loudly instead of predicting from an incomplete feature row.
missing_columns = [column for column in X.columns if column not in user_input]
if missing_columns:
    raise ValueError(f'Missing model features: {missing_columns}')

# Build a one-row frame with the same column order used for training.
# NOTE: xgb_model_tuned was fitted on the first target column (the 2019 price),
# so that is the quantity being predicted here.
xgb_input = pd.DataFrame([user_input], columns=X.columns)
xgb_predicted_price = xgb_model_tuned.predict(xgb_input)[0]

# Show the predicted price.
print(f'XGBoost로 예측된 주택 가격 (만원): {xgb_predicted_price}')



전용면적(㎡)을 입력하세요: 48.42
층_2023을 입력하세요: 2
건축년도를 입력하세요: 2017
X 값을 입력하세요: 126.824862
Y 값을 입력하세요: 37.496253
XGBoost로 예측된 주택 가격 (만원): 25952.236328125
