<a href="https://colab.research.google.com/github/minsoojo/MLprogramming/blob/main/boston.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

import pandas as pd



from sklearn.datasets import load_diabetes

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import RandomForestRegressor

from sklearn.linear_model import LinearRegression

from sklearn.pipeline import make_pipeline

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



# -----------------------------

# 1) 데이터 준비 (회귀용)

# -----------------------------

# 예시: 당뇨병 회귀 데이터셋 (다운로드 없이 사용 가능)

df = pd.read_csv("/content/drive/MyDrive/3-2/MLprogramming/boston.csv").dropna()


X = df.drop(columns=["PRICE"])
X = X.iloc[:, 1:]
y = df["PRICE"]



X_train, X_test, y_train, y_test = train_test_split(

    X, y, test_size=0.2, random_state=42

)



# -----------------------------

# 2) 모델 구성 (회귀 모델)

# -----------------------------

dt = DecisionTreeRegressor(random_state=42)

rf = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)



# 선형회귀는 스케일링과 함께 파이프라인 구성 권장

lr = make_pipeline(StandardScaler(with_mean=True, with_std=True), LinearRegression())



# -----------------------------

# 3) 모델 학습

# -----------------------------

dt.fit(X_train, y_train)

rf.fit(X_train, y_train)

lr.fit(X_train, y_train)



# -----------------------------

# 4) 모델 평가 함수

# -----------------------------

def eval_reg(y_true, y_pred):

    mae  = mean_absolute_error(y_true, y_pred)

    rmse = mean_squared_error(y_true, y_pred) # Removed squared=False

    r2   = r2_score(y_true, y_pred)

    return mae, rmse, r2



dt_mae, dt_rmse, dt_r2 = eval_reg(y_test, dt.predict(X_test))

rf_mae, rf_rmse, rf_r2 = eval_reg(y_test, rf.predict(X_test))

lr_mae, lr_rmse, lr_r2 = eval_reg(y_test, lr.predict(X_test))



print("=== Test Metrics (Regression) ===")

print("[Decision Tree]")

print(f"MAE: {dt_mae:.3f} | RMSE: {dt_rmse:.3f} | R^2: {dt_r2:.3f}")



print("[Random Forest]")

print(f"MAE: {rf_mae:.3f} | RMSE: {rf_rmse:.3f} | R^2: {rf_r2:.3f}")



print("[Linear Regression]")

print(f"MAE: {lr_mae:.3f} | RMSE: {lr_rmse:.3f} | R^2: {lr_r2:.3f}")

=== Test Metrics (Regression) ===
[Decision Tree]
MAE: 0.455 | RMSE: 0.495 | R^2: 0.622
[Random Forest]
MAE: 0.327 | RMSE: 0.253 | R^2: 0.807
[Linear Regression]
MAE: 0.533 | RMSE: 0.556 | R^2: 0.576
