In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# -----------------------------
# 1) Data preparation
# -----------------------------
# Load boston.csv, then discard every row that contains a missing value.
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/boston.csv")
df = df.dropna()

# Separate the target (y) from the features (X).
# The last column is treated as the house-price label; every other
# column is used as an input feature.
y = df[df.columns[-1]]
X = df.drop(columns=[y.name])

# Split the dataset (train: 80%, test: 20%); the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -----------------------------
# 2) Model setup
# 3) Model training
# -----------------------------
# Each estimator is constructed and fitted on the training split in a
# single expression; fit() hands back the estimator it was called on, so
# dt / rf / lr end up bound to the trained models.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
rf = RandomForestRegressor(n_estimators=200, random_state=42).fit(
    X_train, y_train
)
lr = LinearRegression().fit(X_train, y_train)

# -----------------------------
# 4) Model evaluation
# -----------------------------
# Compute test-set predictions for each trained model, in the order
# decision tree, random forest, linear regression.
dt_pred, rf_pred, lr_pred = (
    estimator.predict(X_test) for estimator in (dt, rf, lr)
)

# Compute and report the evaluation metrics (MSE and R²)
def evaluate(y_true, y_pred, model_name):
    """Print one summary line with the MSE and R² of a set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values scored against ``y_true``.
        model_name: Label printed at the start of the line. It is
            left-aligned and padded to 15 characters; a longer label is
            printed in full, which shifts the columns that follow it.

    Returns:
        None. The metrics are only printed, with four decimal places.
    """
    mean_sq_err = mean_squared_error(y_true, y_pred)
    r_squared = r2_score(y_true, y_pred)
    summary = f"{model_name:15s} | MSE: {mean_sq_err:.4f} | R²: {r_squared:.4f}"
    print(summary)

# Print a header, then one metrics line per model on the held-out test set.
print("=== Test Performance ===")
for _label, _predictions in (
    ("Decision Tree", dt_pred),
    ("Random Forest", rf_pred),
    ("Linear Regression", lr_pred),
):
    evaluate(y_test, _predictions, _label)

=== Test Performance ===
Decision Tree   | MSE: 0.4818 | R²: 0.6323
Random Forest   | MSE: 0.2400 | R²: 0.8168
Linear Regression | MSE: 0.5547 | R²: 0.5767
