# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# -----------------------------
# 1) Data preparation
# -----------------------------
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/diabetes.csv")

# Features (X): every column except Outcome and BMI / target (y): BMI
X = df.drop(columns=["Outcome", "BMI"])
y = df["BMI"]

# (Optional) Pima features in which a recorded 0 is treated as a missing value.
# BMI is left out of this list because it is the target (y).
zero_as_nan = ["Glucose", "BloodPressure", "SkinThickness", "Insulin"]

# Mark zeros as np.nan rather than pd.NA. np.nan is the marker SimpleImputer
# looks for by default, and it keeps the columns as plain float64. Writing
# pd.NA into NumPy-backed columns depends on the pandas version: it can turn
# the column into object dtype, which scikit-learn cannot convert to float.
for c in zero_as_nan:
    if c in X.columns:
        X[c] = X[c].replace(0, np.nan)

# Train/test split (the target is continuous, so no stratification is used)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -----------------------------
# 2) Model construction (regression)
# -----------------------------
# Each estimator is wrapped in a pipeline whose first step median-imputes
# missing feature values before the regressor sees them.
dt, rf, lr = [
    make_pipeline(SimpleImputer(strategy="median"), estimator)
    for estimator in (
        DecisionTreeRegressor(random_state=42),
        RandomForestRegressor(n_estimators=200, random_state=42),
        LinearRegression(),
    )
]

# -----------------------------
# 3) Model training
# -----------------------------
# Fit in declaration order: decision tree, random forest, linear regression.
for pipe in (dt, rf, lr):
    pipe.fit(X_train, y_train)

# -----------------------------
# 4) Model evaluation (regression metrics)
# -----------------------------
def report(name, y_true, y_pred):
    """Print a single line of regression metrics for one model.

    Args:
        name: Label printed first, right-aligned in a 14-character field.
        y_true: Ground-truth target values.
        y_pred: Predicted target values to score against ``y_true``.

    Returns:
        None. The line ``<name> | MAE=… | RMSE=… | R²=…`` is written to
        stdout with each metric rounded to three decimals.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is taken as the square root of MSE by hand instead of passing a
    # ``squared`` keyword to mean_squared_error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    columns = [
        f"{name:>14}",
        f"MAE={mae:.3f}",
        f"RMSE={rmse:.3f}",
        f"R²={r2:.3f}",
    ]
    print(" | ".join(columns))

# Score every fitted pipeline on the held-out test split, one line per model.
print("=== Test Performance ===")
for label, model in (
    ("Decision Tree", dt),
    ("Random Forest", rf),
    ("Linear Reg.", lr),
):
    report(label, y_test, model.predict(X_test))