<a href="https://colab.research.google.com/github/suyeon1002/uoxhns/blob/main/2_homework_2024671028.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np

# 데이터 로딩 및 분할
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# 정규화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 모델 리스트 및 결과 저장용 dict
models = {
    'LinearRegression': LinearRegression(),
    'Ridge': None,  # 후에 하이퍼파라미터 튜닝
    'Lasso': None
}
results = {}

# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)
results['LinearRegression'] = {
    'model': lr_model,
    'r2': r2_score(y_test, y_pred_lr),
    'mse': mean_squared_error(y_test, y_pred_lr)
}

# Ridge Regression with best alpha
best_r2_ridge = -np.inf
best_alpha_ridge = None
for alpha in [0.01, 0.1, 1.0, 10.0, 100.0]:
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train_scaled, y_train)
    y_pred_ridge = ridge_model.predict(X_test_scaled)
    score = r2_score(y_test, y_pred_ridge)
    if score > best_r2_ridge:
        best_r2_ridge = score
        best_alpha_ridge = alpha
        best_ridge_model = ridge_model

results['Ridge'] = {
    'model': best_ridge_model,
    'r2': best_r2_ridge,
    'mse': mean_squared_error(y_test, best_ridge_model.predict(X_test_scaled)),
    'alpha': best_alpha_ridge
}

# Lasso Regression with best alpha
best_r2_lasso = -np.inf
best_alpha_lasso = None
for alpha in [0.01, 0.1, 1.0, 10.0, 100.0]:
    lasso_model = Lasso(alpha=alpha, max_iter=10000)
    lasso_model.fit(X_train_scaled, y_train)
    y_pred_lasso = lasso_model.predict(X_test_scaled)
    score = r2_score(y_test, y_pred_lasso)
    if score > best_r2_lasso:
        best_r2_lasso = score
        best_alpha_lasso = alpha
        best_lasso_model = lasso_model

results['Lasso'] = {
    'model': best_lasso_model,
    'r2': best_r2_lasso,
    'mse': mean_squared_error(y_test, best_lasso_model.predict(X_test_scaled)),
    'alpha': best_alpha_lasso
}

# 결과 출력
for name, res in results.items():
    print(f"{name}: R² = {res['r2']:.4f}, MSE = {res['mse']:.4f}")
    if 'alpha' in res:
        print(f"    Best alpha: {res['alpha']}")

# 최고 성능 모델 확인
best_model_name = max(results, key=lambda x: results[x]['r2'])
print(f"\n>> 가장 높은 R² score를 보인 모델은 '{best_model_name}' 입니다.")


LinearRegression: R² = 0.4526, MSE = 2900.1936
Ridge: R² = 0.4605, MSE = 2858.2243
    Best alpha: 100.0
Lasso: R² = 0.4669, MSE = 2824.5681
    Best alpha: 1.0

>> 가장 높은 R² score를 보인 모델은 'Lasso' 입니다.
