<a href="https://colab.research.google.com/github/shunnyK/AI/blob/main/weather_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
import numpy as np

# -----------------------------
# 1) Load the data & preprocess
# -----------------------------
# Read the CSV, discard the RainTomorrow column when the file has one
# (errors="ignore" turns the drop into a no-op otherwise), then keep only
# the rows that contain no missing values.
df = (
    pd.read_csv("/content/drive/MyDrive/weather.csv")
    .drop(columns=["RainTomorrow"], errors="ignore")
    .dropna()
)

# Label (y): RISK_MM, the rainfall amount in mm.
# Features (X): every remaining column, passed through pd.get_dummies so
# categorical columns become indicator columns.
X = pd.get_dummies(df.drop(columns=["RISK_MM"]))
y = df["RISK_MM"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -----------------------------
# 2) Build the four models
# -----------------------------
# The tree-based regressors are used on the features as-is.
models = {
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=200, random_state=42),
}

# Linear regression and KNN are each wrapped in a pipeline that standardises
# the features first; both pipelines share the same ("scaler", "reg") layout.
models.update({
    label: Pipeline(steps=[("scaler", StandardScaler()), ("reg", regressor)])
    for label, regressor in [
        ("Linear Reg.", LinearRegression()),
        ("KNN", KNeighborsRegressor(n_neighbors=5)),
    ]
})

# -----------------------------
# 3) Train & evaluate
# -----------------------------
print("=== Test Performance ===")
for name, model in models.items():
    # fit() returns the fitted estimator, so training and prediction on the
    # test split can be chained.
    preds = model.fit(X_train, y_train).predict(X_test)

    # RMSE: magnitude of the prediction error.
    rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=preds))
    # R²: coefficient of determination on the test split.
    r2 = r2_score(y_true=y_test, y_pred=preds)

    print(f"{name:14s} | RMSE: {rmse:.4f} | R²: {r2:.4f}")

=== Test Performance ===
Decision Tree  | RMSE: 3.9054 | R²: -0.6321
Random Forest  | RMSE: 2.9650 | R²: 0.0593
Linear Reg.    | RMSE: 2.9807 | R²: 0.0493
KNN            | RMSE: 3.1437 | R²: -0.0576
