In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

file_path = '/content/drive/MyDrive/weather.csv'
df = pd.read_csv(file_path)

# 1. 독립변수/종속변수 분리
X = df.drop("RainTomorrow", axis=1)
y = df["RainTomorrow"]

# 2. 범주형 변수 처리 (Label Encoding)
for col in X.select_dtypes(include=["object"]).columns:
    X[col] = LabelEncoder().fit_transform(X[col].astype(str))

y = LabelEncoder().fit_transform(y.astype(str))  # Yes/No → 1/0

# 3. 결측치 처리 (평균 대체)
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)

# 4. 표준화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 5. train/test 분리
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# 6. K-NN 모델 학습
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# 7. 예측 및 평가
y_pred = knn.predict(X_test)

print("✅ 혼동행렬")
print(confusion_matrix(y_test, y_pred))

print("\n✅ 분류 리포트")
print(classification_report(y_test, y_pred))




✅ 혼동행렬
[[56  2]
 [10  6]]

✅ 분류 리포트
              precision    recall  f1-score   support

           0       0.85      0.97      0.90        58
           1       0.75      0.38      0.50        16

    accuracy                           0.84        74
   macro avg       0.80      0.67      0.70        74
weighted avg       0.83      0.84      0.82        74



In [8]:
from sklearn.metrics import accuracy_score

# Explore different k values
k_values = range(1, 21)
accuracy_scores = []

for k in k_values:
    # Train KNN model with current k
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

# Display the accuracy scores for each k
for k, accuracy in zip(k_values, accuracy_scores):
    print(f"k = {k}: Accuracy = {accuracy:.4f}")

k = 1: Accuracy = 0.7838
k = 2: Accuracy = 0.8108
k = 3: Accuracy = 0.8108
k = 4: Accuracy = 0.8378
k = 5: Accuracy = 0.8378
k = 6: Accuracy = 0.8378
k = 7: Accuracy = 0.8378
k = 8: Accuracy = 0.8378
k = 9: Accuracy = 0.8514
k = 10: Accuracy = 0.8514
k = 11: Accuracy = 0.8649
k = 12: Accuracy = 0.8378
k = 13: Accuracy = 0.8514
k = 14: Accuracy = 0.8378
k = 15: Accuracy = 0.8378
k = 16: Accuracy = 0.8378
k = 17: Accuracy = 0.8378
k = 18: Accuracy = 0.8108
k = 19: Accuracy = 0.8243
k = 20: Accuracy = 0.8243


In [11]:
# Train KNN model with the best k (k=11)
best_k = 11
knn_best = KNeighborsClassifier(n_neighbors=best_k)
knn_best.fit(X_train, y_train)

# Predict and evaluate
y_pred_best = knn_best.predict(X_test)

print(f"✅ 혼동행렬 (k={best_k})")
print(confusion_matrix(y_test, y_pred_best))

print(f"\n✅ 분류 리포트 (k={best_k})")
print(classification_report(y_test, y_pred_best))

✅ 혼동행렬 (k=11)
[[58  0]
 [10  6]]

✅ 분류 리포트 (k=11)
              precision    recall  f1-score   support

           0       0.85      1.00      0.92        58
           1       1.00      0.38      0.55        16

    accuracy                           0.86        74
   macro avg       0.93      0.69      0.73        74
weighted avg       0.88      0.86      0.84        74

