# Weather 데이터 분류

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier

In [2]:
# 데이터 불러오기
weather_df = pd.read_csv("weather.csv")

In [3]:
weather_df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,11/1/2007,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,29,1019.7,1015.0,7,7,14.4,23.6,No,3.6,Yes
1,11/2/2007,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,36,1012.4,1008.4,5,3,17.5,25.7,Yes,3.6,Yes
2,11/3/2007,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,69,1009.5,1007.2,8,7,15.4,20.2,Yes,39.8,Yes
3,11/4/2007,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,56,1005.5,1007.0,2,7,13.5,14.1,Yes,2.8,Yes
4,11/5/2007,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,49,1018.3,1018.5,7,7,11.1,15.4,Yes,0.0,No


In [4]:
df = weather_df.drop(["Date", "Location"], axis=1).dropna()
X = df.drop("RainTomorrow", axis=1)
y = df["RainTomorrow"]

In [5]:
# 범주형 처리
for col in X.select_dtypes(include="object").columns:
    X[col] = LabelEncoder().fit_transform(X[col])
y = LabelEncoder().fit_transform(y)

In [6]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# 스케일링
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# 분류 모델 학습
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
pred = model.predict(X_test_scaled)

In [9]:
# 결과
print("===== Weather Classification =====")
print("Accuracy:", accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

===== Weather Classification =====
Accuracy: 0.9848484848484849
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        56
           1       1.00      0.90      0.95        10

    accuracy                           0.98        66
   macro avg       0.99      0.95      0.97        66
weighted avg       0.99      0.98      0.98        66

