In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
import pandas as pd

df = pd.read_csv("./weather_data_all.csv")
target_col = "평균 상대습도(%)"
drop_cols = ["지점", "지점명", "일시", target_col]
feature_cols = [col for col in df.columns if col not in drop_cols]

valid_features = df[feature_cols].loc[:, df[feature_cols].isna().mean() < 0.3].columns.tolist()

humidity_df = df[["지점명", "일시", target_col] + valid_features].dropna()
X = humidity_df[valid_features]
y = humidity_df[target_col]

imputer = SimpleImputer(strategy="mean")
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=valid_features)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_imputed, y)
importances = model.feature_importances_

importance_df = pd.DataFrame({
    "Feature": valid_features,
    "Importance": importances
}).sort_values(by="Importance", ascending=False)


importance_df.head(10)


Unnamed: 0,Feature,Importance
15,최소 상대습도(%),0.834922
17,평균 증기압(hPa),0.037458
0,평균기온(°C),0.029401
14,평균 이슬점온도(°C),0.022215
21,최저 해면기압(hPa),0.008268
1,최저기온(°C),0.008126
24,가조시간(hr),0.004847
26,평균 전운량(1/10),0.004788
18,평균 현지기압(hPa),0.00416
16,최소 상대습도 시각(hhmi),0.003935


In [6]:

future_dates = pd.date_range(start="2026-09-01", end="2026-11-30")
future_df = pd.DataFrame({"일시": future_dates})
future_df["month"] = future_df["일시"].dt.month
future_df["day"] = future_df["일시"].dt.day
future_df["dayofyear"] = future_df["일시"].dt.dayofyear

# 변수 중요도 상위 5개를 기반으로 입력 데이터 구성
top_features = importance_df["Feature"].head(5).tolist()

# 예측 결과 저장 리스트
humidity_predictions = []

# 지점별 상대습도 예측
for station in humidity_df["지점명"].unique():
    station_df = humidity_df[humidity_df["지점명"] == station].copy()

    # 입력 및 타겟 데이터
    X = station_df[top_features]
    y = station_df["평균 상대습도(%)"]
    
    # 결측치 처리
    imputer = SimpleImputer(strategy="mean")
    X = imputer.fit_transform(X)
    
    # 모델 학습
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)
    
    # 미래 입력값 생성
    future_df_copy = future_df.copy()
    future_df_copy["지점명"] = station

    # 필요한 컬럼 생성: 상위 특성 중 평균값 사용
    for feature in top_features:
        if feature in station_df.columns:
            future_df_copy[feature] = station_df[feature].mean()
    
    # 입력 구성 및 예측
    X_future = imputer.transform(future_df_copy[top_features])
    future_df_copy["예측 평균 상대습도(%)"] = model.predict(X_future)
    
    humidity_predictions.append(future_df_copy[["지점명", "일시", "예측 평균 상대습도(%)"]])

predicted_humidity_2026 = pd.concat(humidity_predictions, ignore_index=True)
predicted_humidity_2026.head()


Unnamed: 0,지점명,일시,예측 평균 상대습도(%)
0,제주,2026-09-01,74.857
1,제주,2026-09-02,74.857
2,제주,2026-09-03,74.857
3,제주,2026-09-04,74.857
4,제주,2026-09-05,74.857


In [9]:
import numpy as np
def calculate_effective_humidity(df, r=0.7):
    weights = np.array([r**i for i in range(5)])
    He = (1 - r) * sum(df["예측 평균 상대습도(%)"].shift(i).fillna(method='bfill') * weights[i] for i in range(5))
    return He

# 지점별로 실효습도 계산
effective_humidity_results = []

for station in predicted_humidity_2026["지점명"].unique():
    station_df = predicted_humidity_2026[predicted_humidity_2026["지점명"] == station].copy()
    station_df = station_df.sort_values("일시").reset_index(drop=True)

    # 실효습도 계산
    station_df["실효습도"] = calculate_effective_humidity(station_df, r=0.7)

    # 건조주의보 발생 조건: 실효습도 ≤ 25가 이틀 이상 지속
    station_df["건조주의보"] = (
        (station_df["실효습도"] <= 25) &
        (station_df["실효습도"].shift(1) <= 25)
    ).astype(int)

    effective_humidity_results.append(station_df)

# 결과 병합
dry_warning_2026 = pd.concat(effective_humidity_results, ignore_index=True)


dry_warning_2026.to_csv("건조주의보_확인용", index=False)


  He = (1 - r) * sum(df["예측 평균 상대습도(%)"].shift(i).fillna(method='bfill') * weights[i] for i in range(5))
  He = (1 - r) * sum(df["예측 평균 상대습도(%)"].shift(i).fillna(method='bfill') * weights[i] for i in range(5))
  He = (1 - r) * sum(df["예측 평균 상대습도(%)"].shift(i).fillna(method='bfill') * weights[i] for i in range(5))
  He = (1 - r) * sum(df["예측 평균 상대습도(%)"].shift(i).fillna(method='bfill') * weights[i] for i in range(5))


Unnamed: 0,지점명,일시,예측 평균 상대습도(%),실효습도,건조주의보
0,제주,2026-09-01,74.857,62.275784,0
1,제주,2026-09-02,74.857,62.275784,0
2,제주,2026-09-03,74.857,62.275784,0
3,제주,2026-09-04,74.857,62.275784,0
4,제주,2026-09-05,74.857,62.275784,0
