# Climate Change Modeling

In [None]:
# 02 — Modeling
# Setup cell: bring in the third-party libraries and the project helpers used
# by the cells below, then declare where the input data lives.
import pandas as pd
import numpy as np

from src.data_preprocessing import basic_cleaning, train_val_test_split, scale_numeric
from src.modeling import train_random_forest, evaluate, save_artifacts
from src.feature_engineering import add_time_lags, add_rolling_features

# Relative path to the cleaned dataset (produced by the 01 notebook).
DATA_PATH = "../data/clean_climate.csv"

# Load the dataset and run it through the project's cleaning helper.
df = pd.read_csv(DATA_PATH)
df = basic_cleaning(df)

# Choose target.
# Only numeric columns are eligible: the baseline below keeps numeric columns
# only, so a non-numeric target would be dropped from df_num before the split.
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
if not numeric_cols:
    raise ValueError(
        f"No numeric columns found in {DATA_PATH!r}; cannot choose a target."
    )
# Prefer the first numeric column whose name mentions 'anomaly' or 'target';
# otherwise fall back to the first numeric column. str() guards against
# non-string column labels.
target = next(
    (
        col
        for col in numeric_cols
        if any(key in str(col).lower() for key in ('anomaly', 'target'))
    ),
    numeric_cols[0],
)
print('Target:', target)

# Optional time features derived from the target column.
df = add_time_lags(df, target)
df = add_rolling_features(df, target)

# Keep numeric only and drop NAs for baseline.
df_num = df.select_dtypes(include=[np.number]).dropna()

# Split into train/validation/test, then scale the three feature sets; the
# scaler object is kept so it can be saved next to the model.
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(df_num, target)
Xtr, Xv, Xte, scaler = scale_numeric(X_train, X_val, X_test)

# Train RF on the scaled training split, passing the validation split along.
result = train_random_forest(Xtr, y_train, Xv, y_val)
# Bare expression so the notebook displays the validation metrics.
result['val_metrics']

In [None]:
# Held-out evaluation: score the model returned by the training cell on the
# scaled test features and their targets.
fitted_model = result['model']
test_metrics = evaluate(fitted_model, Xte, y_test)
# Bare expression so the notebook displays the test metrics.
test_metrics

In [None]:
# Save the trained model together with the scaler used on its inputs.
artifact_dir = '../artifacts'
artifact_prefix = 'rf'
save_artifacts(result['model'], scaler, out_dir=artifact_dir, prefix=artifact_prefix)
# NOTE(review): the paths in this message are hard-coded; presumably they
# mirror how save_artifacts names its files from out_dir/prefix — confirm.
print('Artifacts saved: ../artifacts/rf_model.pkl, ../artifacts/rf_scaler.pkl')