# Gradient Boosting Models for Flood Prediction

This notebook demonstrates how to use XGBoost and CatBoost models for flood prediction.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import yaml

# Make the project's top-level packages (e.g. `src`) importable from this notebook.
# NOTE(review): assumes the kernel's working directory sits two levels below
# the project root — confirm against where this notebook actually lives.
project_root = Path.cwd().parent.parent
# Guard the append so re-running this cell does not pile up duplicate
# sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.data.loader import DataLoader
from src.models.train import ModelTrainer
from src.models.advanced_models import XGBoostModel, CatBoostModel
from src.utils.preprocessing import process_river, extract_time_features

## 1. Load Configuration

In [None]:
# Load model configuration.
# The file is opened explicitly as UTF-8 so that parsing does not depend on
# the platform's default locale encoding.
with open(project_root / 'config' / 'model_config.yaml', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# One parameter section per model; each is unpacked as keyword arguments
# when the corresponding model object is constructed.
xgb_params = config['xgboost']
catboost_params = config['catboost']

print('XGBoost parameters:', xgb_params)
print('\nCatBoost parameters:', catboost_params)

## 2. Load and Preprocess Data

In [None]:
# Station to model
station_id = "your_station_id"  # Replace with actual station ID

# Fetch that station's records through the project data loader
loader = DataLoader()
data = loader.load_station_data(station_id)

# Preprocess the river data, then derive the time-based features from it
processed_data = process_river(data)
feature_data = extract_time_features(processed_data)

# Columns handed to the models: the sin/cos pairs for hour, day and month,
# followed by the weekend flag
trig_features = [
    'hour_sin', 'hour_cos',
    'day_sin', 'day_cos',
    'month_sin', 'month_cos',
]
feature_columns = [*trig_features, 'is_weekend']

print('Available features:', feature_columns)

## 3. Train XGBoost Model

In [None]:
# Trainer for the XGBoost experiment, plus a model built from the configured
# XGBoost parameters
trainer = ModelTrainer(experiment_name="flood-prediction-xgboost")
xgb_model = XGBoostModel(**xgb_params)

# Fit and evaluate on the engineered features; the trainer returns a mapping
# of metric name -> value
xgb_metrics = trainer.train_model(
    model=xgb_model,
    station_id=station_id,
    data=feature_data,
    feature_columns=feature_columns,
)

# Report every metric to four decimal places
print('\nXGBoost Performance Metrics:')
for metric, value in xgb_metrics.items():
    print(f'{metric}: {value:.4f}')

## 4. Train CatBoost Model

In [None]:
# Trainer for the CatBoost experiment (rebinds `trainer`), plus a model built
# from the configured CatBoost parameters
trainer = ModelTrainer(experiment_name="flood-prediction-catboost")
catboost_model = CatBoostModel(**catboost_params)

# Fit and evaluate on the same station, data and feature columns as the
# XGBoost run; the trainer returns a mapping of metric name -> value
catboost_metrics = trainer.train_model(
    model=catboost_model,
    station_id=station_id,
    data=feature_data,
    feature_columns=feature_columns,
)

# Report every metric to four decimal places
print('\nCatBoost Performance Metrics:')
for metric, value in catboost_metrics.items():
    print(f'{metric}: {value:.4f}')

## 5. Compare Models

To compare the models in the MLflow UI:
1. Open a terminal
2. Navigate to the project root
3. Run: `mlflow ui`
4. Open http://localhost:5000 in your browser

You can compare:
- Model parameters
- Performance metrics
- Feature importance
- Forecast plots