# Deep Learning Models for Flood Prediction

This notebook demonstrates how to use LSTM and Transformer models for flood prediction.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import yaml

# Make the project's `src` package importable from this notebook.
# The project root is taken to be two directory levels above the current working directory.
project_root = Path.cwd().parent.parent

# Guard the append so that re-running this cell does not stack duplicate entries in sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.data.loader import DataLoader
from src.models.train import ModelTrainer
from src.models.advanced_models import LSTMModel, TransformerModel
from src.utils.preprocessing import process_river, extract_time_features

## 1. Load Configuration

In [None]:
# Load the model hyperparameters from the project's YAML configuration file.
# The encoding is pinned to UTF-8 so the read does not depend on the platform's default locale.
config_path = project_root / 'config' / 'model_config.yaml'
with open(config_path, encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Per-model settings; each mapping is later unpacked as keyword arguments
# into the corresponding model constructor.
lstm_params = config['lstm']
transformer_params = config['transformer']

print('LSTM parameters:', lstm_params)
print('\nTransformer parameters:', transformer_params)

## 2. Load and Preprocess Data

In [None]:
# Set up the loader used to fetch per-station data
loader = DataLoader()

# Station to model -- placeholder value; replace with an actual station ID before running
station_id = "your_station_id"
data = loader.load_station_data(station_id)

# Run the project's river preprocessing step, then derive the time-based features from its output
processed_data = process_river(data)
feature_data = extract_time_features(processed_data)

# Time-derived inputs: a sin/cos column pair for hour, day and month, plus the weekend flag
time_feature_columns = [
    'hour_sin',
    'hour_cos',
    'day_sin',
    'day_cos',
    'month_sin',
    'month_cos',
    'is_weekend',
]

# Full model input list; the target variable is included for the sequence models
feature_columns = [*time_feature_columns, 'value']

print('Available features:', feature_columns)

## 3. Train LSTM Model

In [None]:
# Trainer bound to the LSTM experiment, and an LSTM whose input width matches the feature list.
# Remaining constructor arguments come from the `lstm` section of the loaded config.
trainer = ModelTrainer(experiment_name="flood-prediction-lstm")
lstm_input_size = len(feature_columns)
lstm_model = LSTMModel(input_size=lstm_input_size, **lstm_params)

# Train and evaluate on the prepared features; the result is a mapping of metric name -> value
lstm_metrics = trainer.train_model(
    station_id=station_id,
    model=lstm_model,
    data=feature_data,
    feature_columns=feature_columns,
)

# Report every metric to four decimal places
print('\nLSTM Performance Metrics:')
for metric_name, metric_value in lstm_metrics.items():
    print(f'{metric_name}: {metric_value:.4f}')

## 4. Train Transformer Model

In [None]:
# Rebind `trainer` to the Transformer experiment and build a Transformer whose input width
# matches the feature list. Remaining constructor arguments come from the `transformer` config section.
trainer = ModelTrainer(experiment_name="flood-prediction-transformer")
transformer_input_size = len(feature_columns)
transformer_model = TransformerModel(input_size=transformer_input_size, **transformer_params)

# Train and evaluate on the prepared features; the result is a mapping of metric name -> value
transformer_metrics = trainer.train_model(
    station_id=station_id,
    model=transformer_model,
    data=feature_data,
    feature_columns=feature_columns,
)

# Report every metric to four decimal places
print('\nTransformer Performance Metrics:')
for metric_name, metric_value in transformer_metrics.items():
    print(f'{metric_name}: {metric_value:.4f}')

## 5. Model Analysis

### 5.1 Compare Sequence Lengths

Different sequence lengths can significantly impact model performance:

In [None]:
sequence_lengths = [24, 48, 72, 168]  # 1 day, 2 days, 3 days, 1 week

# Use a dedicated trainer for this comparison. By this point `trainer` has been rebound to the
# "flood-prediction-transformer" experiment, so reusing it would attach these LSTM runs to the
# wrong experiment name.
seq_len_trainer = ModelTrainer(experiment_name="flood-prediction-lstm")

# RMSE per sequence length, kept so the results can be compared after the loop finishes
seq_len_rmse = {}

for seq_len in sequence_lengths:
    print(f'\nTesting sequence length: {seq_len} hours')

    # Override the sequence length on a copy of the parameters. Writing into `lstm_params`
    # directly would also change `config['lstm']`, so re-running the earlier LSTM cell
    # afterwards would silently train with the last value tried here.
    seq_len_params = {**lstm_params, 'sequence_length': seq_len}

    # A separate name keeps the baseline `lstm_model` from the earlier section intact
    seq_len_model = LSTMModel(input_size=len(feature_columns), **seq_len_params)

    # Train and evaluate; the `_seq<N>` suffix on the station ID tells the runs apart
    metrics = seq_len_trainer.train_model(
        station_id=f'{station_id}_seq{seq_len}',
        model=seq_len_model,
        data=feature_data,
        feature_columns=feature_columns
    )

    seq_len_rmse[seq_len] = metrics['rmse']
    print('RMSE:', metrics['rmse'])

## 6. View Results in MLflow

To compare all models in the MLflow UI:
1. Open a terminal
2. Navigate to the project root
3. Run: `mlflow ui`
4. Open http://localhost:5000 in your browser

You can compare:
- Model architectures
- Training parameters
- Performance metrics
- Learning curves
- Forecast plots

### Key Observations:
1. LSTM vs Transformer:
   - LSTM might be better for shorter sequences
   - Transformer might handle longer dependencies better
   
2. Sequence Length Impact:
   - Shorter sequences: More responsive to recent changes
   - Longer sequences: Better at capturing longer-period patterns, such as daily and weekly cycles
   
3. Training Considerations:
   - Deep learning models typically need more training data than simpler models to generalize well
   - Consider using GPU for faster training
   - May need to tune learning rate and batch size