# Forest Fire Detection System - Main Workflow

This Jupyter Notebook serves as the main entry point for the Forest Fire Detection System. It orchestrates the entire workflow, from data preprocessing and dataset creation to model training, evaluation, and hyperparameter tuning. Each step can be run independently or sequentially.
Fire hotspot data reference (NASA FIRMS, VIIRS): https://firms.modaps.eosdis.nasa.gov/descriptions/FIRMS_VIIRS_Firehotspots.html

**Note:** the acquisition time in the hotspot data is given in UTC.

## 1. Setup and Configuration

In [None]:
import os
import sys
import logging

# Add 'aad' directory to sys.path for module imports
sys.path.insert(0, os.path.abspath("aad"))

from aad.common.config import Config
from aad.common.reload_all import reload_all
import aad.autoencoder.trainer_standard as trainer_standard
import aad.autoencoder.utils as utils
import aad.autoencoder.evaluator as evaluator
#import aad.model_tuning as model_tuning
import aad.autoencoder.clustering as clustering
from aad.data.utils import create_full_dataloader
from aad.common.core_logging import ProcessLogger

# Emit timestamped INFO-level records so progress is visible in the cell output.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Root folders for this run; edit these to point the workflow elsewhere.
DATA_DIR = "data"
OUTPUT_DIR = "output_7p_sma"
MODEL_DIR = "models_7p_sma"

# Publish the folders through the environment before Config() is built, so the
# Config class can pick them up (Config is defined in aad.common.config).
os.environ.update(
    {
        "DATA_DIR": DATA_DIR,
        "OUTPUT_DIR": OUTPUT_DIR,
        "MODEL_DIR": MODEL_DIR,
    }
)

config = Config()

# Echo the resolved locations so a misconfigured path is spotted early.
print(f"Using data directory: {config.paths.DATA_DIR}")
print(f"Using output directory: {config.paths.OUTPUT_DIR}")
print(f"Using model directory: {config.paths.MODEL_DIR}")

# Config() is expected to create these already; repeating it is harmless
# because exist_ok=True makes the call a no-op for existing folders.
for required_dir in (
    config.paths.DATA_DIR,
    config.paths.OUTPUT_DIR,
    config.paths.MODEL_DIR,
):
    os.makedirs(required_dir, exist_ok=True)

## 4. Train the Autoencoder Model

This section trains the autoencoder neural network using the prepared dataset. The model learns to reconstruct normal environmental patterns, which is crucial for anomaly detection.

In [None]:
# ForestFireDetection.ipynb

import logging
from aad.autoencoder.trainer_standard import StandardTrainer
from aad.autoencoder.model_vae import VariationalAutoencoder
from aad.data.utils import create_dataloaders
from aad.common.reload_all import reload_all  # Explicit import for clarity

# Per-time-step feature width: every raw input column, plus three derived
# series (sma, min and max) per column for each SMA multiplier.
raw_column_count = len(config.data_pipeline.INPUT_COLUMNS)
derived_column_count = raw_column_count * len(config.data_pipeline.SMA_MULTIPLIERS) * 3
num_features = raw_column_count + derived_column_count

# Notebook-level overrides of the configured defaults.
config.training.BATCH_SIZE = 256
config.tuning.LATENT_DIM = 12

# NOTE(review): d_model is tied to num_features while num_heads is fixed at 3.
# If the model uses standard multi-head attention, d_model must be divisible
# by num_heads -- confirm against VariationalAutoencoder.
model = VariationalAutoencoder(
    num_heads=3,
    d_model=num_features,
    dropout=0.2,
    latent_dim=config.tuning.LATENT_DIM,
    num_features=num_features,
    time_steps=config.data_pipeline.WINDOW_SIZE,
)


In [None]:
# Refresh project modules before training (reload_all is defined in
# aad.common.reload_all; its exact behaviour is not visible in this notebook).
reload_all()

# Training settings for this run.
# NOTE(review): PATIENCE (25) is larger than EPOCHS (10), so patience-based
# early stopping presumably cannot trigger within this run -- confirm intended.
config.training.RANDOM_SEED = 42
config.training.EPOCHS = 10
config.training.LEARNING_RATE = 1e-5
config.training.PATIENCE = 25
config.training.USE_BETA_SCHEDULE = True
config.training.BETA_SCHEDULE_TYPE = "cosine"
config.training.DEVICE = "cpu"
config.data_pipeline.NUM_SAMPLES = 100_000

logger = ProcessLogger(config, "trainer")
trainer = StandardTrainer(
    logger=logger,
    random_seed=config.training.RANDOM_SEED,
    epochs=config.training.EPOCHS,
    patience=config.training.PATIENCE,
    learning_rate=config.training.LEARNING_RATE,
    loss_function_name=config.training.LOSS_FUNCTION,
    optimizer_name=config.training.OPTIMIZER,
    batch_size=config.training.BATCH_SIZE,
    latent_dim=config.tuning.LATENT_DIM,
    hidden_dim=config.tuning.HIDDEN_DIM,
    window_size=config.data_pipeline.WINDOW_SIZE,
    # Use the same expanded feature width the model was built with (raw input
    # columns plus their sma/min/max columns), not the raw column count, so
    # the trainer and the model describe the same input shape.
    num_features=num_features,
    device=config.training.DEVICE,
    stats_images_dir=config.paths.STATS_IMAGES_DIR,
    training_statistics_image_path=config.paths.TRAINING_STATISTICS_IMAGE_PATH,
    best_model_path=config.paths.BEST_MODEL_PATH,
    loss_history_path=config.paths.LOSS_HISTORY_PATH,
    callbacks=None,
)

# Split the dataset into train/validation/test loaders. Fire-labelled samples
# are requested to be removed (remove_fire_labels=True) -- presumably so the
# autoencoder only sees normal conditions; verify in create_dataloaders.
train_loader, val_loader, test_loader = create_dataloaders(
    config.paths.DATASET_PATH,
    config.data_pipeline.NUM_SAMPLES,
    config.training.RANDOM_SEED,
    config.training.TRAIN_SPLIT,
    config.training.VAL_SPLIT,
    config.training.BATCH_SIZE,
    ProcessLogger(config, "dataloader"),
    remove_fire_labels=True,
    fire_threshold_distance_min=20000,
)

# Sanity check: show the shape of the first tensor held by the training dataset.
print(train_loader.dataset.tensors[0].shape)

# train() returns a pair; only the trained model is kept here.
model, _ = trainer.train(model, train_loader, val_loader, True)
logging.info("Training completed successfully.")



## 5. Evaluate the Model

After training, the model is evaluated to assess its performance in detecting anomalies. This step calculates reconstruction errors, applies anomaly thresholds, and compares predictions against ground truth fire events to compute metrics like precision, recall, and F1-score.

In [None]:
from aad.autoencoder.evaluator import ModelEvaluator
import torch

# Build a loader over the dataset for evaluation. This rebinds `test_loader`,
# replacing the split produced in the training cell. The second argument is
# None -- presumably "no sample limit"; verify in create_full_dataloader.
test_loader = create_full_dataloader(
    config.paths.DATASET_PATH,
    None,
    config.training.RANDOM_SEED,
    config.training.BATCH_SIZE,
    ProcessLogger(config, "dataloader"),
)

logger = ProcessLogger(config, "evaluator")

# Restore the checkpoint saved at BEST_MODEL_PATH onto the configured device.
best_state = torch.load(
    config.paths.BEST_MODEL_PATH,
    map_location=config.training.DEVICE,
)
model.load_state_dict(best_state)

# Evaluation thresholds for this run.
config.training.ANOMALY_THRESHOLD_PERCENTILE = 99.98
config.training.DISTANCE_FILTER_THRESHOLD_M = 7200

evaluator_kwargs = {
    "anomaly_threshold_percentile": config.training.ANOMALY_THRESHOLD_PERCENTILE,
    "distance_filter_threshold_m": config.training.DISTANCE_FILTER_THRESHOLD_M,
    "device": config.training.DEVICE,
    "logger": logger,
    "model": model,
    "test_loader": test_loader,
    "stats_images_dir": config.paths.STATS_IMAGES_DIR,
    "stats_csv_dir": config.paths.STATS_CSV_DIR,
    "eval_stats_img_path": config.paths.EVALUATION_STATISTICS_IMAGE_PATH,
    "eval_results_csv_path": config.paths.EVALUATION_RESULTS_CSV_PATH,
    "eval_summary_json_path": config.paths.EVALUATION_SUMMARY_JSON_PATH,
}

# NOTE(review): this name rebinds `evaluator`, which the setup cell imported as
# the aad.autoencoder.evaluator module alias; the module is no longer reachable
# under that name after this cell runs.
evaluator = ModelEvaluator(**evaluator_kwargs)
evaluator.evaluate_model(True)


In [None]:
# Set to True to skip the clustering analysis.
SKIP_CLUSTERING = False

if not SKIP_CLUSTERING:
    logging.info("--- Performing model clustering for the autoencoder model ---")
    reload_all()
    # Call create_full_dataloader with the same positional arguments as the
    # evaluation cell (dataset path, sample limit, seed, batch size, logger);
    # passing only `config` does not match that signature.
    dataloader = create_full_dataloader(
        config.paths.DATASET_PATH,
        None,
        config.training.RANDOM_SEED,
        config.training.BATCH_SIZE,
        ProcessLogger(config, "dataloader"),
    )
    # Clustering settings read from config by the analysis step.
    config.tuning.KMEANS_N_CLUSTERS = 32
    config.tuning.DBSCAN_EPS = 0.5
    config.tuning.DBSCAN_MIN_SAMPLES = 5
    clustering.analyze_embeddings(config, dataloader)
else:
    logging.info("Skipping model clustering for the autoencoder model")

## 6. Hyperparameter Tuning

This optional step performs hyperparameter tuning to optimize the autoencoder's performance. It systematically explores different combinations of latent dimensions, hidden layers, and anomaly thresholds to find the best configuration.

In [None]:
# Flip to False to run the tuning step; True skips it.
SKIP_HYPERPARAMETER_TUNING = True

if SKIP_HYPERPARAMETER_TUNING:
    logging.info("Skipping Performing hyperparameter tuning for the autoencoder model")
else:
    logging.info("--- Performing hyperparameter tuning for the autoencoder model ---")
    # NOTE(review): the tuning call is commented out, so this branch currently
    # only logs the banner above and performs no tuning.
    #tune_hyperparameters_advanced(config)

## Workflow Completed

The entire forest fire detection workflow has been executed. You can now review the generated outputs in the configured output directory (`OUTPUT_DIR`, set to `output_7p_sma` in Section 1).

### Key Benefits of the Refactored Structure:
- **Clear Module Organization**: Each import now comes from a logically named module
- **Single Responsibility**: Each module has a focused purpose
- **Easy Maintenance**: Code is organized and easy to find
- **No Circular Dependencies**: Clean import structure

### Updated Module Mapping:
- `data_preprocessing` → Data resampling and windowing
- `data_filtering` → Window and distance filtering  
- `data_annotation` → Fire distance calculations
- `data_sequences` → Dataset creation and sequences
- `model_training` → Autoencoder training workflow
- `eval_metrics` → Model evaluation and metrics
- `eval_tuning` → Hyperparameter optimization