# Australia Fire Prediction Modeling

This notebook demonstrates data preparation, model training, and evaluation for predicting Australian fire characteristics using satellite data.

In [7]:
# Install required packages
!pip install boto3 matplotlib seaborn scikit-learn pandas numpy kagglehub




[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: C:\Users\voqua\AppData\Local\Programs\Python\Python311\python.exe -m pip install --upgrade pip


## 1. Data Acquisition and Preparation

First, we'll set up AWS connections and retrieve data from S3

In [8]:
import boto3
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import logging
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, cross_val_score
import pickle
import sys

# Add the project root to the path so we can import the src modules
sys.path.append("..")

# Import our utility modules
from src.utils.aws_utils import download_from_s3, upload_model_to_s3, ensure_bucket_exists
from src.utils.create_dataset import clean_fire_data
from src.utils.generate_features import generate_fire_features
from src.utils.train_model import simple_parameter_tuning
from src.utils.score_model import evaluate_model
from src.utils.analysis import save_figures, plot_feature_importance

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# S3 client
s3_client = boto3.client('s3')

## 1.1 Download Data from Kaggle

We'll download the "Fires from Space" Australia and New Zealand dataset from Kaggle using kagglehub,
then upload the raw data to our S3 bucket for further processing.

In [9]:
import kagglehub
import glob
import os
from pathlib import Path

# Create data directories
local_data_dir = Path('../data')
local_data_dir.mkdir(parents=True, exist_ok=True)
raw_data_dir = local_data_dir / 'raw'
raw_data_dir.mkdir(parents=True, exist_ok=True)

# Download dataset from Kaggle
logger.info("Downloading fire dataset from Kaggle...")
try:
    # Download latest version of the dataset
    dataset_path = kagglehub.dataset_download("carlosparadis/fires-from-space-australia-and-new-zeland")
    logger.info(f"Dataset downloaded to: {dataset_path}")
    
    # List all downloaded CSV files
    csv_files = glob.glob(os.path.join(dataset_path, "*.csv"))
    logger.info(f"Found {len(csv_files)} CSV files in the dataset")
    
    if csv_files:
        # Use the first CSV file or look for a specific one
        # You might want to choose a specific file based on your needs
        fire_data_file = None
        for file in csv_files:
            if 'fire_nrt' in file.lower():
                fire_data_file = file
                break
        
        if not fire_data_file and csv_files:
            fire_data_file = csv_files[0]
            
        if fire_data_file:
            logger.info(f"Using fire data file: {fire_data_file}")
            
            # Upload the raw file to S3
            bucket_name = 'fire-prediction-data'  # Same bucket defined earlier
            aws_region = 'us-east-2'  # Same region defined earlier
            
            # Make sure the bucket exists
            ensure_bucket_exists(bucket_name, aws_region, True)
            
            # Define S3 key for the raw data
            s3_key = 'data/fire_nrt.csv'
            
            # Upload the file to S3
            try:
                logger.info(f"Uploading raw data to S3: s3://{bucket_name}/{s3_key}")
                s3_client.upload_file(fire_data_file, bucket_name, s3_key)
                logger.info(f"Successfully uploaded raw data to S3")
                
                # Update the file path for future reference
                file_path = fire_data_file
                
                # Create a small sample of the data to display
                df_sample = pd.read_csv(fire_data_file, nrows=5)
                print("\nSample of the fire data:")
                print(df_sample)
                
            except Exception as e:
                logger.error(f"Error uploading to S3: {e}")
        else:
            logger.error("No suitable CSV file found in the downloaded dataset")
    else:
        logger.error("No CSV files found in the downloaded dataset")
        
except Exception as e:
    logger.error(f"Error downloading dataset from Kaggle: {e}")
    logger.info("If you encounter authentication issues, make sure your Kaggle API credentials are set up.")
    logger.info("You can download the dataset manually from: https://www.kaggle.com/datasets/carlosparadis/fires-from-space-australia-and-new-zeland")

2025-05-29 22:43:04,118 - __main__ - INFO - Downloading fire dataset from Kaggle...
2025-05-29 22:43:04,690 - __main__ - INFO - Dataset downloaded to: C:\Users\voqua\.cache\kagglehub\datasets\carlosparadis\fires-from-space-australia-and-new-zeland\versions\1
2025-05-29 22:43:04,693 - __main__ - INFO - Found 4 CSV files in the dataset
2025-05-29 22:43:04,694 - __main__ - INFO - Using fire data file: C:\Users\voqua\.cache\kagglehub\datasets\carlosparadis\fires-from-space-australia-and-new-zeland\versions\1\fire_nrt_M6_96619.csv
2025-05-29 22:43:04,694 - src.utils.aws_utils - INFO - Checking if bucket 'fire-prediction-data' exists
2025-05-29 22:43:05,177 - src.utils.aws_utils - INFO - Bucket 'fire-prediction-data' doesn't exist, creating it in region 'us-east-2'
2025-05-29 22:43:05,726 - src.utils.aws_utils - INFO - Successfully created bucket 'fire-prediction-data'
2025-05-29 22:43:05,735 - __main__ - INFO - Uploading raw data to S3: s3://fire-prediction-data/data/fire_nrt.csv
2025-05-29


Sample of the fire data:
   latitude  longitude  brightness  scan  track    acq_date  acq_time  \
0   -14.281    143.636       323.9   1.7    1.3  2019-10-01        25   
1   -14.284    143.532       343.5   1.7    1.3  2019-10-01        25   
2   -14.302    143.706       320.2   1.7    1.3  2019-10-01        25   
3   -14.283    143.652       320.4   1.7    1.3  2019-10-01        25   
4   -14.285    143.521       349.4   1.7    1.3  2019-10-01        25   

  satellite instrument  confidence version  bright_t31    frp daynight  
0     Terra      MODIS          70  6.0NRT       302.3   26.8        D  
1     Terra      MODIS          90  6.0NRT       306.3   84.3        D  
2     Terra      MODIS          30  6.0NRT       305.0   14.1        D  
3     Terra      MODIS          57  6.0NRT       303.3   18.4        D  
4     Terra      MODIS          94  6.0NRT       304.7  110.7        D  


In [10]:
# S3 bucket details - these will be used by our pipeline too
bucket_name = 'fire-prediction-data'  # Choose a globally unique bucket name
aws_region = 'us-east-2'  # Specify your preferred region
file_path = '../data/fire_nrt.csv'  # Path to your local file
s3_key = 'data/fire_nrt.csv'  # The path/name the file will have in S3

# This config structure matches what we'll use in our YAML config
fire_config = {
    "run_config": {
        "name": "fire-prediction-model",
        "author": "Your Name",
        "version": "1.0",
        "description": "Predicts fire brightness based on satellite data",
        "dependencies": "requirements.txt",
        "data_source": f"s3://{bucket_name}/{s3_key}",
        "output": "artifacts"
    },
    "aws": {
        "bucket_name": bucket_name,
        "region": aws_region,
        "prefix": "fire-experiments",
        "create_bucket_if_missing": True
    }
}

# Check if bucket exists and create it if necessary - this will be handled by aws_utils.py in the pipeline
ensure_bucket_exists(bucket_name, aws_region, True)

2025-05-29 22:43:19,095 - src.utils.aws_utils - INFO - Checking if bucket 'fire-prediction-data' exists
2025-05-29 22:43:19,522 - src.utils.aws_utils - INFO - Bucket 'fire-prediction-data' exists


True

In [11]:
# Main data preparation
# S3 configuration
local_data_dir = Path('../data')
local_data_dir.mkdir(parents=True, exist_ok=True)
local_data_path = local_data_dir / 'fire_nrt_cleaned.csv'

# Try to load existing cleaned data first
if os.path.exists(local_data_path):
    logger.info(f"Loading existing cleaned data from {local_data_path}")
    df_clean = pd.read_csv(local_data_path)
    if 'acq_date' in df_clean.columns:
        df_clean['acq_date'] = pd.to_datetime(df_clean['acq_date'])
else:
    # Download file from S3
    temp_raw_path = '../data/fire_nrt.csv'
    if download_from_s3(bucket_name, s3_key, temp_raw_path):
        # Load the data
        try:
            df = pd.read_csv(temp_raw_path)
            logger.info(f"Loaded data with {len(df)} rows and {len(df.columns)} columns")
            
            # Clean the data using our module function
            df_clean = clean_fire_data(df)
            
            # Save cleaned data
            df_clean.to_csv(local_data_path, index=False)
            logger.info(f"Cleaned data saved to {local_data_path}")
            
            # Optionally upload cleaned data back to S3
            clean_s3_key = 'data/fire_nrt_cleaned.csv'
            s3_client.upload_file(str(local_data_path), bucket_name, clean_s3_key)
            logger.info(f"Uploaded cleaned data to s3://{bucket_name}/{clean_s3_key}")
            
        except Exception as e:
            logger.error(f"Error processing the data: {e}")
    else:
        logger.error("Failed to download the file from S3. Check your credentials and bucket name.")
        
# Make a copy for our modeling work
fire_data = df_clean.copy()

2025-05-29 22:43:19,541 - src.utils.aws_utils - INFO - Downloading data/fire_nrt.csv from bucket fire-prediction-data to ../data/fire_nrt.csv
2025-05-29 22:43:24,623 - src.utils.aws_utils - INFO - Successfully downloaded file to ../data/fire_nrt.csv
2025-05-29 22:43:24,739 - __main__ - INFO - Loaded data with 183593 rows and 14 columns
2025-05-29 22:43:24,739 - src.utils.create_dataset - INFO - Cleaning fire data
2025-05-29 22:43:24,770 - src.utils.create_dataset - INFO - After dropping missing values: 183593 rows
2025-05-29 22:43:24,789 - src.utils.create_dataset - INFO - After geographic filtering: 183591 rows
2025-05-29 22:43:24,805 - src.utils.create_dataset - INFO - After outlier removal: 179965 rows
2025-05-29 22:43:24,805 - src.utils.create_dataset - INFO - Data cleaning complete. Rows remaining: 179965
2025-05-29 22:43:25,438 - __main__ - INFO - Cleaned data saved to ..\data\fire_nrt_cleaned.csv
2025-05-29 22:43:35,005 - __main__ - INFO - Uploaded cleaned data to s3://fire-pred

## 2. Feature Selection and Data Preparation

Select features and prepare data for model training

In [12]:
# Define feature generation config
feature_config = {
    "target_column": "brightness",
    "feature_columns": ['latitude', 'longitude', 'scan', 'track', 'bright_t31', 'frp', 'confidence'],
    "derived_features": ["frp_per_area", "temperature_diff"]
}

# Generate features using our module function
fire_features = generate_fire_features(fire_data, feature_config)

# Define target and features for modeling
y = fire_features.brightness
X = fire_features[feature_config["feature_columns"] + 
                ['frp_per_area', 'temperature_diff'] if 'temperature_diff' in fire_features.columns else ['frp_per_area']]

feature_names = X.columns.tolist()

# Review data
print("Feature statistics:")
print(X.describe())

print("\nFirst few rows of features:")
print(X.head())

# Split data into training and validation sets
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

print(f"\nTraining set size: {len(train_X)} samples")
print(f"Validation set size: {len(val_X)} samples")

2025-05-29 22:43:35,025 - src.utils.generate_features - INFO - Generating features for fire prediction model
2025-05-29 22:43:35,030 - src.utils.generate_features - INFO - Adding derived features
2025-05-29 22:43:35,033 - src.utils.generate_features - INFO - Added frp_per_area feature
2025-05-29 22:43:35,035 - src.utils.generate_features - INFO - Added temperature_diff feature
2025-05-29 22:43:35,035 - src.utils.generate_features - INFO - Feature generation complete. Total features: 16


Feature statistics:
            latitude      longitude           scan          track  \
count  179965.000000  179965.000000  179965.000000  179965.000000   
mean      -26.977825     141.933228       1.609390       1.209982   
std         8.177908      11.018903       0.815002       0.248601   
min       -43.116000     113.458000       1.000000       1.000000   
25%       -33.038000     131.569000       1.000000       1.000000   
50%       -30.060000     147.868000       1.300000       1.100000   
75%       -17.695000     150.675000       1.900000       1.300000   
max       -10.101000     153.477000       4.800000       2.000000   

          bright_t31            frp     confidence   frp_per_area  \
count  179965.000000  179965.000000  179965.000000  179965.000000   
mean      302.543666      75.477595      74.505104      38.558686   
std        11.415937     128.434577      25.041051      54.093149   
min       265.700000       0.000000       0.000000       0.000000   
25%       293

## 3. Model Training and Evaluation

### 3.1 Linear Regression Baseline

In [13]:
def train_linear_regression_baseline():
    print("Training Linear Regression baseline model...")
    
    # Create and train the model
    lr_model = LinearRegression()
    lr_model.fit(train_X, train_y)
    
    # Use our module function to evaluate the model
    metrics = evaluate_model(lr_model, val_X, val_y)
    
    # Get feature coefficients
    coefficients = pd.DataFrame({
        'feature': feature_names,
        'coefficient': lr_model.coef_
    }).sort_values('coefficient', ascending=False)
    
    print("\nFeature Coefficients:")
    print(coefficients)
    
    # Plot feature coefficients
    plt.figure(figsize=(10, 6))
    sns.barplot(x='coefficient', y='feature', data=coefficients)
    plt.title('Linear Regression Feature Coefficients')
    plt.tight_layout()
    
    # Create figures directory if it doesn't exist
    figures_dir = Path('../figures')
    figures_dir.mkdir(parents=True, exist_ok=True)
    plt.savefig(figures_dir / 'lr_feature_coefficients.png')
    plt.show()
    
    # Save the model
    model_dir = Path('../models')
    model_dir.mkdir(parents=True, exist_ok=True)
    model_path = model_dir / 'linear_regression_baseline.pkl'
    
    with open(model_path, 'wb') as f:
        pickle.dump(lr_model, f)
    
    print(f"Linear Regression model saved to {model_path}")
    
    # Upload model to S3 using our module function
    upload_model_to_s3(model_path, 'linear_regression_baseline.pkl', bucket_name, aws_region)
    
    return lr_model, metrics

# Train linear regression model
lr_model, lr_metrics = train_linear_regression_baseline()

2025-05-29 22:43:35,159 - src.utils.score_model - INFO - Model Evaluation Results:
2025-05-29 22:43:35,159 - src.utils.score_model - INFO - MAE: 0.0000
2025-05-29 22:43:35,160 - src.utils.score_model - INFO - RMSE: 0.0000
2025-05-29 22:43:35,160 - src.utils.score_model - INFO - R²: 1.0000
2025-05-29 22:43:35,160 - src.utils.score_model - INFO - Explained Variance: 1.0000


Training Linear Regression baseline model...

Feature Coefficients:
            feature   coefficient
8  temperature_diff  1.000000e+00
4        bright_t31  1.000000e+00
3             track  4.340309e-14
7      frp_per_area  1.034601e-15
6        confidence  9.385980e-16
0          latitude  3.861888e-16
5               frp  1.390482e-16
1         longitude -1.762479e-15
2              scan -2.549055e-14
Linear Regression model saved to ..\models\linear_regression_baseline.pkl


  plt.show()
2025-05-29 22:43:35,655 - src.utils.aws_utils - INFO - Successfully uploaded model to s3://fire-prediction-data/models/linear_regression_baseline.pkl


### 3.3 Fine-tune Decision Tree

In [14]:
# Use our module function for parameter tuning
best_leaf_nodes = simple_parameter_tuning(train_X, train_y)

# Create and train a model with the best max_leaf_nodes
best_simple_model = DecisionTreeRegressor(max_leaf_nodes=best_leaf_nodes, random_state=1)
best_simple_model.fit(train_X, train_y)

print("\nEvaluating model with best max_leaf_nodes:")
simple_metrics = evaluate_model(best_simple_model, val_X, val_y)

# Plot feature importance using our module function
figures_dir = Path('../figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plot_feature_importance(best_simple_model, feature_names, figures_dir / 'dt_feature_importance.png')

# Save the model
model_dir = Path('../models')
model_dir.mkdir(parents=True, exist_ok=True)

with open(model_dir / 'best_decision_tree.pkl', 'wb') as f:
    pickle.dump(best_simple_model, f)

print(f"Best model saved to {model_dir / 'best_decision_tree.pkl'}")

2025-05-29 22:43:35,663 - src.utils.train_model - INFO - Performing simple parameter tuning for max_leaf_nodes
2025-05-29 22:43:35,668 - src.utils.train_model - INFO - Testing 7 candidate values for max_leaf_nodes
2025-05-29 22:43:38,954 - src.utils.train_model - INFO - max_leaf_nodes=5000, MAE=0.5561
2025-05-29 22:43:42,521 - src.utils.train_model - INFO - max_leaf_nodes=10000, MAE=0.4788
2025-05-29 22:43:46,287 - src.utils.train_model - INFO - max_leaf_nodes=15000, MAE=0.4448
2025-05-29 22:43:50,162 - src.utils.train_model - INFO - max_leaf_nodes=20000, MAE=0.4267
2025-05-29 22:43:54,137 - src.utils.train_model - INFO - max_leaf_nodes=25000, MAE=0.4173
2025-05-29 22:43:58,268 - src.utils.train_model - INFO - max_leaf_nodes=30000, MAE=0.4121
2025-05-29 22:44:02,986 - src.utils.train_model - INFO - max_leaf_nodes=50000, MAE=0.4076
2025-05-29 22:44:02,988 - src.utils.train_model - INFO - Best max_leaf_nodes: 50000
2025-05-29 22:44:04,154 - src.utils.score_model - INFO - Model Evaluation


Evaluating model with best max_leaf_nodes:
Best model saved to ..\models\best_decision_tree.pkl


## 4. Model Comparison and Upload to S3

In [15]:
# Compare the models
print("\nModel Comparison:")
print(f"Linear Regression MAE: {lr_metrics['mae']:.4f}")
print(f"Decision Tree MAE: {simple_metrics['mae']:.4f}")
print(f"Improvement with Decision Tree: {(lr_metrics['mae'] - simple_metrics['mae']) / lr_metrics['mae'] * 100:.2f}%")

# Upload the best decision tree model using our module function
upload_model_to_s3(Path('../models/best_decision_tree.pkl'), 'best_decision_tree.pkl', bucket_name, aws_region)


Model Comparison:
Linear Regression MAE: 0.0000
Decision Tree MAE: 0.3741
Improvement with Decision Tree: -648317472319083.00%


2025-05-29 22:44:09,023 - src.utils.aws_utils - INFO - Successfully uploaded model to s3://fire-prediction-data/models/best_decision_tree.pkl


True

## 5. Conclusion

In this notebook, we have:
1. Acquired and cleaned Australian fire data
2. Trained baseline linear regression and decision tree models
3. Fine-tuned the decision tree model with cross-validation
4. Compared model performance and identified the best model
5. Saved and uploaded the models to S3 for deployment

The decision tree model outperformed the linear regression baseline, with particular importance placed on features like brightness temperature, fire radiative power, and location coordinates.

This notebook serves as a prototype for our pipeline implementation.

## 6. Creating Configuration for Multi-Model Pipeline

Now we'll create a configuration file that supports training both Linear Regression and Decision Tree models in our pipeline.

In [16]:
import yaml

# Define a comprehensive configuration that supports multiple models
multi_model_config = {
    "run_config": {
        "name": "fire-prediction-multi-model",
        "author": "Your Name",
        "version": "1.0",
        "description": "Predicts fire brightness using multiple regression models",
        "dependencies": "requirements.txt",
        "data_source": f"s3://{bucket_name}/{s3_key}",
        "output": "artifacts"
    },
    "create_dataset": {
        "critical_columns": [
            "latitude", "longitude", "brightness", "scan", "track", 
            "bright_t31", "frp", "confidence"
        ],
        "region_filter": {
            "enabled": True,
            "min_latitude": -44,
            "max_latitude": -10,
            "min_longitude": 112,
            "max_longitude": 154
        },
        "outlier_removal": {
            "enabled": True,
            "column": "brightness",
            "std_threshold": 3
        },
        "target_column": "brightness"
    },
    "generate_features": {
        "target_column": "brightness",
        "feature_columns": [
            "latitude", "longitude", "scan", "track", 
            "bright_t31", "frp", "confidence"
        ],
        "derived_features": [
            "frp_per_area", 
            "temperature_diff"
        ],
        "transformations": {
            "log_transform": {
                "log_frp": "frp"
            },
            "normalize": {
                "enabled": True,
                "method": "min_max",
                "columns": ["latitude", "longitude", "scan", "track"]
            }
        }
    },
    "train_model": {
        "models": [
            {
                "name": "linear_regression",
                "type": "LinearRegression",
                "hyperparameters": {},
                "is_default": False
            },
            {
                "name": "decision_tree",
                "type": "DecisionTreeRegressor",
                "hyperparameters": {
                    "max_leaf_nodes": best_leaf_nodes,  # Use the best value from our tuning
                    "random_state": 42
                },
                "is_default": True  # This will be used for default predictions
            }
        ],
        "test_size": 0.3,
        "random_state": 42,
        "target_column": "brightness",
        "initial_features": [
            "latitude", "longitude", "scan", "track", 
            "bright_t31", "frp", "confidence", 
            "frp_per_area", "temperature_diff"
        ]
    },
    "score_model": {
        "metrics": [
            "mae",
            "rmse",
            "r2",
            "explained_variance"
        ]
    },
    "evaluate_performance": {
        "visualization": [
            "feature_importance",
            "predictions_vs_actual",
            "residuals_plot",
            "model_comparison"  # New visualization for comparing models
        ],
        "save_format": "png"
    },
    "aws": {
        "upload": True,
        "bucket_name": bucket_name,
        "prefix": "fire-experiments",
        "region": aws_region,
        "create_bucket_if_missing": True
    }
}

# Create the directory for the config file if it doesn't exist
config_dir = Path('../config')
config_dir.mkdir(parents=True, exist_ok=True)

# Save the configuration to a YAML file
config_path = config_dir / 'fire-prediction-multi-model-config.yaml'
with open(config_path, 'w') as f:
    yaml.dump(multi_model_config, f, default_flow_style=False)

print(f"Multi-model configuration saved to {config_path}")

Multi-model configuration saved to ..\config\fire-prediction-multi-model-config.yaml


## 7. Pipeline-Notebook Alignment

To ensure that the pipeline training process exactly matches what we've done in this notebook:

1. **Data Preprocessing**: The pipeline's `create_dataset` and `generate_features` functions should match our cleaning and feature generation steps

2. **Feature Selection**: We're using these features in our models:
   - latitude, longitude, scan, track, bright_t31, frp, confidence 
   - Plus derived features: frp_per_area, temperature_diff

3. **Training Process**: 
   - Same train/test split (test_size=0.3, random_state=42)
   - Same model types (LinearRegression, DecisionTreeRegressor) 
   - Same hyperparameters (max_leaf_nodes based on tuning)

4. **Evaluation Metrics**: 
   - MAE, RMSE, R², explained variance

The pipeline has been updated to follow this exact workflow to ensure consistency between interactive exploration and production deployment.

In [17]:
# Generate a config summary that helps ensure pipeline-notebook alignment
alignment_check = {
    "notebook_features": feature_names,
    "notebook_models": {
        "linear_regression": {
            "type": "LinearRegression",
            "params": {}
        },
        "decision_tree": {
            "type": "DecisionTreeRegressor",
            "params": {"max_leaf_nodes": best_leaf_nodes, "random_state": 1}
        }
    },
    "train_test_split": {
        "test_size": 0.3,
        "random_state": 1
    },
    "metrics": {
        "linear_regression": lr_metrics,
        "decision_tree": simple_metrics
    }
}

print("Pipeline-Notebook Alignment Check:")
print(f"Features used: {alignment_check['notebook_features']}")
print(f"Test size: {alignment_check['train_test_split']['test_size']}")
print(f"Random state: {alignment_check['train_test_split']['random_state']}")
print("\nMetrics to match in pipeline:")
print(f"Linear Regression MAE: {lr_metrics['mae']:.4f}")
print(f"Decision Tree MAE: {simple_metrics['mae']:.4f}")
print(f"Improvement with Decision Tree: {(lr_metrics['mae'] - simple_metrics['mae']) / lr_metrics['mae'] * 100:.2f}%")

Pipeline-Notebook Alignment Check:
Features used: ['latitude', 'longitude', 'scan', 'track', 'bright_t31', 'frp', 'confidence', 'frp_per_area', 'temperature_diff']
Test size: 0.3
Random state: 1

Metrics to match in pipeline:
Linear Regression MAE: 0.0000
Decision Tree MAE: 0.3741
Improvement with Decision Tree: -648317472319083.00%
