# Experiment 4: Fire January set + HRRR

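This notebook runs Experiment 4: it combines the PWWB, AirNow, and HRRR datasets for the January 2025 fire period (2025-01-16 to 2025-01-23), trains a ConvLSTM model on the combined input, and evaluates its predictions against the AirNow target stations.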

# Data parameters

In [None]:
# Bounding box of the study area (decimal degrees)
lat_bottom = 33.9
lat_top = 34.2
lon_bottom = -118.4
lon_top = -118.0
# Ordered (lon_min, lon_max, lat_min, lat_max); passed as `extent` to every data loader
extent = (lon_bottom, lon_top, lat_bottom, lat_top)

# Input data shape
dim = 200  # passed as `dim` to every loader (presumably the grid side length — TODO confirm)
frames_per_sample = 5  # passed as `frames_per_sample` to every loader

# Date range of the data - January only for this experiment.
# NOTE(review): strings look like "YYYY-MM-DD-HH" — confirm against the loaders.
jan_start_date = "2025-01-16-00"
jan_end_date = "2025-01-23-00"

# Data ingestion and preprocessing

In [None]:
# Add the parent directory to sys.path so the sibling `libs` package can be imported
import sys
sys.path.append("..")

import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from dotenv import load_dotenv

# Import the data classes
from libs.pwwbdata import PWWBData
from libs.airnowdata import AirNowData
from libs.hrrrdata import HRRRData

# Load environment variables (API keys, credentials) into the process environment.
# AIRNOW_API_KEY is read back with os.getenv when the AirNow loader is created.
load_dotenv()

# split data
def train_test_split(X, train_size=0.75):
    """Split ``X`` along its first axis into a leading train part and a trailing test part.

    The split is purely positional (no shuffling): the first
    ``int(len(X) * train_size)`` samples form the train part and the remaining
    samples form the test part.

    Args:
        X: Sliceable collection of samples (e.g. a NumPy array or a list).
        train_size: Fraction of the samples assigned to the train part.
            Must lie in the closed interval [0, 1].

    Returns:
        Tuple ``(X_train, X_test)`` of slices of ``X``.

    Raises:
        ValueError: If ``train_size`` is outside [0, 1]. A negative fraction
            would otherwise yield a negative slice index that silently wraps
            around from the end of ``X``.
    """
    if not 0.0 <= train_size <= 1.0:
        raise ValueError(f"train_size must be within [0, 1], got {train_size!r}")

    split_idx = int(len(X) * train_size)
    return X[:split_idx], X[split_idx:]

# scale training data, then scale test data based on training data stats
from sklearn.preprocessing import StandardScaler
def std_scale(X_train, X_test, per_channel=True):
    """Standardize train and test arrays with statistics fitted on the train array only.

    Args:
        X_train: Array the scaler is fitted on; returned scaled, same shape.
        X_test: Array transformed with the train statistics; returned scaled,
            same shape.
        per_channel: When True (default), every index of the last axis gets
            its own mean/std, so inputs with very different value ranges do
            not share a single scale. When False, one mean/std over all values
            is used, which is what this function did before the flag existed.

    Returns:
        Tuple ``(scaled_train, scaled_test)``.

    Raises:
        ValueError: If per-channel scaling is requested and the last-axis
            sizes of ``X_train`` and ``X_test`` differ.
    """
    # One scaler "feature" per channel; 1-D input has no channel axis, so it
    # falls back to a single global feature.
    n_features = X_train.shape[-1] if (per_channel and X_train.ndim > 1) else 1
    if n_features != 1 and X_test.shape[-1] != n_features:
        raise ValueError(
            f"Channel count mismatch: train has {n_features}, test has {X_test.shape[-1]}"
        )

    scaler = StandardScaler()
    # Flatten every leading axis so each row is one value per feature, then restore the shape.
    scaled_train = scaler.fit_transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
    scaled_test = scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

    return scaled_train, scaled_test

In [None]:
# Create output directory for results
output_dir = "experiment_output"
os.makedirs(output_dir, exist_ok=True)

# End date shifted back by one hour; handed to the AirNow and HRRR loaders below,
# while the PWWB loader receives the unshifted jan_end_date.
# NOTE(review): presumably those two loaders treat end_date as inclusive — confirm.
jan_end_date_adj = pd.to_datetime(jan_end_date) - pd.Timedelta(hours=1)

# ========== 1. Load January PWWB Data ==========
print("\nLoading January PWWB data...")
jan_pwwb = PWWBData(
    start_date=jan_start_date, end_date=jan_end_date,
    extent=extent, dim=dim, frames_per_sample=frames_per_sample,
    env_file='.env', output_dir=output_dir,
    verbose=True,
)
X_jan_pwwb = jan_pwwb.data
channel_info = jan_pwwb.get_channel_info()
print(f"✓ January PWWB data shape: {X_jan_pwwb.shape}")
print(f"  Channels: {channel_info['channel_order']}")

# ========== 2. Load January AirNow Data ==========
print("\nLoading January AirNow data...")
jan_airnow = AirNowData(
    start_date=jan_start_date, end_date=jan_end_date_adj,
    extent=extent, dim=dim, frames_per_sample=frames_per_sample,
    airnow_api_key=os.getenv('AIRNOW_API_KEY'),
    elevation_path="../libs/inputs/elevation.npy",
    mask_path="../libs/inputs/mask.npy",
    force_reprocess=False,
)
X_jan_airnow = jan_airnow.data
# Station targets double as the regression labels (Y) for the experiment
Y_jan = jan_airnow.target_stations
print(f"✓ January AirNow data shape: {X_jan_airnow.shape}")
print(
    f"  January target stations shape: {Y_jan.shape}"
    if Y_jan is not None
    else "  No January target stations available"
)

# ========== 3. Load January HRRR Data ==========
print("\nLoading January HRRR data...")
jan_hrrr = HRRRData(
    start_date=jan_start_date, end_date=jan_end_date_adj,
    extent=extent, extent_name='la_region',
    product='COLMD',
    dim=dim, frames_per_sample=frames_per_sample,
    sample_setting=2,
    verbose=True,
)
X_jan_hrrr = jan_hrrr.data
print(f"✓ January HRRR data shape: {X_jan_hrrr.shape}")

In [None]:
# ========== 4. Create Experiment 4 dataset ==========
print("\nCreating Experiment 4 dataset...")
print("  Experiment 4: Fire January set + HRRR")

# Experiment 4: Fire January set + HRRR
# Join the three sources along the last axis, in the order PWWB, AirNow, HRRR.
X_exp4 = np.concatenate((X_jan_pwwb, X_jan_airnow, X_jan_hrrr), axis=-1)
print(f"    Combined shape: {X_exp4.shape}")

In [None]:
# ========== 5. Train/Test Split for experiment ==========
print("\nCreating train/test splits for experiment...")

# Fail fast with a clear message: the loading cell only prints a notice when no
# targets are available, and a length mismatch would otherwise surface much
# later, during model fitting.
if Y_jan is None:
    raise ValueError("No January target stations available; cannot build train/test targets.")
if len(Y_jan) != len(X_exp4):
    raise ValueError(
        f"Sample count mismatch: X_exp4 has {len(X_exp4)} samples, Y_jan has {len(Y_jan)}."
    )

# Experiment 4 splits (same fraction for inputs and targets keeps them aligned)
X_exp4_train, X_exp4_test = train_test_split(X_exp4, train_size=0.75)
Y_jan_train, Y_jan_test = train_test_split(Y_jan, train_size=0.75)
print(f"  Experiment 4: Train={X_exp4_train.shape}, Test={X_exp4_test.shape}")

# ========== 6. Standardize data ==========
print("\nStandardizing data...")

# Experiment 4 standardization: statistics come from the train split only and
# are then applied to the test split. Targets are left unscaled.
X_exp4_train_scaled, X_exp4_test_scaled = std_scale(X_exp4_train, X_exp4_test)
print(f"  Experiment 4: Scaled train={X_exp4_train_scaled.shape}, test={X_exp4_test_scaled.shape}")

In [None]:
# ========== 7. Save prepared datasets ==========
print("\nSaving prepared dataset...")

# Create directory for experiment
exp_dir = os.path.join(output_dir, "experiment4")
os.makedirs(exp_dir, exist_ok=True)

# Save Experiment 4 data: scaled inputs and unscaled targets, one .npy file each
for _file_name, _array in (
    ("X_train.npy", X_exp4_train_scaled),
    ("X_test.npy", X_exp4_test_scaled),
    ("y_train.npy", Y_jan_train),
    ("y_test.npy", Y_jan_test),
):
    np.save(os.path.join(exp_dir, _file_name), _array)

print("\n✓ Dataset prepared and saved!")

# Data visualization

In [None]:
# Function to visualize data from experiment
def visualize_experiment_data(X, y, sample_idx=None):
    """Plot every channel of every frame of one sample and print its target values.

    Draws a grid with one row per channel and one column per frame.

    Args:
        X: 5-D array indexed as ``X[sample, frame, :, :, channel]``.
        y: Per-sample targets; ``y[sample_idx]`` is printed after the figure.
        sample_idx: Index of the sample to show. When ``None``, a sample is
            drawn with a fixed seed (42), so repeated calls show the same one.
    """
    if sample_idx is None:
        # A local generator seeded with 42 picks the same sample as seeding the
        # global NumPy RNG would, without resetting the notebook-wide random state.
        rng = np.random.RandomState(42)
        sample_idx = int(rng.choice(range(len(X)), size=1)[0])

    # Grid dimensions
    n_channels = X.shape[4]
    n_frames = X.shape[1]

    # squeeze=False always returns a 2-D axes array, including the
    # single-channel and single-frame cases.
    fig, axes = plt.subplots(
        n_channels, n_frames,
        figsize=(3*n_frames, 2*n_channels),
        squeeze=False,
    )

    # Plot each channel and frame
    for c in range(n_channels):
        for f in range(n_frames):
            ax = axes[c, f]
            ax.imshow(X[sample_idx, f, :, :, c])
            if f == 0:
                ax.set_ylabel(f"Channel {c}")
            ax.set_title(f"Frame {f+1}")
            # Hide ticks and the frame only; ax.axis("off") would also hide
            # the channel label set above.
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)

    # Set title
    fig.suptitle(f"Experiment 4: Fire January set + HRRR\nSample {sample_idx}")
    fig.tight_layout()
    plt.show()

    # Print target values
    print(f"Target values: {y[sample_idx]}")

# Show one training sample (chosen by the function's fixed seed) with its targets
print("Visualizing data...")
visualize_experiment_data(X=X_exp4_train_scaled, y=Y_jan_train)

# Model

In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.models import Model
from keras.layers import Conv3D
from keras.layers import ConvLSTM2D
from keras.layers import BatchNormalization
from keras.layers import Convolution2D, MaxPooling3D, Flatten, Reshape
from keras.layers import TimeDistributed
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import InputLayer

# Configure the data format on the same `keras` package the layers are imported
# from; `tf.keras` is not guaranteed to be that package in every installation.
keras.backend.set_image_data_format('channels_last')

In [None]:
# Run Experiment 4: Fire January set + HRRR
print("\n==== Running Experiment 4: Fire January set + HRRR ====")
print(f"Training data shape: {X_exp4_train_scaled.shape}")
print(f"Target data shape: {Y_jan_train.shape}")

# Seed Python, NumPy and the Keras backend before any layer is created, so that
# weight initialisation and batch shuffling repeat across "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
keras.utils.set_random_seed(42)

# Build model: two ConvLSTM layers over the frame sequence, two 3-D convolutions
# that reduce to a single feature map, then a dense head with one output per
# target column.
seq = Sequential([
    # Per-sample input shape (everything after the sample axis)
    InputLayer(shape=X_exp4_train_scaled.shape[1:]),
    ConvLSTM2D(
        filters=15,
        kernel_size=(3, 3),
        padding='same',
        return_sequences=True,
    ),
    ConvLSTM2D(
        filters=30,
        kernel_size=(3, 3),
        padding='same',
        return_sequences=True,
    ),
    Conv3D(
        filters=15,
        kernel_size=(3, 3, 3),
        activation='relu',
        padding='same',
    ),
    Conv3D(
        filters=1,
        kernel_size=(3, 3, 3),
        activation='relu',
        padding='same',
    ),
    Flatten(),
    # ReLU output keeps predictions non-negative
    Dense(Y_jan_train.shape[1], activation='relu'),
])

# Compile model (loss is MAE, so the evaluate() result below is a test MAE)
seq.compile(loss='mean_absolute_error', optimizer='adam')

# Print model summary
seq.summary()

# Train model
print("\nTraining model...")
epochs = 50  # Reduced epochs for faster testing
batch_size = 4
history = seq.fit(
    x=X_exp4_train_scaled,
    y=Y_jan_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,  # 20% of the training samples are held out for validation
)

# Evaluate model on the held-out test split (the compiled loss is MAE)
print("\nEvaluating model...")
test_loss = seq.evaluate(X_exp4_test_scaled, Y_jan_test, verbose=0)
print(f"Test MAE: {test_loss:.4f}")

# Make predictions
y_pred = seq.predict(X_exp4_test_scaled, verbose=0)

# Calculate metrics over all target columns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mae = mean_absolute_error(y_true=Y_jan_test, y_pred=y_pred)
rmse = np.sqrt(mean_squared_error(y_true=Y_jan_test, y_pred=y_pred))
r2 = r2_score(y_true=Y_jan_test, y_pred=y_pred)

print(f"Mean Absolute Error: {mae:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")
print(f"R² Score: {r2:.4f}")

# Plot training history: loss per epoch, plus validation loss when it was recorded
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['loss'], label='Training Loss')
if 'val_loss' in history.history:
    ax.plot(history.history['val_loss'], label='Validation Loss')
    ax.legend()
ax.set_title('Experiment 4: Fire January set + HRRR\nTraining Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (MAE)')
ax.grid(True, alpha=0.3)
plt.show()

# Save results: predictions and the trained model go under <output_dir>/experiment4/results
results_dir = os.path.join(output_dir, "experiment4", "results")
os.makedirs(results_dir, exist_ok=True)

np.save(os.path.join(results_dir, "y_pred.npy"), y_pred)
# NOTE(review): the ".h5" suffix selects the HDF5 model format — confirm whether
# anything loads this file before moving to another format.
seq.save(os.path.join(results_dir, "model.h5"))

# In-memory bundle consumed by the analysis cells further down
exp4_results = dict(
    model=seq,
    history=history,
    loss=test_loss,
    mae=mae,
    rmse=rmse,
    r2=r2,
    y_pred=y_pred,
    y_test=Y_jan_test,
)

# Evaluate

In [None]:
# Analyze results: bind short names for the plotting cell that follows
print("\nDetailed analysis for Experiment 4:")
description = "Fire January set + HRRR, 2nd week"
X_test, y_test = X_exp4_test_scaled, Y_jan_test
y_pred, model = exp4_results['y_pred'], exp4_results['model']

print(f"Analyzing Experiment 4: {description}")

In [None]:
from libs.plotting import (
    plot_prediction_comparison,
    plot_scatter_comparison,
    plot_error_by_sensor,
    plot_time_series_comparison,
    plot_input_frames,
    print_metrics
)

# Sensor names (use AirNow sensor names if available)
sensor_names = getattr(jan_airnow, 'sensor_names', None)
if sensor_names is None:
    sensor_names = ["North Hollywood", "Los Angeles - N. Main Street", "Compton"]
    # The hard-coded fallback only fits a three-column target; label generically
    # when the number of target columns differs.
    if len(sensor_names) != y_test.shape[1]:
        sensor_names = [f"Sensor {i + 1}" for i in range(y_test.shape[1])]

# Sample 12 is shown when the test set is long enough; otherwise use the last
# available sample instead of indexing past the end.
comparison_sample_idx = min(12, len(y_test) - 1)

print("\n1. Plotting prediction comparison...")
plot_prediction_comparison(y_pred, y_test, sensor_names, sample_idx=comparison_sample_idx)

print("\n2. Plotting scatter comparison...")
plot_scatter_comparison(y_pred, y_test)

print("\n3. Plotting error by sensor...")
plot_error_by_sensor(y_pred, y_test, sensor_names)

print("\n4. Plotting time series comparison...")
plot_time_series_comparison(y_pred, y_test, sensor_names)

# Same plot with shift_pred=1 passed to the plotting helper
print("\n5. Plotting time series with shifted predictions...")
plot_time_series_comparison(y_pred, y_test, sensor_names, shift_pred=1)

print("\n6. Printing metrics...")
print_metrics(y_pred, y_test, sensor_names)

In [None]:
# Save experiment comparison
# Explicit UTF-8: the summary contains "R²", and the platform-default encoding
# used by open() is not guaranteed to be able to encode it.
with open(os.path.join(output_dir, 'experiment4_results.txt'), 'w', encoding='utf-8') as f:
    f.write("==== Experiment 4 Results ====\n")
    f.write(f"Experiment 4 (Fire January set + HRRR, 2nd week): MAE = {exp4_results['mae']:.4f}, RMSE = {exp4_results['rmse']:.4f}, R² = {exp4_results['r2']:.4f}\n")
    f.write(f"\nAnalysis completed on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

print("\nExperiment 4 complete!")