In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import holidays
import joblib
import os
from datetime import datetime
import joblib
warnings.filterwarnings('ignore')


In [22]:
def _haversine_miles(lat1, lon1, lat2, lon2):
    """Great-circle distance in miles between two points given in degrees.

    Every argument may be a scalar or an array-like; the usual NumPy
    broadcasting rules apply, so a whole column can be measured against a
    single reference point in one call.
    """
    earth_radius_miles = 3959

    lat1, lon1, lat2, lon2 = (np.radians(value) for value in (lat1, lon1, lat2, lon2))
    half_chord_sq = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    # Rounding can push the value a hair outside [0, 1], which would make
    # sqrt/arcsin return NaN for (near-)antipodal or identical points.
    half_chord_sq = np.clip(half_chord_sq, 0.0, 1.0)
    return 2 * earth_radius_miles * np.arcsin(np.sqrt(half_chord_sq))


def _grouped_rolling_counts(data, group_col, windows):
    """Count earlier incidents of the same group inside trailing time windows.

    Args:
        data: Frame sorted by ``datetime`` that carries a unique ``_row_id``.
        group_col: Column whose non-null values define the groups.
        windows: Mapping ``output column name -> pandas offset`` (e.g. ``'24h'``).

    Returns:
        A frame with ``_row_id`` plus one count column per window, or ``None``
        when ``group_col`` holds no non-null value.
    """
    pieces = []
    for key in data[group_col].dropna().unique():
        members = data.loc[data[group_col] == key, ['datetime', '_row_id']].set_index('datetime')
        ones = pd.Series(1, index=members.index)
        piece = {'_row_id': members['_row_id'].to_numpy()}
        for column, window in windows.items():
            # closed='left' leaves the current incident out of its own count;
            # an empty window sums to NaN, which is reported as 0.
            piece[column] = ones.rolling(window, closed='left').sum().fillna(0).to_numpy()
        pieces.append(pd.DataFrame(piece))

    if not pieces:
        return None
    return pd.concat(pieces, ignore_index=True)


def engineer_features(df):
    """Derive temporal, geographic and workload features for incident records.

    Args:
        df: Frame with at least ``datetime``, ``lat`` and ``lon`` columns.
            ``category`` and ``ZONE_ID`` are optional; when one is missing the
            matching workload columns are filled with 0.

    Returns:
        A new frame, sorted by ``datetime`` with a fresh 0..n-1 index, holding
        the original columns plus:

        * ``incident_datetime`` (copied from ``datetime`` when absent)
        * ``hour``, ``day_of_week`` (0=Monday), ``month``, ``quarter``, ``day_of_year``
        * ``is_weekend``, ``is_night`` (hour >= 22 or hour <= 6), ``is_rush_hour``
          (hours 7-9 and 16-18), ``is_business_hours`` (hours 9-17 on weekdays)
        * ``season``, ``shift`` (Day 6-13, Evening 14-21, otherwise Night)
        * ``is_holiday`` (date present in the ``holidays`` package's US calendar)
        * ``distance_from_center`` (miles from 36.1627, -86.7816)
        * ``system_incidents_last_hour``, ``category_incidents_last_24h``,
          ``zone_incidents_last_week``, ``zone_incidents_last_month``

        The input frame is not modified.
    """
    import holidays

    # Work on a copy so the caller's frame is left untouched.
    data = df.copy()

    # Parse anything that is not already a datetime column (plain strings show
    # up as ``object`` or as a dedicated string dtype depending on pandas).
    if not pd.api.types.is_datetime64_any_dtype(data['datetime']):
        data['datetime'] = pd.to_datetime(data['datetime'])

    if 'incident_datetime' not in data.columns:
        data['incident_datetime'] = data['datetime']

    # Stable key used to merge per-group results back onto the right rows.
    data['_row_id'] = range(len(data))

    # ===== TEMPORAL FEATURES =====
    stamps = data['datetime'].dt
    data['hour'] = stamps.hour
    data['day_of_week'] = stamps.dayofweek  # 0=Monday, 6=Sunday
    data['month'] = stamps.month
    data['quarter'] = stamps.quarter
    data['day_of_year'] = stamps.dayofyear

    hour = data['hour']
    data['is_weekend'] = data['day_of_week'].isin([5, 6]).astype(int)
    data['is_night'] = ((hour >= 22) | (hour <= 6)).astype(int)
    data['is_rush_hour'] = (((hour >= 7) & (hour <= 9)) |
                            ((hour >= 16) & (hour <= 18))).astype(int)
    data['is_business_hours'] = ((hour >= 9) & (hour <= 17) &
                                 (data['day_of_week'] < 5)).astype(int)

    data['season'] = data['month'].map({
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall'
    })

    # Anything outside the two daytime ranges (including a missing hour) is 'Night'.
    data['shift'] = np.select(
        [(hour >= 6) & (hour < 14), (hour >= 14) & (hour < 22)],
        ['Day', 'Evening'],
        default='Night',
    )

    # The holidays calendar fills itself lazily, one year at a time, when a date
    # is looked up. Handing the bare object to ``Series.isin`` only iterates what
    # is already loaded (nothing), so every row came out as 0. Build the calendar
    # for the years present in the data and test against its concrete dates.
    years_present = data['datetime'].dt.year.dropna().astype(int).unique().tolist()
    us_holidays = holidays.US(years=years_present)
    holiday_dates = set(us_holidays.keys())
    data['is_holiday'] = data['datetime'].dt.date.isin(holiday_dates).astype(int)

    # ===== GEOGRAPHIC FEATURES =====

    # Distance from center (assuming Nashville city center: 36.1627, -86.7816)
    nashville_center_lat = 36.1627
    nashville_center_lon = -86.7816
    data['distance_from_center'] = _haversine_miles(
        data['lat'].astype(float), data['lon'].astype(float),
        nashville_center_lat, nashville_center_lon)

    # ===== WORKLOAD FEATURES USING MERGE APPROACH =====

    # Time-based rolling windows need a monotonically increasing time index.
    data = data.sort_values('datetime').reset_index(drop=True)

    print("Calculating workload features using merge approach...")

    # 1. System-wide incidents in the hour before each incident.
    system_ones = pd.Series(1, index=pd.DatetimeIndex(data['datetime']))
    data['system_incidents_last_hour'] = (
        system_ones.rolling('1h', closed='left').sum().fillna(0).to_numpy()
    )

    # 2./3. Per-category and per-zone counts, merged back on ``_row_id``.
    workload_specs = (
        ('category', "Calculating category-specific workload...",
         {'category_incidents_last_24h': '24h'}),
        ('ZONE_ID', "Calculating zone-specific workload...",
         {'zone_incidents_last_week': '7D', 'zone_incidents_last_month': '30D'}),
    )
    for group_col, progress_message, windows in workload_specs:
        counts = None
        if group_col in data.columns:
            print(progress_message)
            counts = _grouped_rolling_counts(data, group_col, windows)

        count_columns = list(windows)
        if counts is None:
            for column in count_columns:
                data[column] = 0
        else:
            data = data.merge(counts, on='_row_id', how='left')
            # Rows whose group value is missing have no match in ``counts``.
            data[count_columns] = data[count_columns].fillna(0)

    # Remove helper column
    data = data.drop(columns=['_row_id'])

    print("Workload feature calculation completed!")

    return data


In [23]:

# Read the raw incident export, then derive model features for every row
# (the full frame is processed here, not a sample).
incidents = pd.read_csv('data/incidents_data_for_modeling.csv')
incidents_featurized = engineer_features(incidents)



Calculating workload features using merge approach...
Calculating category-specific workload...
Calculating zone-specific workload...
Workload feature calculation completed!


In [24]:
# Fire Incident Response Time Model - Complete Training and ONNX Export
import json
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import torch.onnx


print("=== FIRE INCIDENT RESPONSE TIME MODEL - TRAINING & EXPORT ===\n")

# --- 1. Prepare Fire Incident Data ---
print("1. Preparing fire incident data...")

# Work on a private copy of the engineered frame.
df_fire = incidents_featurized.copy()

# Candidate model inputs, grouped by kind.
temporal_features = [
    'hour', 'day_of_week', 'month', 'quarter', 'day_of_year',
    'is_weekend', 'is_night', 'is_rush_hour', 'is_business_hours',
]

# Workload counts are currently disabled as inputs:
#   'category_incidents_last_24h', 'system_incidents_last_hour'
workload_features = []

geographic_features = ['lat', 'lon', 'distance_from_center']

categorical_features = ['shift', 'season', 'category', 'ZONE_ID', 'incident_type']

# Keep only the candidates that are actually present in the frame.
numerical_candidates = temporal_features + workload_features + geographic_features
present_columns = set(df_fire.columns)
available_numerical = [name for name in numerical_candidates if name in present_columns]
available_categorical = [name for name in categorical_features if name in present_columns]

print(f"   - Numerical features: {len(available_numerical)}")
print(f"   - Categorical features: {len(available_categorical)}")

# A usable row has a known, strictly positive response time.
target_column = df_fire['response_time']
valid_mask = target_column.notna() & target_column.gt(0)
df_clean = df_fire.loc[valid_mask].copy()

print(f"   - Total valid samples: {len(df_clean)}")

# The whole cleaned frame is used; swap in a sample here for quicker experiments.
df_sample = df_clean

target_min = df_sample['response_time'].min()
target_max = df_sample['response_time'].max()
print(f"   - Using sample size: {len(df_sample)}")
print(f"   - Target variable range: {target_min:.1f} - {target_max:.1f}")

# --- 2. Split Data First ---
print("\n2. Splitting data temporally...")

# Order rows chronologically when a timestamp is available so that the split
# below puts the earliest rows in training and the latest in validation.
has_timestamps = 'datetime' in df_sample.columns
if has_timestamps:
    df_sample_sorted = df_sample.sort_values('datetime').reset_index(drop=True)
    first_stamp = df_sample_sorted['datetime'].min()
    last_stamp = df_sample_sorted['datetime'].max()
    print(f"   - Data sorted by datetime: {first_stamp} to {last_stamp}")
else:
    df_sample_sorted = df_sample.copy()

# Inputs by kind, plus the regression target.
X_numerical = df_sample_sorted[available_numerical]
X_categorical = df_sample_sorted[available_categorical]
y = df_sample_sorted['response_time']

# Positional 70/30 cut: the first 70% of rows train, the remainder validates.
train_size = int(0.7 * len(X_numerical))

X_numerical_train, X_numerical_val = X_numerical.iloc[:train_size], X_numerical.iloc[train_size:]
X_categorical_train, X_categorical_val = X_categorical.iloc[:train_size], X_categorical.iloc[train_size:]
y_train_raw, y_val_raw = y.iloc[:train_size], y.iloc[train_size:]

print(f"   - Training samples: {len(X_numerical_train)} (earliest data)")
print(f"   - Validation samples: {len(X_numerical_val)} (latest data)")

# --- 3. Preprocess Data Properly ---
print("\n3. Preprocessing data...")

# The one-hot vocabulary is learned from train + validation rows together, so
# every category seen anywhere gets a column; the first level of each feature
# is dropped. NOTE(review): this exposes validation-period category names to
# the encoder — confirm that is intended.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore', drop='first').fit(X_categorical)
encoded_feature_names = encoder.get_feature_names_out(available_categorical)

# Scaling statistics come from the training rows only and are then reused
# unchanged for the validation rows.
scaler = StandardScaler()
X_numerical_train_scaled = scaler.fit_transform(X_numerical_train)
X_numerical_val_scaled = scaler.transform(X_numerical_val)

X_numerical_train_df = pd.DataFrame(X_numerical_train_scaled, columns=available_numerical)
X_numerical_val_df = pd.DataFrame(X_numerical_val_scaled, columns=available_numerical)

# Encode both partitions with the already fitted encoder.
X_categorical_train_encoded = encoder.transform(X_categorical_train)
X_categorical_val_encoded = encoder.transform(X_categorical_val)

X_categorical_train_df = pd.DataFrame(X_categorical_train_encoded, columns=encoded_feature_names)
X_categorical_val_df = pd.DataFrame(X_categorical_val_encoded, columns=encoded_feature_names)

# Final design matrices: scaled numeric columns first, one-hot columns after.
X_train_combined = pd.concat([X_numerical_train_df, X_categorical_train_df], axis=1)
X_val_combined = pd.concat([X_numerical_val_df, X_categorical_val_df], axis=1)

# Width of one input row; this is what the network's first layer must accept.
input_features = len(X_train_combined.columns)

print("   - Encoder fitted on full dataset")
print("   - Scaler fitted only on training data")
print(f"   - Final feature count: {input_features}")
print(f"   - Numerical features: {len(available_numerical)}")
print(f"   - Encoded categorical features: {len(encoded_feature_names)}")

# float32 tensors for PyTorch; targets become column vectors of shape (n, 1).
X_train = torch.tensor(X_train_combined.values, dtype=torch.float32)
X_val = torch.tensor(X_val_combined.values, dtype=torch.float32)
y_train = torch.tensor(y_train_raw.values, dtype=torch.float32).reshape(-1, 1)
y_val = torch.tensor(y_val_raw.values, dtype=torch.float32).reshape(-1, 1)

# --- 4. Save the Feature Mapping to JSON ---
print("\n4. Saving feature mapping...")

# Per categorical feature: the full vocabulary, the level removed by
# drop='first' (always the first one), and the levels that own a column.
category_levels = {
    feature: {
        "all_categories": levels.tolist(),
        "dropped_category": levels.tolist()[0],
        "encoded_categories": levels.tolist()[1:],
    }
    for feature, levels in zip(available_categorical, encoder.categories_)
}

# Everything an inference client needs to rebuild an input row in the same
# column order and with the same scaling as used for training.
feature_mapping = {
    "model_info": {
        "model_type": "fire_incident_response_time_predictor",
        "input_features": input_features,
        "target": "response_time",
        "sample_size": len(df_sample),
    },
    "numerical_features": {
        "names": available_numerical,
        "count": len(available_numerical),
        "scaler_params": {
            "mean": scaler.mean_.tolist(),
            "scale": scaler.scale_.tolist(),
        },
    },
    "categorical_features": {
        "original_names": available_categorical,
        "original_count": len(available_categorical),
        "encoded_names": encoded_feature_names.tolist(),
        "encoded_count": len(encoded_feature_names),
        "categories": category_levels,
    },
    "feature_order": list(X_train_combined.columns),
    "preprocessing_info": {
        "numerical_scaling": "StandardScaler (fitted on training data only)",
        "categorical_encoding": "OneHotEncoder (fitted on full dataset, drop='first')",
    },
}

# Write the mapping next to the model artifacts.
os.makedirs("models", exist_ok=True)
mapping_file = "models/fire_model_features_mapping.json"
with open(mapping_file, "w") as mapping_handle:
    json.dump(feature_mapping, mapping_handle, indent=4)

print(f"   - Feature mapping saved to {mapping_file}")

=== FIRE INCIDENT RESPONSE TIME MODEL - TRAINING & EXPORT ===

1. Preparing fire incident data...
   - Numerical features: 12
   - Categorical features: 5
   - Total valid samples: 495802
   - Using sample size: 495802
   - Target variable range: 1.0 - 34364.0

2. Splitting data temporally...
   - Data sorted by datetime: 2022-01-01 00:10:50 to 2025-07-22 23:54:24
   - Training samples: 347061 (earliest data)
   - Validation samples: 148741 (latest data)

3. Preprocessing data...
   - Encoder fitted on full dataset
   - Scaler fitted only on training data
   - Final feature count: 503
   - Numerical features: 12
   - Encoded categorical features: 491

4. Saving feature mapping...
   - Feature mapping saved to models/fire_model_features_mapping.json


In [25]:
# Display the mapping that was just written to JSON (names, scaler statistics, category levels).
feature_mapping

{'model_info': {'model_type': 'fire_incident_response_time_predictor',
  'input_features': 503,
  'target': 'response_time',
  'sample_size': 495802},
 'numerical_features': {'names': ['hour',
   'day_of_week',
   'month',
   'quarter',
   'day_of_year',
   'is_weekend',
   'is_night',
   'is_rush_hour',
   'is_business_hours',
   'lat',
   'lon',
   'distance_from_center'],
  'count': 12,
  'scaler_params': {'mean': [12.820953665205828,
    3.025422043963453,
    6.000066270770844,
    2.3364192461843882,
    166.93459074917666,
    0.28920852530246843,
    0.2538977297938979,
    0.2871137926762154,
    0.34588732240153747,
    36.15432638131337,
    -86.7506632231337,
    5.564732311786937],
   'scale': [6.298080409700673,
    1.9945979043675708,
    3.364623738211664,
    1.0908999640164148,
    102.77951744783464,
    0.4533949207863272,
    0.43523978747283976,
    0.4524151442329209,
    0.4756566856498837,
    0.0675571328050263,
    0.08000633555215991,
    3.750608162746109]}

In [26]:


import os
# Option 1: Deeper Network with Residual Connections
class FireIncidentNN_Deeper(nn.Module):
    """Six-hidden-layer MLP regressor with two identity skip connections.

    Hidden widths are 512, 512, 256, 256, 128 and 64. Each hidden layer is
    Linear -> BatchNorm1d -> ReLU -> Dropout. The 2nd and 4th layers keep
    their input width, so their input is added back onto their output. A
    final linear layer maps the 64 features to a single value.

    Args:
        input_features: Width of each input row.
    """

    def __init__(self, input_features):
        super().__init__()

        # (in width, out width, dropout probability) for hidden layers 1..6.
        # Layers are registered as fc<i>/bn<i>/dropout<i>, in this order.
        hidden_spec = (
            (input_features, 512, 0.3),
            (512, 512, 0.3),
            (512, 256, 0.2),
            (256, 256, 0.2),
            (256, 128, 0.1),
            (128, 64, 0.1),
        )
        for position, (n_in, n_out, drop_p) in enumerate(hidden_spec, start=1):
            setattr(self, f"fc{position}", nn.Linear(n_in, n_out))
            setattr(self, f"bn{position}", nn.BatchNorm1d(n_out))
            setattr(self, f"dropout{position}", nn.Dropout(drop_p))

        self.fc_out = nn.Linear(64, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every linear layer."""
        for layer in self.modules():
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    @staticmethod
    def _hidden(linear, norm, drop, x):
        """Apply one Linear -> BatchNorm -> ReLU -> Dropout stage."""
        return drop(torch.relu(norm(linear(x))))

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        h1 = self._hidden(self.fc1, self.bn1, self.dropout1, x)
        # Skip connection: dropout is applied before the input is added back.
        h2 = self._hidden(self.fc2, self.bn2, self.dropout2, h1) + h1
        h3 = self._hidden(self.fc3, self.bn3, self.dropout3, h2)
        h4 = self._hidden(self.fc4, self.bn4, self.dropout4, h3) + h3
        h5 = self._hidden(self.fc5, self.bn5, self.dropout5, h4)
        h6 = self._hidden(self.fc6, self.bn6, self.dropout6, h5)
        return self.fc_out(h6)

# Option 2: Wide Network (More neurons per layer, fewer layers)
class FireIncidentNN_Wide(nn.Module):
    """Wide, shallow MLP regressor: hidden widths 1024, 512 and 256.

    Each hidden layer is Linear -> BatchNorm1d -> ReLU -> Dropout, with
    dropout probabilities 0.4, 0.3 and 0.2. A final linear layer maps the
    256 features to a single value.

    Args:
        input_features: Width of each input row.
    """

    def __init__(self, input_features):
        super(FireIncidentNN_Wide, self).__init__()

        self.fc1 = nn.Linear(input_features, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.dropout1 = nn.Dropout(0.4)

        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.3)

        self.fc3 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.dropout3 = nn.Dropout(0.2)

        self.fc4 = nn.Linear(256, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        """He (Kaiming) normal weights and zero biases for every linear layer."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                # PyTorch exposes He initialization as ``kaiming_normal_``;
                # ``nn.init.he_normal_`` does not exist and raised
                # AttributeError as soon as this class was instantiated.
                nn.init.kaiming_normal_(module.weight, nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.dropout3(x)

        x = self.fc4(x)
        return x

# Option 3: Attention-based Network
class FireIncidentNN_Attention(nn.Module):
    """Regressor built from one transformer-style block over a projected input.

    The input row is projected to 512 features, passed through multi-head
    self-attention and a 512 -> 1024 -> 512 feed-forward stage (each followed
    by a residual add and LayerNorm), then reduced by 256- and 128-wide dense
    layers to a single value.

    Note: the projected row is treated as a sequence of length 1, so each
    row can only attend to itself.

    Args:
        input_features: Width of each input row.
    """

    def __init__(self, input_features):
        super().__init__()

        embed_width = 512

        # Projection of the raw feature row into the attention width.
        self.input_transform = nn.Linear(input_features, embed_width)
        self.bn_input = nn.BatchNorm1d(embed_width)

        # Attention stage.
        self.attention = nn.MultiheadAttention(embed_dim=embed_width, num_heads=8, batch_first=True)
        self.norm1 = nn.LayerNorm(embed_width)

        # Position-wise feed-forward stage; one dropout module is shared by
        # this stage and the dense head below.
        self.ff1 = nn.Linear(embed_width, 1024)
        self.ff2 = nn.Linear(1024, embed_width)
        self.norm2 = nn.LayerNorm(embed_width)
        self.dropout = nn.Dropout(0.3)

        # Dense head.
        self.fc1 = nn.Linear(embed_width, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every linear layer."""
        for layer in self.modules():
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        embedded = torch.relu(self.bn_input(self.input_transform(x)))

        # (batch, 512) -> (batch, 1, 512): a one-token sequence per row.
        tokens = embedded.unsqueeze(1)

        attended, _attention_weights = self.attention(tokens, tokens, tokens)
        tokens = self.norm1(tokens + attended)

        expanded = self.ff2(torch.relu(self.ff1(tokens)))
        tokens = self.norm2(tokens + self.dropout(expanded))

        # Back to (batch, 512) for the dense head.
        features = tokens.squeeze(1)
        for dense, norm in ((self.fc1, self.bn1), (self.fc2, self.bn2)):
            features = self.dropout(torch.relu(norm(dense(features))))

        return self.fc3(features)

# Option 4: Ensemble Network (Multiple paths)
class FireIncidentNN_Ensemble(nn.Module):
    """Three parallel ReLU MLP branches whose outputs are concatenated.

    Branch widths:
      * path1: 256 -> 128 -> 64 -> 32
      * path2: 512 -> 256
      * path3: 384 -> 192 -> 96

    The 32 + 256 + 96 concatenated features pass through
    Linear(128) -> BatchNorm1d -> ReLU -> Dropout(0.3) and a final linear
    layer that yields a single value. The branches use neither batch norm
    nor dropout.

    Args:
        input_features: Width of each input row.
    """

    def __init__(self, input_features):
        super().__init__()

        # Branch 1: four progressively narrower layers.
        self.path1_fc1 = nn.Linear(input_features, 256)
        self.path1_fc2 = nn.Linear(256, 128)
        self.path1_fc3 = nn.Linear(128, 64)
        self.path1_fc4 = nn.Linear(64, 32)

        # Branch 2: two wide layers.
        self.path2_fc1 = nn.Linear(input_features, 512)
        self.path2_fc2 = nn.Linear(512, 256)

        # Branch 3: three mid-sized layers.
        self.path3_fc1 = nn.Linear(input_features, 384)
        self.path3_fc2 = nn.Linear(384, 192)
        self.path3_fc3 = nn.Linear(192, 96)

        # Merge head.
        merged_width = 32 + 256 + 96
        self.combine = nn.Linear(merged_width, 128)
        self.final = nn.Linear(128, 1)

        self.dropout = nn.Dropout(0.3)
        self.bn = nn.BatchNorm1d(128)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every linear layer."""
        for layer in self.modules():
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    @staticmethod
    def _run_branch(layers, x):
        """Feed ``x`` through ``layers`` in order, applying ReLU after each."""
        for layer in layers:
            x = torch.relu(layer(x))
        return x

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        branch_outputs = [
            self._run_branch((self.path1_fc1, self.path1_fc2, self.path1_fc3, self.path1_fc4), x),
            self._run_branch((self.path2_fc1, self.path2_fc2), x),
            self._run_branch((self.path3_fc1, self.path3_fc2, self.path3_fc3), x),
        ]

        # Concatenate along the feature axis, then apply the merge head.
        merged = torch.cat(branch_outputs, dim=1)
        merged = self.dropout(torch.relu(self.bn(self.combine(merged))))

        return self.final(merged)

# Option 5: Simple Compact Network (Fewer parameters)
class FireIncidentNN_Compact(nn.Module):
    """Small MLP regressor with hidden widths 256, 128 and 64.

    Each hidden layer is Linear -> BatchNorm1d -> ReLU -> Dropout, with
    dropout probabilities 0.2, 0.2 and 0.1. A final linear layer maps the
    64 features to a single value.

    Args:
        input_features: Width of each input row.
    """

    def __init__(self, input_features):
        super().__init__()

        # (in width, out width, dropout probability) for hidden layers 1..3,
        # registered as fc<i>/bn<i>/dropout<i> in this order.
        hidden_spec = (
            (input_features, 256, 0.2),
            (256, 128, 0.2),
            (128, 64, 0.1),
        )
        for position, (n_in, n_out, drop_p) in enumerate(hidden_spec, start=1):
            setattr(self, f"fc{position}", nn.Linear(n_in, n_out))
            setattr(self, f"bn{position}", nn.BatchNorm1d(n_out))
            setattr(self, f"dropout{position}", nn.Dropout(drop_p))

        self.fc4 = nn.Linear(64, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every linear layer."""
        for layer in self.modules():
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        stages = (
            (self.fc1, self.bn1, self.dropout1),
            (self.fc2, self.bn2, self.dropout2),
            (self.fc3, self.bn3, self.dropout3),
        )
        hidden = x
        for linear, norm, drop in stages:
            hidden = drop(torch.relu(norm(linear(hidden))))
        return self.fc4(hidden)
    
class FireIncidentNN(nn.Module):
    """Baseline MLP regressor with hidden widths 512, 256 and 128.

    Each hidden layer is Linear -> BatchNorm1d -> ReLU -> Dropout, with
    dropout probabilities 0.2, 0.2 and 0.1. A final linear layer maps the
    128 features to a single value.

    Args:
        input_features: Width of each input row.
    """

    def __init__(self, input_features):
        super().__init__()

        # Hidden stage 1
        self.fc1 = nn.Linear(input_features, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)

        # Hidden stage 2
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)

        # Hidden stage 3
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.1)

        # Regression head
        self.fc4 = nn.Linear(128, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every linear layer."""
        for layer in self.modules():
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        pipeline = (
            self.fc1, self.bn1, self.relu1, self.dropout1,
            self.fc2, self.bn2, self.relu2, self.dropout2,
            self.fc3, self.bn3, self.relu3, self.dropout3,
            self.fc4,
        )
        for step in pipeline:
            x = step(x)
        return x


# Summary of the architectures defined in this cell.
print("✅ Alternative model architectures defined!")
print("\nAvailable models:")
print("\n".join((
    "1. FireIncidentNN_Deeper - Deep network with residual connections",
    "2. FireIncidentNN_Wide - Wide network with more neurons per layer",
    "3. FireIncidentNN_Attention - Self-attention based network",
    "4. FireIncidentNN_Ensemble - Multiple pathway ensemble",
    "5. FireIncidentNN_Compact - Smaller, more efficient network",
)))
print("\nTo use any of these, simply replace 'FireIncidentNN' with the desired class name in the model instantiation below.")

✅ Alternative model architectures defined!

Available models:
1. FireIncidentNN_Deeper - Deep network with residual connections
2. FireIncidentNN_Wide - Wide network with more neurons per layer
3. FireIncidentNN_Attention - Self-attention based network
4. FireIncidentNN_Ensemble - Multiple pathway ensemble
5. FireIncidentNN_Compact - Smaller, more efficient network

To use any of these, simply replace 'FireIncidentNN' with the desired class name in the model instantiation below.


In [27]:
# List the columns of the engineered frame (original columns followed by the derived features).
incidents_featurized.columns

Index(['incident_id', 'incident_type', 'datetime', 'lat', 'lon', 'category',
       'response_time', 'resolution_time', 'ZONE_ID', 'incident_datetime',
       'hour', 'day_of_week', 'month', 'quarter', 'day_of_year', 'is_weekend',
       'is_night', 'is_rush_hour', 'is_business_hours', 'season', 'shift',
       'is_holiday', 'distance_from_center', 'system_incidents_last_hour',
       'category_incidents_last_24h', 'zone_incidents_last_week',
       'zone_incidents_last_month'],
      dtype='object')

In [28]:

# Model switch: set ``model_choice`` to one of the keys of ``model_classes``
# ('original', 'deeper', 'wide', 'attention', 'ensemble', 'compact').
model_choice = 'original'

# Registry mapping each choice to its network class.
model_classes = dict(
    original=FireIncidentNN,
    deeper=FireIncidentNN_Deeper,
    wide=FireIncidentNN_Wide,
    attention=FireIncidentNN_Attention,
    ensemble=FireIncidentNN_Ensemble,
    compact=FireIncidentNN_Compact,
)

# Class to instantiate for training; an unknown choice raises KeyError here.
SelectedModel = model_classes[model_choice]

print(f"🔥 Selected Model: {model_choice.upper()}")
print(f"🏗️  Model Class: {SelectedModel.__name__}")

# Downstream code builds the network with ``SelectedModel(input_features)``.





🔥 Selected Model: ORIGINAL
🏗️  Model Class: FireIncidentNN


In [29]:
# --- 5. Define the Fire Incident PyTorch Model ---
print("\n5. Defining PyTorch model...")

# Seed before the weights are drawn so reruns start from the same initialisation.
TRAINING_SEED = 42
torch.manual_seed(TRAINING_SEED)

# Instantiate the selected model
fire_model = SelectedModel(input_features)

print(f"   - Model created with {input_features} input features")
print(f"   - Model type: {SelectedModel.__name__}")
print(f"   - Total parameters: {sum(p.numel() for p in fire_model.parameters()):,}")

# --- 6. Train the Model ---
print("\n6. Training the model...")
print(f"   - Validation samples: {len(X_val)} (latest data)")


def _snapshot_weights(model):
    """Return an independent copy of the model's parameters and buffers.

    ``state_dict()`` hands back tensors that share storage with the live
    model, and ``dict.copy()`` is shallow, so without cloning the "saved"
    weights keep changing with every optimizer step.
    """
    return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}


# Training setup
criterion = nn.MSELoss()
optimizer = optim.Adam(fire_model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5)

num_epochs = 200
best_val_loss = float('inf')
patience_counter = 0
patience = 20

train_losses = []
val_losses = []

# Start from the initial weights so there is always a state to restore, even
# if the validation loss never becomes finite.
best_model_state = _snapshot_weights(fire_model)

for epoch in range(num_epochs):
    # Training: one full-batch gradient step per epoch.
    fire_model.train()
    optimizer.zero_grad()
    train_outputs = fire_model(X_train)
    train_loss = criterion(train_outputs, y_train)
    train_loss.backward()
    optimizer.step()

    # Validation
    fire_model.eval()
    with torch.no_grad():
        val_outputs = fire_model(X_val)
        val_loss = criterion(val_outputs, y_val)

    train_losses.append(train_loss.item())
    val_losses.append(val_loss.item())

    # Learning rate scheduling
    scheduler.step(val_loss)

    # Early stopping bookkeeping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Freeze a real copy of the best weights seen so far.
        best_model_state = _snapshot_weights(fire_model)
    else:
        patience_counter += 1

    if (epoch + 1) % 25 == 0:
        print(f'   Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Val Loss: {val_loss.item():.4f}')

    if patience_counter >= patience:
        print(f"   Early stopping at epoch {epoch+1}")
        break

# Restore the weights from the epoch with the lowest validation loss.
fire_model.load_state_dict(best_model_state)

print(f"\n   Training complete!")
print(f"   Best validation loss: {best_val_loss:.4f}")

# Calculate final metrics with the restored weights.
fire_model.eval()
with torch.no_grad():
    train_pred = fire_model(X_train)
    val_pred = fire_model(X_val)

    train_mae = torch.mean(torch.abs(train_pred - y_train)).item()
    val_mae = torch.mean(torch.abs(val_pred - y_val)).item()

    print(f"   Final Train MAE: {train_mae:.2f}")
    print(f"   Final Validation MAE: {val_mae:.2f}")

# --- 6. Export to ONNX ---
# Trace `fire_model` with a single random example and write the resulting graph
# (including trained weights) to disk. Export is best-effort: a failure is
# reported but does not stop the rest of the cell.
print("\n6. Exporting to ONNX...")

# Set model to evaluation mode
fire_model.eval()

# Create a dummy input tensor with the same feature width as the training data
# NOTE(review): no dynamic_axes are passed, so the exported graph's batch
# dimension presumably stays fixed at this dummy's size of 1 — confirm that
# consumers only ever score one row at a time.
dummy_input = torch.randn(1, input_features)

# Export the model
onnx_model_path = "models/fire_incident_model.onnx"
# Create the target directory up front; otherwise a fresh checkout without
# "models/" fails inside the try block and is only visible as a printed error.
os.makedirs(os.path.dirname(onnx_model_path), exist_ok=True)
try:
    torch.onnx.export(
        fire_model,
        dummy_input,
        onnx_model_path,
        verbose=False,
        opset_version=14,  # Recommended for compatibility
        export_params=True,
        do_constant_folding=True
    )
    print(f"   ✅ Model successfully exported to {onnx_model_path}")
except Exception as e:
    print(f"   ❌ Error during ONNX export: {e}")

# --- 7. Save Additional Model Information ---
print("\n7. Saving additional model information...")

# Store the trained weights and the feature mapping together in one
# checkpoint file.
torch_model_path = "models/fire_incident_model.pth"
torch.save(
    dict(
        model_state_dict=fire_model.state_dict(),
        feature_mapping=feature_mapping,
    ),
    torch_model_path,
)
print(f"   ✅ PyTorch model saved to {torch_model_path}")

# Closing banner: artifact locations first ...
print(
    "\n" + "="*60,
    "🔥 FIRE INCIDENT MODEL TRAINING & EXPORT COMPLETE!",
    "="*60,
    f"📁 Files saved:",
    f"   - ONNX model: {onnx_model_path}",
    f"   - PyTorch model: {torch_model_path}",
    f"   - Feature mapping: {mapping_file}",
    sep="\n",
)

# ... then the headline numbers from training.
print(
    f"\n🎯 Model Performance:",
    f"   - Validation MAE: {val_mae:.2f} seconds",
    f"   - Input features: {input_features}",
    f"   - Training samples: {len(X_train):,}",
    "="*60,
    sep="\n",
)


5. Defining PyTorch model...
   - Model created with 503 input features
   - Model type: FireIncidentNN
   - Total parameters: 424,193

6. Training the model...
   - Validation samples: 148741 (latest data)
   Epoch [25/200], Train Loss: 6366948.5000, Val Loss: 6447840.0000
   Epoch [50/200], Train Loss: 6355550.0000, Val Loss: 6434829.0000
   Epoch [75/200], Train Loss: 6345579.5000, Val Loss: 6425259.0000
   Epoch [100/200], Train Loss: 6335150.5000, Val Loss: 6418079.0000
   Epoch [125/200], Train Loss: 6323251.5000, Val Loss: 6407668.5000
   Epoch [150/200], Train Loss: 6309720.5000, Val Loss: 6394594.0000
   Epoch [175/200], Train Loss: 6293699.5000, Val Loss: 6377625.0000
   Epoch [200/200], Train Loss: 6275508.5000, Val Loss: 6358323.0000

   Training complete!
   Best validation loss: 6358323.0000
   Final Train MAE: 2082.53
   Final Validation MAE: 2095.61

6. Exporting to ONNX...
   ✅ Model successfully exported to models/fire_incident_model.onnx

7. Saving additional model 

In [30]:
# Tree-ensemble regressors fitted on the combined feature matrix and scored
# with mean absolute error on the training and validation splits.
rf_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=15,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features='sqrt',
    random_state=42,
    n_jobs=-1,
    verbose=1
)
gb_model = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
    min_samples_split=5,
    min_samples_leaf=2,
    subsample=0.8,
    random_state=42,
    verbose=1
)

rf_model.fit(X_train_combined, y_train_raw)
gb_model.fit(X_train_combined, y_train_raw)


def _predict_and_score(model, features, targets):
    """Predict with a fitted regressor and score the result.

    Args:
        model: Fitted estimator exposing ``predict``.
        features: Feature matrix to predict on.
        targets: True target values aligned row-for-row with ``features``.

    Returns:
        Tuple ``(predictions, mae)`` where ``mae`` is the mean absolute error
        of ``predictions`` against ``targets``.
    """
    predictions = model.predict(features)
    return predictions, mean_absolute_error(targets, predictions)


# Evaluate Random Forest
rf_train_pred, rf_train_mae = _predict_and_score(rf_model, X_train_combined, y_train_raw)
rf_val_pred, rf_val_mae = _predict_and_score(rf_model, X_val_combined, y_val_raw)
print(f"\nRandom Forest - Train MAE: {rf_train_mae:.2f}, Val MAE: {rf_val_mae:.2f}")

# Evaluate Gradient Boosting
gb_train_pred, gb_train_mae = _predict_and_score(gb_model, X_train_combined, y_train_raw)
gb_val_pred, gb_val_mae = _predict_and_score(gb_model, X_val_combined, y_val_raw)
print(f"Gradient Boosting - Train MAE: {gb_train_mae:.2f}, Val MAE: {gb_val_mae:.2f}")

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 14 concurrent workers.
[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 172 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:    4.3s finished


      Iter       Train Loss      OOB Improve   Remaining Time 
         1     1841745.4109      126971.9531            5.47m
         2     1734641.5200      107894.5689            5.43m
         3     1644061.8830       74126.7832            5.41m
         4     1562624.6217       27886.7571            5.38m
         5     1518005.1880      113586.4946            5.33m
         6     1468199.8938       39042.7298            5.30m
         7     1430022.6035       46149.7437            5.26m
         8     1393803.1919       14245.5817            5.24m
         9     1367059.4643       30671.1643            5.21m
        10     1344083.2705       20464.4202            5.20m
        20     1250709.6433        2551.8625            4.97m
        30     1229793.1148       -8223.9757            4.72m
        40     1218596.9993       -6400.8993            4.47m
        50     1215796.0733      -13644.9952            4.22m
        60     1212671.1178       17968.8072            3.96m
       

[Parallel(n_jobs=14)]: Using backend ThreadingBackend with 14 concurrent workers.
[Parallel(n_jobs=14)]: Done  22 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 172 tasks      | elapsed:    0.2s
[Parallel(n_jobs=14)]: Done 200 out of 200 | elapsed:    0.3s finished
[Parallel(n_jobs=14)]: Using backend ThreadingBackend with 14 concurrent workers.
[Parallel(n_jobs=14)]: Done  22 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 172 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 200 out of 200 | elapsed:    0.1s finished



Random Forest - Train MAE: 902.81, Val MAE: 895.58
Gradient Boosting - Train MAE: 817.78, Val MAE: 804.76


In [31]:
# Export the fitted scikit-learn regressors (random forest and gradient
# boosting) to ONNX files under "models/".
import skl2onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Make sure the output directory exists before opening files for writing.
os.makedirs("models", exist_ok=True)

# Save ONNX version of the RF model.
# The input is declared as a float tensor with a free batch dimension. Its
# width is read from the fitted estimator rather than from `input_features`
# (which belongs to the PyTorch model), so the exported graph always matches
# the matrix the estimator was actually trained on.
initial_type = [('float_input', FloatTensorType([None, rf_model.n_features_in_]))]
onnx_rf_model = convert_sklearn(rf_model, initial_types=initial_type)
onnx_rf_model_path = "models/fire_incident_rf_model.onnx"
with open(onnx_rf_model_path, "wb") as f:
    f.write(onnx_rf_model.SerializeToString())
print(f"   ✅ Random Forest model successfully exported to {onnx_rf_model_path}")


# Save ONNX version of the GB model, sized from its own fitted feature count.
onnx_gb_model = convert_sklearn(
    gb_model,
    initial_types=[('float_input', FloatTensorType([None, gb_model.n_features_in_]))],
)
onnx_gb_model_path = "models/fire_incident_gb_model.onnx"
with open(onnx_gb_model_path, "wb") as f:
    f.write(onnx_gb_model.SerializeToString())
print(f"   ✅ Gradient Boosting model successfully exported to {onnx_gb_model_path}")

   ✅ Random Forest model successfully exported to models/fire_incident_rf_model.onnx
   ✅ Gradient Boosting model successfully exported to models/fire_incident_gb_model.onnx
