In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import holidays
import joblib
import os
from datetime import datetime
import joblib
warnings.filterwarnings('ignore')


In [2]:
def engineer_features(df):
    """
    FINAL CORRECTED version using merge operations to avoid index issues
    """
    import pandas as pd
    import numpy as np
    import holidays
    
    # Make a copy to avoid modifying the original
    data = df.copy()
    
    # Ensure datetime is properly parsed
    if data['datetime'].dtype == 'object':
        data['datetime'] = pd.to_datetime(data['datetime'])
    
    # Create incident_datetime if it doesn't exist
    if 'incident_datetime' not in data.columns:
        data['incident_datetime'] = data['datetime']
    
    # Add row identifier to preserve order
    data['_row_id'] = range(len(data))
    
    # ===== TEMPORAL FEATURES =====
    
    # Basic time components
    data['hour'] = data['datetime'].dt.hour
    data['day_of_week'] = data['datetime'].dt.dayofweek  # 0=Monday, 6=Sunday
    data['month'] = data['datetime'].dt.month
    data['quarter'] = data['datetime'].dt.quarter
    data['day_of_year'] = data['datetime'].dt.dayofyear
    
    # Binary temporal indicators
    data['is_weekend'] = data['day_of_week'].isin([5, 6]).astype(int)  # Saturday, Sunday
    data['is_night'] = ((data['hour'] >= 22) | (data['hour'] <= 6)).astype(int)  # 10 PM - 6 AM
    data['is_rush_hour'] = (((data['hour'] >= 7) & (data['hour'] <= 9)) | 
                           ((data['hour'] >= 16) & (data['hour'] <= 18))).astype(int)  # Morning & evening rush
    data['is_business_hours'] = ((data['hour'] >= 9) & (data['hour'] <= 17) & 
                                (data['day_of_week'] < 5)).astype(int)  # 9 AM - 5 PM weekdays
    
    # Season mapping
    data['season'] = data['month'].map({
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall'
    })
    
    # Shift mapping using a proper approach
    def get_shift(hour):
        if 6 <= hour < 14:
            return 'Day'
        elif 14 <= hour < 22:
            return 'Evening'
        else:
            return 'Night'
    
    data['shift'] = data['hour'].apply(get_shift)
    
    # Holiday indicator (US federal holidays)
    us_holidays = holidays.US()
    data['is_holiday'] = data['datetime'].dt.date.isin(us_holidays).astype(int)
    
    # ===== GEOGRAPHIC FEATURES =====
    
    # Distance from center (assuming Nashville city center: 36.1627, -86.7816)
    nashville_center_lat = 36.1627
    nashville_center_lon = -86.7816
    
    def haversine_distance(lat1, lon1, lat2, lon2):
        """Calculate the great circle distance between two points on earth"""
        R = 3959  # Earth's radius in miles
        
        lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        
        a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
        c = 2 * np.arcsin(np.sqrt(a))
        distance = R * c
        
        return distance
    
    data['distance_from_center'] = data.apply(lambda row: haversine_distance(
        row['lat'], row['lon'], nashville_center_lat, nashville_center_lon), axis=1)
    
    # ===== WORKLOAD FEATURES USING MERGE APPROACH =====
    
    # Sort by datetime for all calculations
    data = data.sort_values('datetime').reset_index(drop=True)
    
    print("Calculating workload features using merge approach...")
    
    # 1. System-wide incidents in last hour
    data_temp = data.set_index('datetime')
    data_temp['count_helper'] = 1
    system_workload = data_temp['count_helper'].rolling('1h', closed='left').sum().fillna(0)
    data['system_incidents_last_hour'] = system_workload.values
    
    # 2. Category-specific incidents in last 24 hours using merge approach
    if 'category' in data.columns:
        print("Calculating category-specific workload...")
        category_workload_list = []
        
        for category in data['category'].unique():
            if pd.isna(category):
                continue
            
            # Get data for this category only
            cat_data = data[data['category'] == category][['datetime', '_row_id']].copy()
            cat_data = cat_data.set_index('datetime')
            cat_data['count_helper'] = 1
            
            # Calculate rolling sum for this category
            cat_rolling = cat_data['count_helper'].rolling('24h', closed='left').sum().fillna(0)
            
            # Create dataframe with results
            cat_results = pd.DataFrame({
                '_row_id': cat_data['_row_id'],
                'category_incidents_last_24h': cat_rolling.values
            })
            
            category_workload_list.append(cat_results)
        
        # Combine all category results
        if category_workload_list:
            all_category_workload = pd.concat(category_workload_list, ignore_index=True)
            # Merge back to main data
            data = data.merge(all_category_workload, on='_row_id', how='left')
            data['category_incidents_last_24h'] = data['category_incidents_last_24h'].fillna(0)
        else:
            data['category_incidents_last_24h'] = 0
    else:
        data['category_incidents_last_24h'] = 0
    
    # 3. Zone-specific incidents using merge approach
    if 'ZONE_ID' in data.columns:
        print("Calculating zone-specific workload...")
        zone_workload_list = []
        
        for zone_id in data['ZONE_ID'].unique():
            if pd.isna(zone_id):
                continue
            
            # Get data for this zone only
            zone_data = data[data['ZONE_ID'] == zone_id][['datetime', '_row_id']].copy()
            zone_data = zone_data.set_index('datetime')
            zone_data['count_helper'] = 1
            
            # Calculate rolling sums for this zone
            zone_rolling_week = zone_data['count_helper'].rolling('7D', closed='left').sum().fillna(0)
            zone_rolling_month = zone_data['count_helper'].rolling('30D', closed='left').sum().fillna(0)
            
            # Create dataframe with results
            zone_results = pd.DataFrame({
                '_row_id': zone_data['_row_id'],
                'zone_incidents_last_week': zone_rolling_week.values,
                'zone_incidents_last_month': zone_rolling_month.values
            })
            
            zone_workload_list.append(zone_results)
        
        # Combine all zone results
        if zone_workload_list:
            all_zone_workload = pd.concat(zone_workload_list, ignore_index=True)
            # Merge back to main data
            data = data.merge(all_zone_workload, on='_row_id', how='left')
            data['zone_incidents_last_week'] = data['zone_incidents_last_week'].fillna(0)
            data['zone_incidents_last_month'] = data['zone_incidents_last_month'].fillna(0)
        else:
            data['zone_incidents_last_week'] = 0
            data['zone_incidents_last_month'] = 0
    else:
        data['zone_incidents_last_week'] = 0
        data['zone_incidents_last_month'] = 0
    
    # Remove helper column
    data = data.drop(columns=['_row_id'])
    
    print("Workload feature calculation completed!")
    
    return data


In [3]:

class FireIncidentResponsePredictor:
    def __init__(self):
        self.models = {}
        self.best_model = None
        self.best_model_name = None
        self.preprocessor = None
        self.feature_columns = []
        self.is_trained = False
        self.training_results = {}
        
    def _prepare_features(self, data, target_column='response_time'):
        """Prepare features from data - used internally"""
        # Apply feature engineering if not already done
        if 'incident_datetime' not in data.columns:
            data = engineer_features(data.copy())
        
        # Define feature groups
        temporal_features = [
            'hour', 'day_of_week', 'month', 'quarter', 'day_of_year',
            'is_weekend', 'is_night', 'is_rush_hour', 'is_business_hours', 'is_holiday'
        ]
        
        workload_features = [
            'category_incidents_last_24h', 'system_incidents_last_hour'
        ]
        
        geographic_features = [
            'lat', 'lon', 'distance_from_center', 'zone_incidents_last_week', 'zone_incidents_last_month'
        ]
        
        categorical_features = [
            'shift', 'season', 'category', 'ZONE_ID', 'incident_type'
        ]
        
        # Combine all numerical features
        numerical_features = temporal_features + workload_features + geographic_features
        
        # Filter features that exist in the data
        available_numerical = [f for f in numerical_features if f in data.columns]
        available_categorical = [f for f in categorical_features if f in data.columns]
        
        # Store feature columns for later use
        feature_columns = available_numerical + available_categorical
        
        # Filter valid data
        if target_column in data.columns:
            valid_mask = (
                data[target_column].notna() & 
                (data[target_column] > 0)
            )
            df_clean = data[valid_mask].copy()
        else:
            df_clean = data.copy()
        
        return df_clean, feature_columns, available_numerical, available_categorical
    
    def train(self, train_data, target_column='response_time_seconds', test_size=0.15, val_size=0.15):
        """Train all models on the provided dataset"""
        
        print("Fire Incident Response Time Prediction - Training")
        print("=" * 60)
        
        # Prepare training data
        df_clean, feature_columns, available_numerical, available_categorical = self._prepare_features(
            train_data, target_column
        )
        
        self.feature_columns = feature_columns
        
        print(f"Available numerical features: {len(available_numerical)}")
        print(f"Available categorical features: {len(available_categorical)}")
        print(f"Valid training samples: {len(df_clean)}")
        
        X = df_clean[feature_columns]
        y = df_clean[target_column]
        
        # Create preprocessor
        numerical_transformer = StandardScaler()
        categorical_transformer = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
        
        self.preprocessor = ColumnTransformer(
            transformers=[
                ('num', numerical_transformer, available_numerical),
                ('cat', categorical_transformer, available_categorical)
            ]
        )
        
        # Initialize models
        self._initialize_models()
        
        # Split data (temporal split if datetime available)
        if 'incident_datetime' in df_clean.columns:
            sort_idx = df_clean['incident_datetime'].argsort()
            sorted_X = X.iloc[sort_idx]
            sorted_y = y.iloc[sort_idx]
            
            n_samples = len(sorted_X)
            train_end = int(n_samples * (1 - test_size - val_size))
            val_end = int(n_samples * (1 - test_size))
            
            X_train = sorted_X.iloc[:train_end]
            X_val = sorted_X.iloc[train_end:val_end]
            X_test = sorted_X.iloc[val_end:]
            
            y_train = sorted_y.iloc[:train_end]
            y_val = sorted_y.iloc[train_end:val_end]
            y_test = sorted_y.iloc[val_end:]
        else:
            X_temp, X_test, y_temp, y_test = train_test_split(
                X, y, test_size=test_size, random_state=42
            )
            val_size_adjusted = val_size / (1 - test_size)
            X_train, X_val, y_train, y_val = train_test_split(
                X_temp, y_temp, test_size=val_size_adjusted, random_state=42
            )
        
        print(f"\nTraining set: {len(X_train)}")
        print(f"Validation set: {len(X_val)}")
        print(f"Test set: {len(X_test)}")
        
        # Train all models
        self.training_results = {}
        
        for name, model in self.models.items():
            print(f"\nTraining {name}...")
            
            # Create pipeline
            pipeline = Pipeline([
                ('preprocessor', self.preprocessor),
                ('regressor', model)
            ])
            
            # Hyperparameter tuning
            if name in self.param_grids:
                grid_search = GridSearchCV(
                    pipeline, self.param_grids[name], 
                    cv=2, scoring='neg_mean_absolute_error', n_jobs=-1
                )
                grid_search.fit(X_train, y_train)
                best_pipeline = grid_search.best_estimator_
                print(f"Best parameters: {grid_search.best_params_}")
            else:
                best_pipeline = pipeline
                best_pipeline.fit(X_train, y_train)
            
            # Store trained model
            self.models[name] = best_pipeline
            
            # Evaluate
            y_val_pred = best_pipeline.predict(X_val)
            y_test_pred = best_pipeline.predict(X_test)
            
            # Metrics
            val_mae = mean_absolute_error(y_val, y_val_pred)
            test_mae = mean_absolute_error(y_test, y_test_pred)
            test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
            test_r2 = r2_score(y_test, y_test_pred)
            
            self.training_results[name] = {
                'val_mae': val_mae,
                'test_mae': test_mae,
                'test_rmse': test_rmse,
                'test_r2': test_r2
            }
            
            print(f"Validation MAE: {val_mae:.2f}, Test MAE: {test_mae:.2f}, Test R²: {test_r2:.3f}")
        
        # Find best model
        self.best_model_name = min(self.training_results.keys(), 
                                 key=lambda x: self.training_results[x]['test_mae'])
        self.best_model = self.models[self.best_model_name]
        self.is_trained = True
        
        print(f"\n" + "="*60)
        print(f"TRAINING COMPLETED")
        print(f"Best Model: {self.best_model_name}")
        print(f"Best Test MAE: {self.training_results[self.best_model_name]['test_mae']:.2f}")
        print("="*60)
        
        return self.training_results
    
    def test(self, test_data, target_column='response_time_seconds', models_to_test=None):
        """Test trained models on new dataset"""
        
        if not self.is_trained:
            raise ValueError("Models not trained yet. Call train() first.")
        
        print(f"\nTesting models on new dataset...")
        print("=" * 50)
        
        # Prepare test data
        df_clean, _, _, _ = self._prepare_features(test_data, target_column)
        
        # Use same feature columns as training
        available_features = [f for f in self.feature_columns if f in df_clean.columns]
        missing_features = set(self.feature_columns) - set(available_features)
        
        if missing_features:
            print(f"Warning: Missing features in test data: {missing_features}")
        
        X_test = df_clean[available_features]
        y_test = df_clean[target_column] if target_column in df_clean.columns else None
        
        print(f"Test samples: {len(X_test)}")
        
        # Test specified models or all models
        models_to_evaluate = models_to_test or list(self.models.keys())
        test_results = {}
        
        for model_name in models_to_evaluate:
            if model_name not in self.models:
                print(f"Warning: Model '{model_name}' not found")
                continue
            
            try:
                model = self.models[model_name]
                y_pred = model.predict(X_test)
                
                result = {
                    'predictions': y_pred,
                    'model': model
                }
                
                # Calculate metrics if target is available
                if y_test is not None:
                    mae = mean_absolute_error(y_test, y_pred)
                    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
                    r2 = r2_score(y_test, y_pred)

                    mse = mean_squared_error(y_test, y_pred)
                    rmse = np.sqrt(mse)
                    
                    # Bias (mean error)
                    bias = np.mean(y_pred - y_test)
                    
                    # Variance of predictions
                    variance = np.var(y_pred)
                    
                    # Additional metrics
                    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100  # Mean Absolute Percentage Error
                    r2 = 1 - (np.sum((y_test - y_pred) ** 2) / np.sum((y_test - np.mean(y_test)) ** 2))  # R-squared
                    result.update({
                        'mae': mae,
                        'rmse': rmse,
                        'r2': r2,
                        'actual': y_test,
                        'mape': mape,
                        'bias': bias,
                        'variance': variance
                    })
                    
                    print(f"{model_name:20} - MAE: {mae:6.2f}, RMSE: {rmse:6.2f}, R²: {r2:6.3f}, MAPE: {mape:6.2f}%, Bias: {bias:6.2f}, Variance: {variance:6.2f}")
                else:
                    print(f"{model_name:20} - Predictions generated (no target for evaluation)")
                
                test_results[model_name] = result
                
            except Exception as e:
                print(f"Error testing {model_name}: {e}")
        
        return test_results
    
    def predict(self, new_data, model_name=None, feature_columns=None):
        """Make predictions on new data using specified model or best model"""
        
        if not self.is_trained:
            raise ValueError("Models not trained yet. Call train() first.")
        
        # Use specified model or best model
        model_to_use = model_name or self.best_model_name
        
        if model_to_use not in self.models:
            raise ValueError(f"Model '{model_to_use}' not found")
        
        # Prepare data
        df_clean, _, _, _ = self._prepare_features(new_data)
        print(df_clean.shape)
        # Use same feature columns as training
        if feature_columns is not None:
            available_features = [f for f in feature_columns if f in df_clean.columns]
        else:
            available_features = [f for f in self.feature_columns if f in df_clean.columns]
        print(available_features)
        X = df_clean[available_features]
        # Make predictions
        model = self.models[model_to_use]
        predictions = model.predict(X)
        
        return predictions
    
    def save_models(self, save_dir="models", prefix="fire_models"):
        """Save all trained models"""
        
        if not self.is_trained:
            raise ValueError("No trained models to save")
        
        os.makedirs(save_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d")
        
        # Save individual models
        model_paths = {}
        for model_name, model in self.models.items():
            clean_name = model_name.replace(" ", "_").lower()
            model_path = f"{save_dir}/{prefix}_{clean_name}_{timestamp}.pkl"
            joblib.dump(model, model_path)
            model_paths[model_name] = model_path
        
        # Save predictor state
        predictor_state = {
            'feature_columns': self.feature_columns,
            'best_model_name': self.best_model_name,
            'training_results': self.training_results,
            'model_paths': model_paths
        }
        
        state_path = f"{save_dir}/{prefix}_state_{timestamp}.pkl"
        joblib.dump(predictor_state, state_path)
        
        print(f"Models saved to {save_dir} with timestamp {timestamp}")
        return timestamp
    
    def load_models(self, save_dir="models", timestamp=None, prefix="fire_models"):
        """Load previously saved models"""
        
        import glob
        
        if timestamp is None:
            # Find most recent
            state_files = glob.glob(f"{save_dir}/{prefix}_state_*.pkl")
            if not state_files:
                raise ValueError("No saved models found")
            state_path = max(state_files, key=os.path.getctime)
        else:
            state_path = f"{save_dir}/{prefix}_state_{timestamp}.pkl"
        
        # Load predictor state
        predictor_state = joblib.load(state_path)
        
        self.feature_columns = predictor_state['feature_columns']
        self.best_model_name = predictor_state['best_model_name']
        self.training_results = predictor_state['training_results']
        
        # Load individual models
        self.models = {}
        for model_name, model_path in predictor_state['model_paths'].items():
            self.models[model_name] = joblib.load(model_path)
        
        self.best_model = self.models[self.best_model_name]
        self.is_trained = True
        
        print(f"Models loaded successfully")
        print(f"Available models: {list(self.models.keys())}")
        print(f"Best model: {self.best_model_name}")
    
    def get_model_performance(self):
        """Get training performance of all models"""
        if not self.is_trained:
            return "No trained models available"
        
        return pd.DataFrame(self.training_results).T.sort_values('test_mae')
    
    def _initialize_models(self):
        """Initialize regression models with fire-specific parameters"""
        self.models = {
            'Linear Regression': LinearRegression(),
            'Ridge Regression': Ridge(alpha=1.0),
            'Lasso Regression': Lasso(alpha=1.0),
            'Elastic Net': ElasticNet(alpha=1.0),
            'Random Forest': RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42),
            'Extra Trees': ExtraTreesRegressor(n_estimators=50, max_depth=10, random_state=42),
            'Gradient Boosting': GradientBoostingRegressor(n_estimators=50, max_depth=4, random_state=42),

        }
        
        # Enhanced hyperparameter grids
        self.param_grids = {
            'Ridge Regression': {'regressor__alpha': [0.1, 1.0, 10.0, 100.0]},
            'Lasso Regression': {'regressor__alpha': [0.1, 1.0, 10.0, 100.0]},
            'Elastic Net': {
                'regressor__alpha': [0.1, 1.0, 10.0],
                'regressor__l1_ratio': [0.1, 0.5, 0.9]
            },
            'Random Forest': {
                'regressor__n_estimators': [50, 100],
                'regressor__max_depth': [5, 10],
                'regressor__min_samples_split': [5, 10]
            },
            'Extra Trees': {
                'regressor__n_estimators': [50, 100],
                'regressor__max_depth': [5, 10], 
                'regressor__min_samples_split': [5, 10]
            },
            'Gradient Boosting': {
                'regressor__n_estimators': [50, 100],
                'regressor__learning_rate': [0.1, 0.2],
                'regressor__max_depth': [3, 4]
            },

        }


predictor = FireIncidentResponsePredictor()

In [None]:

incidents=pd.read_csv('scripts/incidents_data_for_modeling2.csv')  # Example data loading

In [4]:

incidents=pd.read_csv('scripts/incidents_data_for_modeling2.csv')  # Example data loading
incidents_featurized = engineer_features(incidents)  # Feature engineering on a subset for speed
# engineer_features.train()


Calculating workload features using merge approach...
Calculating category-specific workload...
Calculating zone-specific workload...
Workload feature calculation completed!


In [5]:
# Pandas show all columns
pd.set_option('display.max_columns', None)

In [6]:
predictor.models['Random Forest'] = joblib.load('scripts/fire_models_random_forest_20250917.pkl')
predictor.is_trained = True
_, feature_columns, _, _ = predictor._prepare_features(incidents_featurized)
predictor.predict(incidents_featurized, model_name='Random Forest', feature_columns=feature_columns)

(92559, 27)
['hour', 'day_of_week', 'month', 'quarter', 'day_of_year', 'is_weekend', 'is_night', 'is_rush_hour', 'is_business_hours', 'is_holiday', 'category_incidents_last_24h', 'system_incidents_last_hour', 'lat', 'lon', 'distance_from_center', 'zone_incidents_last_week', 'zone_incidents_last_month', 'shift', 'season', 'category', 'ZONE_ID', 'incident_type']


array([2479.38960092, 1983.98320741, 2139.6307051 , ..., 2159.52443304,
       2008.03492462, 1953.82326073], shape=(92559,))

In [11]:
df_clean[feature_columns]

Unnamed: 0,hour,day_of_week,month,quarter,day_of_year,is_weekend,is_night,is_rush_hour,is_business_hours,is_holiday,category_incidents_last_24h,system_incidents_last_hour,lat,lon,distance_from_center,zone_incidents_last_week,zone_incidents_last_month,shift,season,category,ZONE_ID,incident_type
0,0,0,1,1,1,0,1,0,0,0,0.0,0.0,36.213787,-86.596595,10.904416,0.0,0.0,Night,Winter,Nine,210.0,Nine
1,0,0,1,1,1,0,1,0,0,0,1.0,1.0,36.160913,-86.776837,0.292999,0.0,0.0,Night,Winter,Nine,327.0,Nine
2,0,0,1,1,1,0,1,0,0,0,2.0,2.0,36.044009,-86.628408,11.849202,0.0,0.0,Night,Winter,Nine,222.0,Nine
3,0,0,1,1,1,0,1,0,0,0,0.0,3.0,36.151075,-86.761818,1.365001,0.0,0.0,Night,Winter,Five,324.0,Five
4,0,0,1,1,1,0,1,0,0,0,3.0,4.0,36.226771,-86.725614,5.417207,0.0,0.0,Night,Winter,Nine,174.0,Nine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92554,23,6,6,2,173,1,1,0,0,0,343.0,15.0,36.104282,-86.742205,4.596416,9.0,31.0,Night,Summer,Nine,111.0,Nine
92555,23,6,6,2,173,1,1,0,0,0,344.0,15.0,36.158816,-86.798171,0.962613,12.0,27.0,Night,Summer,Nine,88.0,Nine
92556,23,6,6,2,173,1,1,0,0,0,344.0,16.0,36.199841,-86.712963,4.608699,11.0,45.0,Night,Summer,Nine,69.0,Nine
92557,23,6,6,2,173,1,1,0,0,0,344.0,17.0,36.125805,-86.765879,2.696055,9.0,37.0,Night,Summer,Nine,318.0,Nine


In [12]:
df_clean[feature_columns].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92559 entries, 0 to 92558
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   hour                         92559 non-null  int32  
 1   day_of_week                  92559 non-null  int32  
 2   month                        92559 non-null  int32  
 3   quarter                      92559 non-null  int32  
 4   day_of_year                  92559 non-null  int32  
 5   is_weekend                   92559 non-null  int64  
 6   is_night                     92559 non-null  int64  
 7   is_rush_hour                 92559 non-null  int64  
 8   is_business_hours            92559 non-null  int64  
 9   is_holiday                   92559 non-null  int64  
 10  category_incidents_last_24h  92559 non-null  float64
 11  system_incidents_last_hour   92559 non-null  float64
 12  lat                          92559 non-null  float64
 13  lon             

In [None]:
# df_clean, feature_columns, available_numerical, available_categorical = predictor._prepare_features(incidents_featurized)
# available_features = [f for f in predictor.feature_columns if f in df_clean.columns]
# X = df_clean[available_features]
# available_features

[]

In [8]:
# predictor.predict(incidents_featurized, model_name='Random Forest')