# LA Wildfire Prediction: Feature Engineering

This notebook creates additional features from the processed wildfire data to enhance the predictive power of our models.


In [19]:


import pandas as pd
import numpy as np
import os
from datetime import datetime

# For displaying plots in the notebook
%matplotlib inline


## Data Loading
The following function loads the processed wildfire dataset.


In [21]:
def load_processed_data(file_path):
    """Read the processed wildfire CSV and parse its ``date`` column.

    Args:
        file_path: Path of the processed CSV file. The file must contain a
            ``date`` column.

    Returns:
        pandas.DataFrame with the file contents, where ``date`` has been
        converted with ``pd.to_datetime``.
    """
    print(f"Loading processed data from {file_path}...")

    # Read everything first, then convert the date strings in one pass
    processed = pd.read_csv(file_path).assign(
        date=lambda frame: pd.to_datetime(frame['date'])
    )

    # Report the dimensions followed by a preview of the first rows
    print(f"Loaded data with shape: {processed.shape}")
    print("\nLoaded Processed Data Head:")
    print(processed.head())

    return processed


## Time-Based Features
Create time-based features from the date column to capture seasonal and weekly patterns.


In [23]:
def create_time_features(df):
    """Add calendar features derived from the ``date`` column.

    Adds ``year``, ``month``, ``day_of_year``, ``day_of_week`` (Monday=0) and
    ``is_weekend`` (1 for Saturday/Sunday), plus season dummy columns.

    Seasons are assigned by calendar quarter: months 1-3 -> Winter,
    4-6 -> Spring, 7-9 -> Summer, 10-12 -> Fall. The first category (Winter)
    is dropped by ``drop_first=True``, so the result carries
    ``season_Spring``, ``season_Summer`` and ``season_Fall``.

    Args:
        df: DataFrame with a datetime-typed ``date`` column.

    Returns:
        A new DataFrame with the features added. The input frame is left
        unmodified.
    """
    print("Creating time-based features...")

    # Work on a copy: the column assignments below would otherwise leak into
    # the caller's frame, while the function returns a different object.
    df = df.copy()

    # Extract date components
    date_parts = df['date'].dt
    df['year'] = date_parts.year
    df['month'] = date_parts.month
    df['day_of_year'] = date_parts.dayofyear
    df['day_of_week'] = date_parts.dayofweek
    df['is_weekend'] = date_parts.dayofweek.isin([5, 6]).astype(int)

    # Create season feature (quarter-based bins; see docstring)
    df['season'] = pd.cut(
        df['month'],
        bins=[0, 3, 6, 9, 12],
        labels=['Winter', 'Spring', 'Summer', 'Fall'],
        include_lowest=True
    )

    # Convert season to dummy variables; Winter becomes the implicit baseline
    df = pd.get_dummies(df, columns=['season'], drop_first=True)

    # Print the head after adding time features
    print("\nData Head After Time Features:")
    print(df.head())

    return df


## Rolling Window Features
Create per-station rolling averages of the weather variables (3-day windows for TMAX, TMIN, PRCP and AWND, plus a 14-day window for PRCP) to capture short-term temporal patterns.


In [25]:
def _station_rolling_mean(frame, column, window):
    """Return the per-station trailing mean of ``column`` over ``window`` rows."""
    if frame.empty:
        # Nothing to group; return an empty float column so callers still
        # get the feature column on an empty frame.
        return pd.Series(dtype=float, index=frame.index)
    return frame.groupby('STATION')[column].transform(
        lambda values: values.rolling(window=window, min_periods=1).mean()
    )


def create_rolling_features(df):
    """Add per-station rolling-mean features for the weather columns.

    For each source column that is present, a trailing mean is computed
    within each ``STATION`` in date order (``min_periods=1``, so the first
    rows of a station use however many observations exist so far):

    * ``TMAX`` -> ``TMAX_3D``
    * ``TMIN`` -> ``TMIN_3D``
    * ``PRCP`` -> ``PRCP_3D`` and ``PRCP_14D``
    * ``AWND`` -> ``AWND_3D``

    Windows count rows, not calendar days, so gaps in a station's record are
    not accounted for.

    Args:
        df: DataFrame with ``STATION`` and ``date`` columns.

    Returns:
        A new DataFrame sorted by ``STATION`` then ``date`` with the rolling
        columns appended. Rows whose ``STATION`` is missing are dropped (a
        per-station groupby excludes them). An empty input yields an empty
        output instead of raising.
    """
    print("Creating rolling window features...")

    # (source column, window in rows, feature column) in output order
    rolling_specs = [
        ('TMAX', 3, 'TMAX_3D'),
        ('TMIN', 3, 'TMIN_3D'),
        ('PRCP', 3, 'PRCP_3D'),
        ('PRCP', 14, 'PRCP_14D'),
        ('AWND', 3, 'AWND_3D'),
    ]

    # Ensure data is sorted by station and date; rows without a station
    # cannot be assigned to a group and are excluded.
    df_rolling = df.dropna(subset=['STATION']).sort_values(['STATION', 'date'])

    for source_col, window, feature_col in rolling_specs:
        if source_col in df_rolling.columns:
            df_rolling[feature_col] = _station_rolling_mean(
                df_rolling, source_col, window
            )

    # Print the head after adding rolling features
    print("\nData Head After Rolling Features:")
    print(df_rolling.head())

    return df_rolling


## Dryness Features
Create features related to dryness conditions, which are critical for fire risk assessment.


In [27]:
def _days_since_rain(prcp, stations, rain_threshold=0.01):
    """Count, per station, the consecutive rows since PRCP last exceeded the threshold."""
    if len(prcp) == 0:
        return pd.Series(dtype=float, index=prcp.index)

    rainy = prcp > rain_threshold
    # Use raw arrays as group keys so no index alignment is involved
    station_keys = stations.to_numpy()

    # Every rainy row opens a new "spell" within its station; rows before the
    # first rain of a station belong to spell 0.
    spell_id = rainy.astype(int).groupby(station_keys).cumsum()

    # Inside a spell the rainy row contributes 0 and each following dry row
    # contributes 1, so the running total is the number of rows since rain.
    dry_run = (~rainy).astype(int).groupby(
        [station_keys, spell_id.to_numpy()]
    ).cumsum()
    return dry_run.astype(float)


def create_dryness_features(df):
    """Add dryness features computed per station in date order.

    When a ``PRCP`` column is present:

    * ``days_since_rain`` (float): number of consecutive rows since
      ``PRCP`` was last above 0.01. It is 0 on a rainy row, and the count
      starts at 1 on the first row of a station if that row is dry. Rows with
      missing ``PRCP`` count as dry.
    * ``drought_index`` (only if ``TMAX`` is also present):
      ``(TMAX / 100) * (days_since_rain / 30)`` clipped to ``[0, 1]``.

    Args:
        df: DataFrame with ``STATION`` and ``date`` columns.

    Returns:
        A new DataFrame sorted by ``STATION`` then ``date`` with the dryness
        columns appended. Rows whose ``STATION`` is missing are dropped (a
        per-station groupby excludes them). An empty input yields an empty
        output instead of raising.
    """
    print("Creating dryness features...")

    # Ensure data is sorted by station and date; rows without a station
    # cannot be assigned to a group and are excluded.
    df_dryness = df.dropna(subset=['STATION']).sort_values(['STATION', 'date'])

    if 'PRCP' in df_dryness.columns:
        # A row is rainy if precipitation > 0.01 (threshold from the original
        # implementation)
        df_dryness['days_since_rain'] = _days_since_rain(
            df_dryness['PRCP'], df_dryness['STATION']
        )

        # Simple drought severity index: hotter and longer-dry rows score
        # higher; clipped to the range [0, 1].
        if 'TMAX' in df_dryness.columns:
            raw_index = (df_dryness['TMAX'] / 100) * (df_dryness['days_since_rain'] / 30)
            df_dryness['drought_index'] = raw_index.clip(0, 1)

    # Print the head after adding dryness features
    print("\nData Head After Dryness Features:")
    print(df_dryness.head())

    return df_dryness


## Fire Spread Potential
Create a composite index that estimates fire spread potential as a weighted sum of normalized temperature (weight 0.3), wind (0.4), and dryness (0.3) components, clipped to the range [0, 1].


In [29]:
def create_fire_spread_potential(
    df,
    temp_scale=110,
    wind_scale=30,
    dryness_scale=30,
    weights=(0.3, 0.4, 0.3),
    default_component=0.5,
):
    """Add a ``fire_spread_potential`` index in the range [0, 1].

    The index is a weighted sum of three components, each obtained by
    dividing a column by a reference scale:

    * temperature: ``TMAX / temp_scale``
    * wind: ``AWND / wind_scale``
    * dryness: ``days_since_rain / dryness_scale``, falling back to
      ``dry_streak / dryness_scale`` when ``days_since_rain`` is absent

    A component whose source column is missing is replaced by
    ``default_component``. Missing values in a source column propagate to the
    index as NaN.

    Args:
        df: Input DataFrame.
        temp_scale: Reference maximum temperature (default 110).
        wind_scale: Reference maximum wind speed (default 30).
        dryness_scale: Number of dry days treated as severe (default 30).
        weights: ``(temperature, wind, dryness)`` weights; the defaults
            ``(0.3, 0.4, 0.3)`` reproduce the original fixed weighting.
        default_component: Value used for a component with no source column.

    Returns:
        A new DataFrame with ``fire_spread_potential`` added. The input frame
        is left unmodified.
    """
    print("Creating fire spread potential index...")

    # Work on a copy so the caller's frame does not gain the new column as a
    # side effect.
    df = df.copy()
    temp_weight, wind_weight, dryness_weight = weights

    # 1. Temperature component
    if 'TMAX' in df.columns:
        temp_component = df['TMAX'] / temp_scale
    else:
        temp_component = default_component

    # 2. Wind component
    if 'AWND' in df.columns:
        wind_component = df['AWND'] / wind_scale
    else:
        wind_component = default_component

    # 3. Dryness component (prefer days_since_rain, then dry_streak)
    if 'days_since_rain' in df.columns:
        dryness_component = df['days_since_rain'] / dryness_scale
    elif 'dry_streak' in df.columns:
        dryness_component = df['dry_streak'] / dryness_scale
    else:
        dryness_component = default_component

    # Weighted combination of components
    df['fire_spread_potential'] = (
        temp_weight * temp_component +
        wind_weight * wind_component +
        dryness_weight * dryness_component
    )

    # Clip on the stored column: when every component fell back to the scalar
    # default, the sum above is a plain float with no .clip method.
    df['fire_spread_potential'] = df['fire_spread_potential'].clip(0, 1)

    # Print the head after adding fire spread potential
    print("\nData Head After Fire Spread Potential:")
    print(df.head())

    return df


## Save Engineered Data
Save the dataframe with all engineered features to a CSV file.


In [31]:
def save_engineered_data(df, output_path):
    """Write the engineered DataFrame to a CSV file without the index.

    Args:
        df: DataFrame to save.
        output_path: Destination CSV path. Missing parent directories are
            created; a bare file name is written to the current directory.

    Returns:
        None.
    """
    print(f"Saving engineered data to {output_path}...")

    # Create the parent directory if there is one. os.path.dirname returns ''
    # for a bare file name, and os.makedirs('') raises FileNotFoundError.
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    df.to_csv(output_path, index=False)
    print(f"Saved engineered data with shape: {df.shape}")

    # Print the head of final engineered data
    print("\nFinal Engineered Data Head:")
    print(df.head())


## Main Execution
Run the complete feature engineering pipeline.


In [33]:
def main():
    """Run the feature engineering pipeline end to end.

    Loads the processed CSV, applies the time, rolling, dryness and fire
    spread feature builders in that order, saves the result, and returns it.

    Returns:
        The DataFrame produced by the last feature-building step.
    """
    # Input and output locations
    input_path = "../data/processed/processed_la_fire_data.csv"
    output_path = "../data/processed/engineered_la_fire_data.csv"

    df = load_processed_data(input_path)

    # Each step takes the frame produced by the previous one
    feature_steps = (
        create_time_features,
        create_rolling_features,
        create_dryness_features,
        create_fire_spread_potential,
    )
    for build_features in feature_steps:
        df = build_features(df)

    save_engineered_data(df, output_path)

    print("Feature engineering completed successfully!")

    return df


In [35]:
# Execute the feature engineering pipeline and keep the DataFrame returned by
# main() in `engineered_df`.
engineered_df = main()


Loading processed data from ../data/processed/processed_la_fire_data.csv...
Loaded data with shape: (224986, 35)

Loaded Processed Data Head:
        date  Fire_Occurred STATION NAME  AWND  DAPR  MDPR  PGTM  PRCP  TAVG  \
0 2014-12-27              0       0    0   0.0   0.0   0.0   0.0   0.0   0.0   
1 2014-12-28              0       0    0   0.0   0.0   0.0   0.0   0.0   0.0   
2 2014-12-29              0       0    0   0.0   0.0   0.0   0.0   0.0   0.0   
3 2014-12-30              0       0    0   0.0   0.0   0.0   0.0   0.0   0.0   
4 2014-12-31              0       0    0   0.0   0.0   0.0   0.0   0.0   0.0   

   ...  WT11  year  month  PRCP_7D  AWND_7D  PRCP_prev  AWND_prev  is_dry  \
0  ...   0.0   0.0    0.0      0.0      0.0        0.0        0.0     0.0   
1  ...   0.0   0.0    0.0      0.0      0.0        0.0        0.0     0.0   
2  ...   0.0   0.0    0.0      0.0      0.0        0.0        0.0     0.0   
3  ...   0.0   0.0    0.0      0.0      0.0        0.0        0.0    