In [1]:
# adding a new holiday for summer break
# 1/6-31/8
# the days-from-holiday value for summer break will be measured from 1/7 (dates here are day/month), so the distance is counted the same way before and after that date.
# use it the same as the rest of the features
# we can add the dates of the missing holidays by hand using Google and beloved ChatGPT

# check the option for starting a "timer"- check the average time for each cause to start a fire. 

We worked on adding and exploring features related to time.
We decided to check whether there is a connection between holidays and the type of the cause of the fire.
The rationale was that during holidays and vacations, people return home and may have more time to travel (campfires), or they simply get bored and do some stupid things (children, arson...).
Therefore, we downloaded a dataset that contains the dates of some holidays in the USA and added to our dataset the distance of every fire from the closest holiday in days.
The added features are "nearest_holiday", "days_from_closest_holiday".

In [2]:
import pandas as pd
from datetime import datetime, timedelta

In [3]:
# Input files; relative paths are resolved against the current working directory.
WILDFIRE_CSV = "data.csv"
# Plain string literal: the original f-prefix had no placeholders to format.
HOLIDAYS_CSV = "US Holiday Dates (2004-2021).csv"

wildfire_df = pd.read_csv(WILDFIRE_CSV)
holiday_dates_df = pd.read_csv(HOLIDAYS_CSV)

In [4]:
# Parse the holiday 'Date' column into pandas datetimes so it supports date arithmetic
holiday_dates_df['Date'] = pd.to_datetime(holiday_dates_df['Date'])

# Index the holiday table by year: every distinct 'Year' value maps to the
# sub-frame holding the holiday rows recorded for that year
holiday_dict = {
    yr: holiday_dates_df[holiday_dates_df['Year'] == yr]
    for yr in holiday_dates_df['Year'].unique()
}

def find_nearest_holiday(fire_year, discovery_doy):
    """Return the holiday closest to a fire's discovery date.

    The fire date is rebuilt from ``fire_year`` and the 1-based day of year
    ``discovery_doy`` and compared against the holiday tables stored in the
    module-level ``holiday_dict`` (year -> DataFrame with 'Date' and
    'Holiday' columns).

    Parameters
    ----------
    fire_year : int-like
        Calendar year of the fire. Integral floats (e.g. ``2011.0``) are accepted.
    discovery_doy : int-like
        Day of the year on which the fire was discovered (1 = 1 January).

    Returns
    -------
    tuple
        ``(holiday_name, days_away)`` where ``days_away`` is a non-negative
        int, or ``("No Data", "No Data")`` when either input is missing or
        ``holiday_dict`` has no entry for ``fire_year``.
    """
    # Missing inputs cannot be placed on the calendar; report them with the
    # same sentinel that is used for years without holiday data.
    if pd.isna(fire_year) or pd.isna(discovery_doy):
        return ("No Data", "No Data")

    # datetime() rejects float years, so normalise both inputs to int.
    fire_year = int(fire_year)
    if fire_year not in holiday_dict:
        return ("No Data", "No Data")

    fire_date = datetime(fire_year, 1, 1) + timedelta(days=int(discovery_doy) - 1)

    nearest_holiday = None
    min_days_diff = float('inf')
    # The fire's own year is scanned first so that it wins ties; neighbouring
    # years are included because a fire close to a year boundary can be nearer
    # to a holiday that falls on the other side of it.
    for year in (fire_year, fire_year - 1, fire_year + 1):
        year_holidays = holiday_dict.get(year)
        if year_holidays is None:
            continue
        # Walk the two needed columns directly instead of iterrows(), which
        # builds a full Series for every holiday row.
        for holiday_date, holiday_name in zip(year_holidays['Date'], year_holidays['Holiday']):
            diff = abs((fire_date - holiday_date).days)
            if diff < min_days_diff:
                min_days_diff = diff
                nearest_holiday = holiday_name
    return (nearest_holiday, min_days_diff)

# Compute the nearest holiday for every fire. Only two columns are needed, so
# iterate over them directly instead of apply(axis=1), which materialises a
# full row Series per fire.
holiday_matches = [
    find_nearest_holiday(fire_year, discovery_doy)
    for fire_year, discovery_doy in zip(wildfire_df['FIRE_YEAR'], wildfire_df['DISCOVERY_DOY'])
]

# Split the (name, days) pairs into the two feature columns. Building the
# columns separately also works for an empty frame, where unpacking
# zip(*...) into two targets would fail.
wildfire_df['nearest_holiday'] = [holiday_name for holiday_name, _ in holiday_matches]
wildfire_df['days_from_closest_holiday'] = [days_away for _, days_away in holiday_matches]

In [5]:
# Preview the first 10 rows; nearest_holiday and days_from_closest_holiday appear as the last columns
wildfire_df.head(10)

Unnamed: 0.1,Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,...,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape,nearest_holiday,days_from_closest_holiday
0,0,1464479,201104109,W-626311,FED,DOI-WFMI,BIA,USMTFBA,Fort Belknap Agency,MTFBA,...,-108.2896,8.0,PRIVATE,MT,,,,b'\x00\x01\xad\x10\x00\x00\xf8:p\xce\x88\x12[\...,Labor Day Weekend,12
1,1,1591094,201608816,SFO-NE-2012-16029,NONFED,ST-NASF,ST/C&L,USNENES,Nebraska Department of Forestry,NENFS,...,-99.08913,14.0,MISSING/NOT SPECIFIED,NE,,,,b'\x00\x01\xad\x10\x00\x00\xe8\xc5PN\xb4\xc5X\...,Memorial Day,9
2,2,944726,1072277,SWRA_VA_11300,NONFED,ST-VAVAS,ST/C&L,USVAVAS,Virginia Department of Forestry,VAVAS3,...,-78.2692,14.0,MISSING/NOT SPECIFIED,VA,,,,b'\x00\x01\xad\x10\x00\x00P\x05\xa3\x92:\x91S\...,No Data,No Data
3,3,896277,1022153,SWRA_LA_24392,NONFED,ST-LALAS,ST/C&L,USLALAS,Louisiana Office of Forestry,LALAS1,...,-89.8639,14.0,MISSING/NOT SPECIFIED,LA,,,,b'\x00\x01\xad\x10\x00\x00\xe8\xc09#JwV\xc0\xf...,No Data,No Data
4,4,1269039,1673620,SFO-KY-20320089011,NONFED,ST-NASF,ST/C&L,USKYKYS,Kentucky Division of Forestry,KYKYS,...,-84.34258,14.0,MISSING/NOT SPECIFIED,KY,Rockcastle,203.0,Rockcastle,b'\x00\x01\xad\x10\x00\x00\xdc\x10\xaa\xd4\xec...,Eastern Easter,9
5,5,1453845,20009452,MOSFM-11020,NONFED,ST-MOMOS,ST/C&L,USMOMOS,Missouri Department of Conservation,00506,...,-94.92671,8.0,PRIVATE,MO,BARRY,9.0,Barry,b'\x00\x01\xad\x10\x00\x00\x10\xb8u7O\xbbW\xc0...,No Data,No Data
6,6,832173,953407,STATE_MS_93764,NONFED,ST-MSMSS,ST/C&L,USMSMSS,Mississippi Forestry Commission,MS South Central,...,-89.2756,14.0,MISSING/NOT SPECIFIED,MS,,,,b'\x00\x01\xad\x10\x00\x00\xc0\xb1.n\xa3QV\xc0...,No Data,No Data
7,7,500074,537645,SFO-MN0349-920122,NONFED,ST-NASF,ST/C&L,USMNMNS,Minnesota Department of Natural Resources,MNMNS,...,-93.568452,14.0,MISSING/NOT SPECIFIED,MN,Mille Lacs,95.0,Mille Lacs,b'\x00\x01\xad\x10\x00\x00Hj\x91\x82adW\xc0\xe...,No Data,No Data
8,8,5332,5356,FS-1429214,FED,FS-FIRESTAT,FS,USMNSUF,Superior National Forest,0909,...,-90.999167,5.0,USFS,MN,31,31.0,Cook,b'\x00\x01\xad\x10\x00\x00\xcc\xb9\xc2X\xf2\xb...,4th of July,25
9,9,554703,596457,SFO-NJ0285-99_A051506,NONFED,ST-NASF,ST/C&L,USNJNJS,New Jersey Forest Fire Service,NJNJA,...,-74.49166,14.0,MISSING/NOT SPECIFIED,NJ,Passaic,31.0,Passaic,b'\x00\x01\xad\x10\x00\x00\x140\x81[w\x9fR\xc0...,No Data,No Data


We also noticed that some causes have more incidents at particular hours of the day.
We considered adding a feature of "part_of_day" in which the fire took place, but we realized that we already have the distribution by hours so this feature will not be very useful.

We will also add here a season feature that will help predict the cause type (summer + close to nature = lightning?)

In [7]:
# DISCOVERY_DATE holds Julian day numbers; convert them to datetimes.
# The assignment overwrites the raw values in place, so the conversion is
# guarded: re-running this cell would otherwise feed already-converted
# datetimes back into a Julian-day parse.
if not pd.api.types.is_datetime64_any_dtype(wildfire_df['DISCOVERY_DATE']):
    wildfire_df['DISCOVERY_DATE'] = pd.to_datetime(wildfire_df['DISCOVERY_DATE'], unit='D', origin='julian')
def get_season(date):
    """Map a date to a season name based on its calendar month.

    March-May -> 'Spring', June-August -> 'Summer', September-November ->
    'Fall'. Every other ``date.month`` value (December, January, February,
    or anything not listed) falls back to 'Winter'.
    """
    season_by_month = {
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall',
    }
    # Months missing from the table are the winter months (and any unexpected value).
    return season_by_month.get(date.month, 'Winter')
# Label every fire with the season of its discovery date (element-wise lookup)
wildfire_df['Season'] = wildfire_df['DISCOVERY_DATE'].map(get_season)

In [8]:
# Preview the first 10 rows; the Season column appears as the last column
wildfire_df.head(10)

Unnamed: 0.1,Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,...,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape,nearest_holiday,days_from_closest_holiday,Season
0,0,1464479,201104109,W-626311,FED,DOI-WFMI,BIA,USMTFBA,Fort Belknap Agency,MTFBA,...,8.0,PRIVATE,MT,,,,b'\x00\x01\xad\x10\x00\x00\xf8:p\xce\x88\x12[\...,Labor Day Weekend,12,Summer
1,1,1591094,201608816,SFO-NE-2012-16029,NONFED,ST-NASF,ST/C&L,USNENES,Nebraska Department of Forestry,NENFS,...,14.0,MISSING/NOT SPECIFIED,NE,,,,b'\x00\x01\xad\x10\x00\x00\xe8\xc5PN\xb4\xc5X\...,Memorial Day,9,Spring
2,2,944726,1072277,SWRA_VA_11300,NONFED,ST-VAVAS,ST/C&L,USVAVAS,Virginia Department of Forestry,VAVAS3,...,14.0,MISSING/NOT SPECIFIED,VA,,,,b'\x00\x01\xad\x10\x00\x00P\x05\xa3\x92:\x91S\...,No Data,No Data,Winter
3,3,896277,1022153,SWRA_LA_24392,NONFED,ST-LALAS,ST/C&L,USLALAS,Louisiana Office of Forestry,LALAS1,...,14.0,MISSING/NOT SPECIFIED,LA,,,,b'\x00\x01\xad\x10\x00\x00\xe8\xc09#JwV\xc0\xf...,No Data,No Data,Spring
4,4,1269039,1673620,SFO-KY-20320089011,NONFED,ST-NASF,ST/C&L,USKYKYS,Kentucky Division of Forestry,KYKYS,...,14.0,MISSING/NOT SPECIFIED,KY,Rockcastle,203.0,Rockcastle,b'\x00\x01\xad\x10\x00\x00\xdc\x10\xaa\xd4\xec...,Eastern Easter,9,Spring
5,5,1453845,20009452,MOSFM-11020,NONFED,ST-MOMOS,ST/C&L,USMOMOS,Missouri Department of Conservation,00506,...,8.0,PRIVATE,MO,BARRY,9.0,Barry,b'\x00\x01\xad\x10\x00\x00\x10\xb8u7O\xbbW\xc0...,No Data,No Data,Fall
6,6,832173,953407,STATE_MS_93764,NONFED,ST-MSMSS,ST/C&L,USMSMSS,Mississippi Forestry Commission,MS South Central,...,14.0,MISSING/NOT SPECIFIED,MS,,,,b'\x00\x01\xad\x10\x00\x00\xc0\xb1.n\xa3QV\xc0...,No Data,No Data,Spring
7,7,500074,537645,SFO-MN0349-920122,NONFED,ST-NASF,ST/C&L,USMNMNS,Minnesota Department of Natural Resources,MNMNS,...,14.0,MISSING/NOT SPECIFIED,MN,Mille Lacs,95.0,Mille Lacs,b'\x00\x01\xad\x10\x00\x00Hj\x91\x82adW\xc0\xe...,No Data,No Data,Fall
8,8,5332,5356,FS-1429214,FED,FS-FIRESTAT,FS,USMNSUF,Superior National Forest,0909,...,5.0,USFS,MN,31,31.0,Cook,b'\x00\x01\xad\x10\x00\x00\xcc\xb9\xc2X\xf2\xb...,4th of July,25,Summer
9,9,554703,596457,SFO-NJ0285-99_A051506,NONFED,ST-NASF,ST/C&L,USNJNJS,New Jersey Forest Fire Service,NJNJA,...,14.0,MISSING/NOT SPECIFIED,NJ,Passaic,31.0,Passaic,b'\x00\x01\xad\x10\x00\x00\x140\x81[w\x9fR\xc0...,No Data,No Data,Spring
