In [1]:
# dependencies
from pathlib import Path

import pandas as pd

# location of the raw fire incident CSV, relative to the notebook
fireFile = "./fire_data.csv"

# load the CSV into a data frame and preview its first five rows
fireData = pd.read_csv(filepath_or_buffer=fireFile)
fireData.head(n=5)

Unnamed: 0,incident_name,incident_is_final,incident_date_last_update,incident_date_created,incident_administrative_unit,incident_administrative_unit_url,incident_county,incident_location,incident_acres_burned,incident_containment,...,incident_latitude,incident_type,incident_id,incident_url,incident_date_extinguished,incident_dateonly_extinguished,incident_dateonly_created,is_active,calfire_incident,notification_desired
0,Bridge Fire,Y,2018-01-09T13:46:00Z,2017-10-31T11:22:00Z,Shasta-Trinity National Forest,,Shasta,"I-5 and Turntable Bay, 7 miles NE of Shasta Lake",37.0,100.0,...,40.774,,2ca11d45-8139-4c16-8af0-880d99b21e82,https://osfm.fire.ca.gov/incidents/2017/10/31/...,2018-01-09T13:46:00Z,1/9/2018,10/31/2017,N,False,False
1,Pala Fire,Y,2020-09-16T14:07:35Z,2009-05-24T14:56:00Z,CAL FIRE San Diego Unit,,San Diego,"Hwy 76 and Pala Temecula, northwest of Pala",122.0,100.0,...,1.0,Wildfire,8f61f461-552d-4538-b186-35ab030da416,https://osfm.fire.ca.gov/incidents/2009/5/24/p...,2009-05-25T00:00:00Z,5/25/2009,5/24/2009,N,True,False
2,River Fire,Y,2013-02-28T20:00:00Z,2013-02-24T08:16:00Z,CAL FIRE San Bernardino Unit,,Inyo,"south of Narrow Gauge Rd & north of Hwy 136, e...",406.0,100.0,...,36.602575,,094719ba-a47b-4abb-9ec5-a506b2b9fd23,https://osfm.fire.ca.gov/incidents/2013/2/24/r...,2013-02-28T20:00:00Z,2/28/2013,2/24/2013,N,True,False
3,Fawnskin Fire,Y,2013-04-22T09:00:00Z,2013-04-20T17:30:00Z,San Bernardino National Forest,,San Bernardino,"west of Delamar Mountain, north of the communi...",30.0,100.0,...,34.288877,,58f89ff8-bd3e-4355-b1c0-8fa05c747d3f,https://osfm.fire.ca.gov/incidents/2013/4/20/f...,2013-04-22T09:00:00Z,4/22/2013,4/20/2013,N,False,False
4,Gold Fire,Y,2013-05-01T07:00:00Z,2013-04-30T12:59:00Z,CAL FIRE Madera-Mariposa-Merced Unit,,Madera,Between Road 210 and Road 200 near Fine Gold C...,274.0,100.0,...,37.116295,,357ffc13-bef9-48eb-810f-c5de851972eb,https://osfm.fire.ca.gov/incidents/2013/4/30/g...,2013-05-01T07:00:00Z,5/1/2013,4/30/2013,N,True,False


In [2]:
# see all columns
# (head() above elides the middle columns with "...", so list the full column
# Index here before choosing which ones to keep)
fireData.columns

Index(['incident_name', 'incident_is_final', 'incident_date_last_update',
       'incident_date_created', 'incident_administrative_unit',
       'incident_administrative_unit_url', 'incident_county',
       'incident_location', 'incident_acres_burned', 'incident_containment',
       'incident_control', 'incident_cooperating_agencies',
       'incident_longitude', 'incident_latitude', 'incident_type',
       'incident_id', 'incident_url', 'incident_date_extinguished',
       'incident_dateonly_extinguished', 'incident_dateonly_created',
       'is_active', 'calfire_incident', 'notification_desired'],
      dtype='object')

In [3]:
# remove extraneous columns and rename the ones we keep
# (single mapping: the keys are the raw columns to keep, in output order;
#  the values are the short names used in the rest of the notebook)
keepColumns = {
    "incident_id": "ID",
    "incident_name": "Name",
    "incident_county": "County",
    "incident_acres_burned": "AcresBurned",
    "incident_dateonly_created": "Started",
    "incident_dateonly_extinguished": "Extinguished",
}
fireData = fireData[list(keepColumns)].rename(columns=keepColumns)

# drop duplicate incidents by ID, then drop the ID column
# drop_duplicates returns a new frame rather than modifying in place, so the
# result has to be assigned back or the de-duplication is silently lost
fireData = fireData.drop_duplicates(subset=["ID"]).drop(columns=["ID"])

In [4]:
# create a column that contains the duration
# first convert the date columns to datetime; assign() builds a new frame
# instead of writing into the column selection produced by the previous cell
fireData = fireData.assign(
    Started=pd.to_datetime(fireData["Started"]),
    Extinguished=pd.to_datetime(fireData["Extinguished"]),
)

# subtract the dates and read the whole-day count straight from the timedelta
# (no round trip through strings, so this does not depend on how pandas
#  renders timedeltas as text); the float cast keeps the column float whether
#  or not any date is missing, and a missing date (NaT) yields NaN
# add one day to duration to capture fires that started and were extinguished
# in the same day
fireData["Duration"] = (
    (fireData["Extinguished"] - fireData["Started"]).dt.days.astype(float) + 1
)

In [5]:
# create a column for year and filter for fires during or after 2013
# rows whose start date is missing have a NaN year and fail the comparison,
# so they are filtered out here as well
fireData["Year"] = fireData["Started"].dt.year
# .copy() makes the filtered frame independent, so adding a column below does
# not write into a slice (avoids SettingWithCopyWarning on pandas versions
# without copy-on-write)
fireData = fireData.loc[fireData["Year"] >= 2013].copy()

# create a column to hold the year and month of the start date
# (vectorised .dt accessor instead of a per-row Python lambda)
fireData["Date"] = fireData["Started"].dt.strftime("%Y-%m")


In [6]:
# keep only the columns needed for the export, discard rows with any missing
# value, and renumber the rows from zero
fireData = (
    fireData[["Date", "County", "Duration", "AcresBurned"]]
    .dropna()
    .reset_index(drop=True)
)
fireData

Unnamed: 0,Date,County,Duration,AcresBurned
0,2017-10,Shasta,71.0,37.0
1,2013-02,Inyo,5.0,406.0
2,2013-04,San Bernardino,3.0,30.0
3,2013-04,Madera,2.0,274.0
4,2013-05,Tehama,9.0,6965.0
...,...,...,...,...
1795,2021-10,Tehama,2.0,24.0
1796,2021-10,Kings,2.0,447.0
1797,2021-10,Napa,2.0,132.0
1798,2021-10,Trinity,2.0,59.0


In [7]:
# export as csv
# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise a fresh checkout fails on this final cell
outputFile = Path("./clean/fire_data_clean.csv")
outputFile.parent.mkdir(parents=True, exist_ok=True)
fireData.to_csv(outputFile, index=False)