# Cleaning California wildfire perimeters data

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import json
import jenkspy
import altair_latimes as lat
import altair as alt
# Select Altair's 'notebook' renderer for chart output.
alt.renderers.enable('notebook')
# Register the theme object from altair_latimes under the name 'latimes' and
# then activate it. Registration has to come before enable(), and enable()
# stays last because, as the cell's final expression, its return value is
# what the cell displays.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [2]:
# Data source: https://frap.fire.ca.gov/
# Field metadata: https://frap.fire.ca.gov/frap-projects/fire-perimeters/
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs as-is where the file exists at exactly this location.
wildfires_geojson = '/Users/mhustiles/data/data/GIS/wildfires/FRAP/input/wildfires_frap.geojson'

# Read every perimeter feature into a single GeoDataFrame.
wildfires = gpd.read_file(wildfires_geojson)

In [3]:
# Inspect the dtype of each column as it was read from the GeoJSON.
wildfires.dtypes

OBJECTID      float64
YEAR_          object
STATE          object
AGENCY         object
UNIT_ID        object
FIRE_NAME      object
INC_NUM        object
ALARM_DATE     object
CONT_DATE      object
CAUSE         float64
COMMENTS       object
REPORT_AC     float64
GIS_ACRES     float64
C_METHOD      float64
OBJECTIVE     float64
FIRE_NUM       object
Shape_Leng    float64
Shape_Area    float64
geometry       object
dtype: object

In [4]:
# Normalize column names: trim whitespace, lower-case, turn spaces into
# underscores and drop parentheses.
# Every replacement is a literal substitution, so regex=False is passed
# explicitly. '(' and ')' on their own are not valid regular expressions, and
# relying on the default regex setting makes the result depend on the
# installed pandas version.
clean_columns = wildfires.columns.str.strip().str.lower()
for old_text, new_text in ((' ', '_'), ('(', ''), (')', '')):
    clean_columns = clean_columns.str.replace(old_text, new_text, regex=False)
wildfires.columns = clean_columns

In [5]:
# Preview the first rows after the column names have been normalized.
wildfires.head()

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,comments,report_ac,gis_acres,c_method,objective,fire_num,shape_leng,shape_area,geometry
0,1.0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,,,25.736713,8.0,1.0,233414,1902.439051,104152.8,(POLYGON ((-118.4985124819225 34.3824189370447...
1,2.0,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,,,2824.877197,8.0,1.0,233077,20407.965662,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...
2,3.0,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,,54716.0,58410.335938,7.0,1.0,166,169150.71569,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...
3,4.0,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,,,172.214951,8.0,1.0,201384,6117.777086,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...
4,5.0,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,,,4707.99707,8.0,1.0,259483,22907.182174,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...


In [6]:
# These columns are read as floats (e.g. 14.0) but are converted to text here
# so they can be used as labels and as keys into the lookup dictionaries.
# Note that astype(str) turns missing values into the literal string 'nan'.
code_columns = ['cause', 'objectid', 'c_method', 'objective', 'report_ac']
wildfires[code_columns] = wildfires[code_columns].astype(str)

# Remove only a *trailing* '.0'. A plain substring replace of '.0' would also
# mangle genuinely fractional values, e.g. '10.05' -> '105'.
for column in code_columns:
    wildfires[column] = wildfires[column].str.replace(r'\.0$', '', regex=True)

# 640 acres = 1 square mile.
wildfires['sqmiles'] = (wildfires['gis_acres'] / 640).round(2)

# Drop the shape measurement columns. Reassigning (rather than inplace=True)
# together with errors='ignore' keeps this cell safe to re-run after the
# columns are already gone.
wildfires = wildfires.drop(columns=['shape_area', 'shape_leng'], errors='ignore')

In [7]:
# Remove the midnight time suffix so only the date text (e.g. 2007-10-21)
# remains in alarm_date.
alarm_text = wildfires['alarm_date']
wildfires['alarm_date'] = alarm_text.str.replace('T00:00:00', '')

In [8]:
# Remove the midnight time suffix so only the date text (e.g. 2007-10-23)
# remains in cont_date.
cont_text = wildfires['cont_date']
wildfires['cont_date'] = cont_text.str.replace('T00:00:00', '')

In [9]:
# Parse alarm_date into real datetimes and derive calendar parts from it.
# The date strings are hyphen-separated (e.g. '2007-10-21'), so the format
# must use hyphens as well. The previous '%Y/%m/%d' did not match the data and
# only parsed where pandas treated ISO-like formats leniently; stricter
# versions raise a ValueError on it.
alarm_dates = pd.to_datetime(wildfires['alarm_date'], format='%Y-%m-%d')

wildfires['alarm_date'] = alarm_dates
wildfires['alarm_year'] = alarm_dates.dt.year
wildfires['alarm_quarter'] = alarm_dates.dt.quarter
wildfires['alarm_day'] = alarm_dates.dt.day
wildfires['alarm_month'] = alarm_dates.dt.month
wildfires['alarm_monthname'] = alarm_dates.dt.month_name()

In [10]:
# Store the numeric date parts as text so they behave as categorical labels
# rather than quantities.
date_part_columns = ['alarm_year', 'alarm_quarter', 'alarm_day', 'alarm_month']
for part in date_part_columns:
    wildfires[part] = wildfires[part].astype(str)

In [11]:
# Human-readable labels for the `cause` codes, listed in code order: the
# label at position i (1-based) belongs to code str(i). Keys are strings
# because the `cause` column holds text.
_cause_labels = [
    'Lightning',
    'Equipment Use',
    'Smoking',
    'Campfire',
    'Debris',
    'Railroad',
    'Arson',
    'Playing with Fire',
    'Miscellaneous',
    'Vehicle',
    'Power Line',
    'Firefighter Training',
    'Non-Firefighter Training',
    'Unknown/Unidentified',
    'Structure',
    'Aircraft',
    'Volcanic',
    'Escaped Prescribed Burn',
    'Illegal Alien Campfire',
]
cause_codes = {str(code): label for code, label in enumerate(_cause_labels, start=1)}

In [12]:
# Translate each cause code into its label. Codes that are not keys of
# cause_codes come out as NaN.
cause_labels = wildfires['cause'].map(cause_codes)
wildfires['cause_description'] = cause_labels

In [13]:
# Full names for the abbreviations found in the `agency` column.
agencies = dict(
    BIA='USDI Bureau of Indian Affairs',
    BLM='Bureau of Land Management',
    CDF='California Department of Forestry and Fire Protection',
    CCO='Contract Counties',
    DOD='Department of Defense',
    FWS='USDI Fish and Wildlife Service',
    LRA='Local Response Area',
    NOP='No Protection',
    NPS='National Park Service',
    PVT='Private',
    USF='United States Forest Service',
    OTH='Other',
)

In [14]:
# Expand each agency abbreviation to its full name. Abbreviations that are
# not keys of agencies come out as NaN.
agency_names = wildfires['agency'].map(agencies)
wildfires['agency_description'] = agency_names

In [15]:
# Preview the frame with the derived date parts and description columns.
wildfires.head()

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,fire_num,geometry,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname,cause_description,agency_description
0,1,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21,2007-10-23,14,...,233414,(POLYGON ((-118.4985124819225 34.3824189370447...,0.04,2007,4,21,10,October,Unknown/Unidentified,Contract Counties
1,2,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22,2007-10-25,14,...,233077,(POLYGON ((-118.5844782794717 34.4197766738174...,4.41,2007,4,22,10,October,Unknown/Unidentified,Contract Counties
2,3,2007,CA,USF,ANF,RANCH,166,2007-10-20,2007-11-15,2,...,166,(POLYGON ((-118.7564468802518 34.5965130520924...,91.27,2007,4,20,10,October,Equipment Use,United States Forest Service
3,4,2007,CA,CCO,LAC,EMMA,201384,2007-09-11,2007-09-11,14,...,201384,(POLYGON ((-118.0727716195954 34.5019812566583...,0.27,2007,3,11,9,September,Unknown/Unidentified,Contract Counties
4,5,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24,2007-11-27,14,...,259483,(POLYGON ((-118.7440804532698 34.0812976688129...,7.36,2007,4,24,11,November,Unknown/Unidentified,Contract Counties


---

### Woolsey fire

In [16]:
# Select the perimeter record(s) whose fire name is exactly 'WOOLSEY'.
is_woolsey = wildfires['fire_name'] == 'WOOLSEY'
woolsey = wildfires[is_woolsey]

In [17]:
# Show the matching Woolsey row(s).
woolsey.head()

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,fire_num,geometry,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname,cause_description,agency_description
9501,20986,2018,CA,CCO,LAC,WOOLSEY,338981,2018-11-08,2018-11-08,14,...,,(POLYGON ((-118.7921470114273 34.2424586745584...,151.48,2018,4,8,11,November,Unknown/Unidentified,Contract Counties


### Aggregates by year

In [18]:
# Per-year totals: number of perimeter records and summed square miles.
yearly_totals = wildfires.groupby(['year_']).agg({'objectid': 'size', 'sqmiles': 'sum'})

# Flatten the group key back into a column, list the most recent year first,
# and give the columns reader-friendly names.
years = (
    yearly_totals
    .reset_index()
    .sort_values('year_', ascending=False)
    .rename(columns={'year_': 'year', 'objectid': 'count'})
)

In [19]:
# Display the per-year counts and square-mile totals, newest year first.
years

Unnamed: 0,year,count,sqmiles
38,2018,411,2484.34
37,2017,607,2225.83
36,2016,347,830.75
35,2015,311,1232.99
34,2014,238,891.96
33,2013,298,890.3
32,2012,350,1324.49
31,2011,317,316.24
30,2010,209,158.66
29,2009,254,681.0


---

### Export cleaned dataframe to GeoJSON

In [20]:
# Final look at the cleaned frame before it is written to disk.
wildfires.head()

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,fire_num,geometry,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname,cause_description,agency_description
0,1,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21,2007-10-23,14,...,233414,(POLYGON ((-118.4985124819225 34.3824189370447...,0.04,2007,4,21,10,October,Unknown/Unidentified,Contract Counties
1,2,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22,2007-10-25,14,...,233077,(POLYGON ((-118.5844782794717 34.4197766738174...,4.41,2007,4,22,10,October,Unknown/Unidentified,Contract Counties
2,3,2007,CA,USF,ANF,RANCH,166,2007-10-20,2007-11-15,2,...,166,(POLYGON ((-118.7564468802518 34.5965130520924...,91.27,2007,4,20,10,October,Equipment Use,United States Forest Service
3,4,2007,CA,CCO,LAC,EMMA,201384,2007-09-11,2007-09-11,14,...,201384,(POLYGON ((-118.0727716195954 34.5019812566583...,0.27,2007,3,11,9,September,Unknown/Unidentified,Contract Counties
4,5,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24,2007-11-27,14,...,259483,(POLYGON ((-118.7440804532698 34.0812976688129...,7.36,2007,4,24,11,November,Unknown/Unidentified,Contract Counties


In [21]:
# Write the cleaned perimeters out as GeoJSON.
# NOTE(review): this is an absolute, machine-specific path, so the export
# only works where this output directory exists.
cleaned_geojson = '/Users/mhustiles/data/data/GIS/wildfires/FRAP/output/wildfires.geojson'
wildfires.to_file(cleaned_geojson, driver='GeoJSON')