In [1]:
%matplotlib notebook
!jupyter nbextension enable --py gmaps

Enabling notebook extension jupyter-gmaps/extension...
      - Validating: [32mOK[0m


In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import gmaps
g_key = "AIzaSyAI0S-X4FUeegPi_brD4_4wklsXXP44S2w"
import scipy.stats as stats

In [3]:
#Read in each csv file 
fire_data_1 = pd.read_csv("Data/us_fires_1.csv", low_memory=False, encoding='latin-1')
fire_data_2 = pd.read_csv("Data/us_fires_2.csv", low_memory=False, encoding='latin-1')
fire_data_3 = pd.read_csv("Data/us_fires_3.csv", low_memory=False, encoding='latin-1')
fire_data_4 = pd.read_csv("Data/us_fires_4.csv", low_memory=False, encoding='latin-1')
fire_data_5 = pd.read_csv("Data/us_fires_5.csv", low_memory=False, encoding='latin-1')
fire_data_6 = pd.read_csv("Data/us_fires_6.csv", low_memory=False, encoding='latin-1')
fire_data_7 = pd.read_csv("Data/us_fires_7.csv", low_memory=False, encoding='latin-1')

#Combine the data frames into one 
all_data = pd.concat([fire_data_1, fire_data_2, fire_data_3, fire_data_4, 
                      fire_data_5, fire_data_6, fire_data_7], axis=0)

#Drop unnecessary columns 
cleaned_df = all_data.drop(['objectid', 'fod_id', 'fpa_id', 'source_system_type', 'source_system', 'nwcg_reporting_agency',
              'nwcg_reporting_unit_id', 'nwcg_reporting_unit_name', 'source_reporting_unit', 'owner_code', 
               'owner_descr', 'fips_code', 'fips_name', 'local_fire_report_id', 'local_incident_id',
              'fire_code', 'ics_209_incident_number', 'ics_209_name', 'mtbs_id', 'complex_name',
              'mtbs_fire_name', 'discovery_doy', 'discovery_time', 'stat_cause_code', 'cont_doy',
              'cont_time', 'fire_name', 'source_reporting_unit_name', 'cont_date', 'discovery_date','county'], axis=1)

#Rename columns 
cleaned_df.columns = ['Year', 'Cause', 'Size', 'Size Class','Latitude', 'Longitude', 'State']

In [4]:
#Rename columns 
cleaned_df.columns = ['Year', 'Cause', 'Size', 'Size Class','Latitude', 'Longitude', 'State']

#Sort data by state 
df = cleaned_df.sort_values('State', ascending=True)
df.head()

Unnamed: 0,Year,Cause,Size,Size Class,Latitude,Longitude,State
162099,1994,Miscellaneous,0.5,B,64.883331,-147.966659,AK
210664,1993,Debris Burning,0.5,B,65.5663,-144.9191,AK
161788,1993,Miscellaneous,0.1,A,61.966667,-149.433334,AK
210663,1993,Lightning,4470.0,F,65.8329,-147.4025,AK
210662,1993,Lightning,5530.0,G,64.1997,-141.0856,AK


In [5]:
df['Size Class'].unique()

array(['B', 'A', 'F', 'G', 'C', 'D', 'E'], dtype=object)

In [6]:
fire_size = df[df["Year"] == 2015]
fire_size.head()

Unnamed: 0,Year,Cause,Size,Size Class,Latitude,Longitude,State
296432,2015,Lightning,42.6,C,65.1092,-150.6206,AK
296433,2015,Lightning,5459.0,G,64.7219,-159.7386,AK
296434,2015,Lightning,5.0,B,62.93,-155.95,AK
296435,2015,Lightning,25687.5,G,64.035,-157.8378,AK
296436,2015,Lightning,60806.6,G,65.13042,-152.96714,AK


In [7]:
california = fire_size[fire_size["State"] == 'CA']
california.head()

Unnamed: 0,Year,Cause,Size,Size Class,Latitude,Longitude,State
80463,2015,Missing/Undefined,2.0,B,37.672235,-120.898356,CA
72077,2015,Arson,0.05,A,34.124843,-117.245297,CA
72076,2015,Miscellaneous,0.01,A,40.476117,-123.943617,CA
72075,2015,Debris Burning,0.01,A,38.723801,-120.7199,CA
72074,2015,Miscellaneous,0.08,A,39.336596,-123.758091,CA


In [8]:
gmaps.configure(api_key=g_key)

locations = fire_size[["Latitude", "Longitude"]]
size = fire_size["Size"].astype(float)

# Plot Heatmap
fig = gmaps.Map()

# Create heat layer
heat_layer = gmaps.heatmap_layer(locations, weights=size, 
                                 dissipating=False, max_intensity=12,
                                 point_radius=0.1)

# Add layer
fig.add_layer(heat_layer)

fig

Map(configuration={'api_key': 'AIzaSyAI0S-X4FUeegPi_brD4_4wklsXXP44S2w'}, data_bounds=[(26.375291362448145, -1…

In [9]:
#Filter out any environmental causes of wildfires
lightning = df[df["Cause"] == "Lightning"]

#Filter out earliest and latest years 
oldest = lightning[lightning["Year"] == 1993]
newest = lightning[lightning["Year"] == 2015]

lightning.head()

Unnamed: 0,Year,Cause,Size,Size Class,Latitude,Longitude,State
210663,1993,Lightning,4470.0,F,65.8329,-147.4025,AK
210662,1993,Lightning,5530.0,G,64.1997,-141.0856,AK
210661,1993,Lightning,7200.0,G,64.7662,-151.2691,AK
161789,1993,Lightning,15.0,C,64.666664,-145.733337,AK
210659,1993,Lightning,5810.0,G,67.1663,-147.8194,AK


In [10]:
#Count the number of wildfires for 1993 and 2015 - for 'observed'
chi_observed_oldest = oldest['Year'].count()
chi_observed_newest = newest['Year'].count()

#Calculate the 'Expected' number of wildfires 
total_fires = lightning['Year'].count()
total_fires/23

12107.304347826086

In [11]:
#Create a data frame for the chi square test 
chi_df = pd.DataFrame(lightning.groupby(by = 'Year')['Cause'].count()).reset_index()
chi_df['Expected'] = chi_df['Cause'].sum() / chi_df.shape[0]
chi_df.columns = ['Year', 'Observed', 'Expected']
chi_df

Unnamed: 0,Year,Observed,Expected
0,1992,12230,11602.833333
1,1993,7539,11602.833333
2,1994,16204,11602.833333
3,1995,8075,11602.833333
4,1996,12634,11602.833333
5,1997,8447,11602.833333
6,1998,10880,11602.833333
7,1999,11797,11602.833333
8,2000,16547,11602.833333
9,2001,13825,11602.833333


In [12]:
critical_value = stats.chi2.ppf(q = 0.95, df = 22)
critical_value

33.92443847144381

In [13]:
stats.chisquare(chi_df['Observed'], chi_df['Expected'])

Power_divergenceResult(statistic=13284.483445135527, pvalue=0.0)