In [1]:
# Import packages and connect to database
import sqlite3
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

conn = sqlite3.connect("Wildfires.sqlite")
print(conn.total_changes)

0


In [2]:
# Read in Data
df = pd.read_sql_query("SELECT * FROM Fires;", conn)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1880465 entries, 0 to 1880464
Data columns (total 39 columns):
 #   Column                      Dtype  
---  ------                      -----  
 0   OBJECTID                    int64  
 1   FOD_ID                      int64  
 2   FPA_ID                      object 
 3   SOURCE_SYSTEM_TYPE          object 
 4   SOURCE_SYSTEM               object 
 5   NWCG_REPORTING_AGENCY       object 
 6   NWCG_REPORTING_UNIT_ID      object 
 7   NWCG_REPORTING_UNIT_NAME    object 
 8   SOURCE_REPORTING_UNIT       object 
 9   SOURCE_REPORTING_UNIT_NAME  object 
 10  LOCAL_FIRE_REPORT_ID        object 
 11  LOCAL_INCIDENT_ID           object 
 12  FIRE_CODE                   object 
 13  FIRE_NAME                   object 
 14  ICS_209_INCIDENT_NUMBER     object 
 15  ICS_209_NAME                object 
 16  MTBS_ID                     object 
 17  MTBS_FIRE_NAME              object 
 18  COMPLEX_NAME                object 
 19  FIRE_YEAR            

In [3]:
# Only look at fires from 2000 or later
fires_all = df[df['FIRE_YEAR'] >= 2000]
fires_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1308317 entries, 0 to 1880464
Data columns (total 39 columns):
 #   Column                      Non-Null Count    Dtype  
---  ------                      --------------    -----  
 0   OBJECTID                    1308317 non-null  int64  
 1   FOD_ID                      1308317 non-null  int64  
 2   FPA_ID                      1308317 non-null  object 
 3   SOURCE_SYSTEM_TYPE          1308317 non-null  object 
 4   SOURCE_SYSTEM               1308317 non-null  object 
 5   NWCG_REPORTING_AGENCY       1308317 non-null  object 
 6   NWCG_REPORTING_UNIT_ID      1308317 non-null  object 
 7   NWCG_REPORTING_UNIT_NAME    1308317 non-null  object 
 8   SOURCE_REPORTING_UNIT       1308317 non-null  object 
 9   SOURCE_REPORTING_UNIT_NAME  1308317 non-null  object 
 10  LOCAL_FIRE_REPORT_ID        284168 non-null   object 
 11  LOCAL_INCIDENT_ID           762425 non-null   object 
 12  FIRE_CODE                   276762 non-null   object 
 1

In [4]:
# Drop columns with a lot of missing data
fires = fires_all.drop(columns = ['FOD_ID', 'FPA_ID', 'LOCAL_FIRE_REPORT_ID', 
                                'LOCAL_INCIDENT_ID', 'FIRE_CODE', 
                                'ICS_209_INCIDENT_NUMBER','ICS_209_NAME', 
                                'MTBS_ID', 'MTBS_FIRE_NAME', 'COMPLEX_NAME',
                                 'COUNTY', 'FIPS_CODE', 'FIPS_NAME'])
fires.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1308317 entries, 0 to 1880464
Data columns (total 26 columns):
 #   Column                      Non-Null Count    Dtype  
---  ------                      --------------    -----  
 0   OBJECTID                    1308317 non-null  int64  
 1   SOURCE_SYSTEM_TYPE          1308317 non-null  object 
 2   SOURCE_SYSTEM               1308317 non-null  object 
 3   NWCG_REPORTING_AGENCY       1308317 non-null  object 
 4   NWCG_REPORTING_UNIT_ID      1308317 non-null  object 
 5   NWCG_REPORTING_UNIT_NAME    1308317 non-null  object 
 6   SOURCE_REPORTING_UNIT       1308317 non-null  object 
 7   SOURCE_REPORTING_UNIT_NAME  1308317 non-null  object 
 8   FIRE_NAME                   704705 non-null   object 
 9   FIRE_YEAR                   1308317 non-null  int64  
 10  DISCOVERY_DATE              1308317 non-null  float64
 11  DISCOVERY_DOY               1308317 non-null  int64  
 12  DISCOVERY_TIME              703010 non-null   object 
 1

In [5]:
# Creating new date variables
fires['DAYS_TO_CONTAIN'] = fires['CONT_DATE'] - fires['DISCOVERY_DATE'] 
fires['DAYS_TO_CONTAIN'] = fires['DAYS_TO_CONTAIN'].fillna(0)
fires['DISCOVERY_DATE_DT'] = pd.to_datetime(fires['DISCOVERY_DATE'], unit='D', origin='julian')
fires['CONT_DATE_DT'] = pd.to_datetime(fires['CONT_DATE'], unit='D', origin='julian')

In [6]:
# NOAA climate regions
fires['REGION'] = np.where(fires['STATE'].isin(['WA', 'OR', 'ID']), 'Northwest', 
                  np.where(fires['STATE'].isin(['CA', 'NV']), 'West', 
                  np.where(fires['STATE'].isin(['MT', 'NE', 'ND', 'SD', 'WY']), 'Northern Rockies', 
                  np.where(fires['STATE'].isin(['AK']), 'Alaska', 
                  np.where(fires['STATE'].isin(['AZ', 'NM', 'CO', 'UT']), 'Southwest',          
                  np.where(fires['STATE'].isin(['OK', 'TX', 'KS', 'AR', 'LA', 'MS']), 'South', 
                  np.where(fires['STATE'].isin(['AL', 'FL', 'GA', 'NC', 'SC', 'VA']), 'Southeast', 
                  np.where(fires['STATE'].isin(['IL', 'IN', 'KY', 'MO', 'TN', 'OH', 'WV']), 'Ohio Valley', 
                  np.where(fires['STATE'].isin(['IA',  'MI', 'MN', 'WI']), 'Upper Midwest', 
                  np.where(fires['STATE'].isin(['CT', 'NY', 'ME', 'MA', 'MD', 'NH', 'NJ', 'PA', 'RI', 'VT', 'DE']), 'Northeast', 'Other'        
                                  ))))))))))

In [7]:
fires.to_csv('Fires_During_After_2000.csv')

In [8]:
# Restrict to fires at least 10 acres large
fires_10_acre = fires[fires['FIRE_SIZE'] >= 10]
fires_10_acre.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 189837 entries, 16 to 1880441
Data columns (total 30 columns):
 #   Column                      Non-Null Count   Dtype         
---  ------                      --------------   -----         
 0   OBJECTID                    189837 non-null  int64         
 1   SOURCE_SYSTEM_TYPE          189837 non-null  object        
 2   SOURCE_SYSTEM               189837 non-null  object        
 3   NWCG_REPORTING_AGENCY       189837 non-null  object        
 4   NWCG_REPORTING_UNIT_ID      189837 non-null  object        
 5   NWCG_REPORTING_UNIT_NAME    189837 non-null  object        
 6   SOURCE_REPORTING_UNIT       189837 non-null  object        
 7   SOURCE_REPORTING_UNIT_NAME  189837 non-null  object        
 8   FIRE_NAME                   100447 non-null  object        
 9   FIRE_YEAR                   189837 non-null  int64         
 10  DISCOVERY_DATE              189837 non-null  float64       
 11  DISCOVERY_DOY               189837 no

In [9]:
fires_10_acre.to_csv('Fires_At_Least_10_Acres_2000_Later.csv')

In [10]:
# Restrict to fires at least 1 acre large and took more than 1 day to contain
fires_1_acre = fires[fires['FIRE_SIZE'] >= 1]
fires_1_acre_1_day = fires_1_acre[fires_1_acre['DAYS_TO_CONTAIN']>0]
fires_1_acre_1_day.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61965 entries, 16 to 1880458
Data columns (total 30 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   OBJECTID                    61965 non-null  int64         
 1   SOURCE_SYSTEM_TYPE          61965 non-null  object        
 2   SOURCE_SYSTEM               61965 non-null  object        
 3   NWCG_REPORTING_AGENCY       61965 non-null  object        
 4   NWCG_REPORTING_UNIT_ID      61965 non-null  object        
 5   NWCG_REPORTING_UNIT_NAME    61965 non-null  object        
 6   SOURCE_REPORTING_UNIT       61965 non-null  object        
 7   SOURCE_REPORTING_UNIT_NAME  61965 non-null  object        
 8   FIRE_NAME                   58174 non-null  object        
 9   FIRE_YEAR                   61965 non-null  int64         
 10  DISCOVERY_DATE              61965 non-null  float64       
 11  DISCOVERY_DOY               61965 non-null  int64  

In [11]:
fires_1_acre_1_day.to_csv('Fires_At_Least_1_Acre_1_Day_2000_Later.csv')

In [43]:
fires['NO_CONT'] = np.where(fires['CONT_DATE'].isna(), 0, 1)
fires['NO_CONT'].value_counts()

1    678456
0    629861
Name: NO_CONT, dtype: int64

In [44]:
data_crosstab = pd.crosstab(fires['SOURCE_SYSTEM_TYPE'], 
                            fires['NO_CONT'],  
                               margins = False, normalize = 'index') 
print(data_crosstab) 

NO_CONT                    0         1
SOURCE_SYSTEM_TYPE                    
FED                 0.016662  0.983338
INTERAGCY           0.903783  0.096217
NONFED              0.621083  0.378917


In [45]:
data_crosstab = pd.crosstab(fires['SOURCE_SYSTEM_TYPE'], 
                            fires['NO_CONT'],  
                               margins = False) 
print(data_crosstab) 

NO_CONT                  0       1
SOURCE_SYSTEM_TYPE                
FED                   5315  313672
INTERAGCY            32256    3434
NONFED              592290  361350


In [55]:
data_crosstab = pd.crosstab(fires['NWCG_REPORTING_AGENCY'], 
                            fires['NO_CONT'],  
                               margins = False, normalize = 'index') 
print(data_crosstab) 

NO_CONT                       0         1
NWCG_REPORTING_AGENCY                    
BIA                    0.004683  0.995317
BLM                    0.001948  0.998052
BOR                    0.000000  1.000000
DOD                    0.475000  0.525000
DOE                    0.500000  0.500000
FS                     0.017267  0.982733
FWS                    0.094374  0.905626
IA                     0.998855  0.001145
NPS                    0.094919  0.905081
ST/C&L                 0.623050  0.376950
TRIBE                  0.008221  0.991779


In [61]:
data_crosstab = pd.crosstab(fires['NWCG_REPORTING_AGENCY'], 
                            fires['NO_CONT'],  
                               margins = False) 
print(data_crosstab) 

NO_CONT                     0       1
NWCG_REPORTING_AGENCY                
BIA                       388   82473
BLM                       134   68665
BOR                         0      14
DOD                        38      42
DOE                         1       1
FS                       2378  135338
FWS                      1384   13281
IA                      21816      25
NPS                      1160   11061
ST/C&L                 602537  364540
TRIBE                      25    3016


In [57]:
data_crosstab = pd.crosstab(fires['OWNER_DESCR'], 
                            fires['NO_CONT'],  
                               margins = False, normalize = 'index') 
print(data_crosstab) 

NO_CONT                       0         1
OWNER_DESCR                              
BIA                    0.005221  0.994779
BLM                    0.000895  0.999105
BOR                    0.003876  0.996124
COUNTY                 0.196866  0.803134
FOREIGN                0.166667  0.833333
FWS                    0.139621  0.860379
MISSING/NOT SPECIFIED  0.748108  0.251892
MUNICIPAL/LOCAL        0.200240  0.799760
NPS                    0.106216  0.893784
OTHER FEDERAL          0.014650  0.985350
PRIVATE                0.194422  0.805578
STATE                  0.151607  0.848393
STATE OR PRIVATE       0.644809  0.355191
TRIBAL                 0.003503  0.996497
UNDEFINED FEDERAL      0.324841  0.675159
USFS                   0.018531  0.981469


In [62]:
data_crosstab = pd.crosstab(fires['OWNER_DESCR'], 
                            fires['NO_CONT'],  
                               margins = False) 
print(data_crosstab) 

NO_CONT                     0       1
OWNER_DESCR                          
BIA                       370   70496
BLM                        38   42399
BOR                         1     257
COUNTY                    289    1179
FOREIGN                     2      10
FWS                      1171    7216
MISSING/NOT SPECIFIED  532839  179410
MUNICIPAL/LOCAL           836    3339
NPS                      1097    9231
OTHER FEDERAL              66    4439
PRIVATE                 46028  190715
STATE                    3973   22233
STATE OR PRIVATE        40229   22160
TRIBAL                     30    8535
UNDEFINED FEDERAL         714    1484
USFS                     2178  115353


In [58]:
fires_1_acre_1_day['SOURCE_SYSTEM_TYPE'].value_counts()

FED          45344
NONFED       15318
INTERAGCY     1303
Name: SOURCE_SYSTEM_TYPE, dtype: int64

In [59]:
fires_1_acre_1_day['NWCG_REPORTING_AGENCY'].value_counts()

FS        19834
ST/C&L    16476
BLM       13933
BIA        6803
FWS        2835
NPS        1780
TRIBE       250
DOD          31
IA           19
BOR           3
DOE           1
Name: NWCG_REPORTING_AGENCY, dtype: int64

In [60]:
fires_1_acre_1_day['OWNER_DESCR'].value_counts()

USFS                     17411
PRIVATE                  12869
BLM                       8856
MISSING/NOT SPECIFIED     6148
BIA                       5567
STATE                     2837
STATE OR PRIVATE          2518
FWS                       2186
NPS                       1601
TRIBAL                     822
OTHER FEDERAL              480
MUNICIPAL/LOCAL            359
UNDEFINED FEDERAL          130
COUNTY                     119
BOR                         60
FOREIGN                      2
Name: OWNER_DESCR, dtype: int64