In this notebook we build several interactive maps to get a global overview of wildfires in the US. Please note that, due to their size, some maps may not be displayed until the cells are run.

First, we apply the corrections/modifications. See the corresponding notebook.


In [1]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")


# Connect to the database and import the table 'Fires'
import os
import sqlite3
from contextlib import closing

# List every file available in the Kaggle input directory
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


# Select only columns of interest
col_of_interest = ['OBJECTID', 'FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY', 'NWCG_GENERAL_CAUSE', 'CONT_DATE', 'CONT_TIME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE' , 'LONGITUDE' , 'STATE']

# Ask SQLite for these columns only: 'SELECT *' would load every column of the
# table into memory before the unused ones are dropped.
# The column names come from the hard-coded list above, so building the query
# by string formatting is safe here.
fires_query = 'SELECT {} FROM Fires;'.format(', '.join(col_of_interest))

# closing() releases the connection even if the query raises; a sqlite3
# connection used directly in a 'with' statement only manages the transaction
# and is NOT closed on exit.
with closing(sqlite3.connect('/kaggle/input/us-wildfire-records-6th-edition/data.sqlite')) as conn:
    df = pd.read_sql_query(fires_query, con=conn)

# Index the fires by OBJECTID; verify_integrity raises if an identifier is duplicated
df = df.set_index('OBJECTID', verify_integrity = True)

# Rename the columns and the index
# Short lowercase names for the columns used in the rest of the notebook.
# CONT_TIME is not listed in the mapping, so it keeps its original name.
short_names = {
    'FIRE_YEAR': 'fire_year',
    'DISCOVERY_DATE': 'disc_date',
    'DISCOVERY_DOY': 'disc_doy',
    'NWCG_GENERAL_CAUSE': 'cause',
    'CONT_DATE': 'cont_date',
    'FIRE_SIZE': 'fire_size',
    'FIRE_SIZE_CLASS': 'fire_class',
    'LATITUDE': 'latitude',
    'LONGITUDE': 'longitude',
    'STATE': 'state',
}
df = df.rename(columns=short_names)
# The index (set from OBJECTID) is simply called 'id'
df = df.rename_axis('id')

# Convert the columns 'disc_date' and 'cont_date' to datetime format and add
# columns with the discovery month and the name of the discovery weekday.
# The dates are parsed in a single pass: formatting the parsed dates back to
# 'YYYY-MM-DD' strings and parsing them a second time gives the same values,
# only slower.
df['disc_date'] = pd.to_datetime(df['disc_date'], format = '%m/%d/%Y')
df['cont_date'] = pd.to_datetime(df['cont_date'], format = '%m/%d/%Y')
# Insert the derived columns right after 'disc_doy' (positions 3 and 4)
df.insert(3, 'disc_month', df['disc_date'].dt.month)
df.insert(4, 'disc_day', df['disc_date'].dt.day_name())

# Define a new column 'origin'
# Every cause is assigned to one of four broad origins; the mapping is written
# origin -> causes for readability and then flattened to cause -> origin.
causes_by_origin = {
    'Natural': ['Natural'],
    'Criminal': ['Arson/incendiarism'],
    'Undefined': ['Missing data/not specified/undetermined'],
    'Accidental': ['Power generation/transmission/distribution',
                   'Debris and open burning',
                   'Recreation and ceremony',
                   'Equipment and vehicle use',
                   'Fireworks',
                   'Other causes',
                   'Railroad operations and maintenance',
                   'Smoking',
                   'Misuse of fire by a minor',
                   'Firearms and explosives use'],
}
map_cause = {cause: origin
             for origin, causes in causes_by_origin.items()
             for cause in causes}

# A cause that is missing from the mapping ends up as NaN in 'origin'
df['origin'] = df['cause'].map(map_cause)

df.head()

/kaggle/input/us-wildfire-records-6th-edition/_variable_descriptions.csv
/kaggle/input/us-wildfire-records-6th-edition/data.sqlite
/kaggle/input/us-wildfire-records-6th-edition/data.csv


Unnamed: 0_level_0,fire_year,disc_date,disc_doy,disc_month,disc_day,cause,cont_date,CONT_TIME,fire_size,fire_class,latitude,longitude,state,origin
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,2005,2005-02-02,33,2,Wednesday,Power generation/transmission/distribution,2005-02-02,1730,0.1,A,40.036944,-121.005833,CA,Accidental
2,2004,2004-05-12,133,5,Wednesday,Natural,2004-05-12,1530,0.25,A,38.933056,-120.404444,CA,Natural
3,2004,2004-05-31,152,5,Monday,Debris and open burning,2004-05-31,2024,0.1,A,38.984167,-120.735556,CA,Accidental
4,2004,2004-06-28,180,6,Monday,Natural,2004-07-03,1400,0.1,A,38.559167,-119.913333,CA,Natural
5,2004,2004-06-28,180,6,Monday,Natural,2004-07-03,1200,0.1,A,38.559167,-119.933056,CA,Natural


Let's start with a map at the US scale, showing the origin of wildfires larger than 1,000 acres (fire size classes F and G).

In [2]:
import plotly.express as px
import plotly.subplots as psp
import plotly.graph_objects as go


# Keep only the large wildfires (size classes F and G), ordered by year so the
# animation frames follow the timeline
large_fires = df[df['fire_class'].isin(['F', 'G'])].sort_values('fire_year')

# One animation frame per year: marker size follows the fire size and the
# colour follows the origin of the fire
us_map = px.scatter_geo(
    large_fires,
    lat = 'latitude',
    lon = 'longitude',
    color = 'origin',
    size = 'fire_size',
    hover_data = 'fire_class',
    animation_frame = 'fire_year',
    labels = {'origin' : '', 'fire_year' : 'Year'},
    title = 'Origin of large wildfires in US from 1992 to 2020',
)

# Dark theme, map restricted to the USA, horizontal legend placed above the map
us_map.update_layout(
    width = 1200,
    height = 600,
    geo_scope = 'usa',
    template = "plotly_dark",
    legend = {'orientation': "h",
              'yanchor': "top",
              'y': 1.1,
              'xanchor': "left",
              'x': 0.1},
)

us_map.show()

In [3]:
# Detailed interactive map for California
# Large wildfires (size classes F and G) recorded in California, ordered by year
is_california = df['state'] == 'CA'
is_large_fire = df['fire_class'].isin(['F', 'G'])
large_fires_ca = df[is_california & is_large_fire].sort_values('fire_year')

# NOTE(review): plotly express animations are documented to work best when the
# categories mapped to colour are the same in every frame -- confirm that
# causes absent from some years are rendered as expected.
interactive_map_CA = px.scatter_geo(
    large_fires_ca,
    lat = 'latitude',
    lon = 'longitude',
    color = 'cause',
    size = 'fire_size',
    hover_data = 'fire_class',
    animation_frame = 'fire_year',
    labels = {'cause' : '', 'fire_year' : 'Year'},
    title = 'Causes of large wildfires in California over time',
)

# The 'usa' scope is kept, but the view is re-centred and zoomed in
map_view = {'center': {'lat': 37.4, 'lon': -119.4179324},
            'projection_scale': 2.5}

interactive_map_CA.update_layout(
    width = 800,
    height = 800,
    template = "plotly_dark",
    geo_scope = 'usa',
    legend = {'orientation': "h",
              'yanchor': "top",
              'y': 1.1,
              'xanchor': "left",
              'x': 0.1},
    geo = map_view,
)

interactive_map_CA.show()

In [4]:
import numpy as np

# Number of recorded wildfires per state.
# groupby().size() counts the rows of each group and returns a Series with a
# flat 'state' index. The previous value_counts() on the grouping column itself
# can come back with a two-level (state, state) index depending on the pandas
# version, which is not a usable `locations` input for plotly express.
fires_by_states = df.groupby('state').size()

# Choropleth of the counts; the index holds the state codes matched by the
# "USA-states" location mode
fig = px.choropleth(locations=fires_by_states.index,
                    locationmode="USA-states",
                    color=fires_by_states.values,
                    scope="usa",
                    color_continuous_scale="solar",
                    template = "plotly_dark",
                    labels = {'locations' : 'State',
                              'color' : 'Nbr Wildfires'},
                    title = 'Number of wildfires by state since 1992 ')

fig.update_layout(width=800,
                  height=400,
                  coloraxis_colorbar=dict(title="Total"),
                  legend = dict(orientation="h",
                                yanchor="top",
                                y=1.1,
                                xanchor="left",
                                x=0.1)
                             )


fig.show()

In terms of number of fires, states in the South are the most prone to wildfires.

In [5]:
# Total burned area per state: sum of 'fire_size' over all fires of the state
fires_by_states = df['fire_size'].groupby(df['state']).sum()

# Choropleth of the summed areas; the index holds the state codes matched by
# the "USA-states" location mode
fig = px.choropleth(
    locations = fires_by_states.index,
    locationmode = "USA-states",
    color = fires_by_states.values,
    scope = "usa",
    color_continuous_scale = "solar",
    template = "plotly_dark",
    labels = {'locations' : 'State', 'color' : 'Burned area'},
    title = 'Total area burned by state since 1992',
)

fig.update_layout(
    width = 800,
    height = 400,
    coloraxis_colorbar = {'title': "Area"},
    legend = {'orientation': "h",
              'yanchor': "top",
              'y': 1.1,
              'xanchor': "left",
              'x': 0.1},
)

fig.show()

However, the total burned area is larger in the western part of the US.