In [None]:
import numpy as np
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import os
import descartes #if this fails run conda install -c conda-forge descartes from cmd prompt
import geopandas as gpd #if this fails run conda install geopandas from cmd prompt
from shapely.geometry import Point, Polygon
%matplotlib inline

The goal of this notebook is to import US wildfire data from https://www.kaggle.com/rtatman/188-million-us-wildfires and analyze whether the wildfires of recent years are normal or abnormal.

Plot the number of wildfires and the total acreage of wildfires over time
Plot for California alone
Plot for entire US

Further Analysis
Plot wildfires on US map

In [None]:
# Load the wildfire records from the SQLite database into a DataFrame.
# Only the columns listed in the query are read from the Fires table.
wf_data = sqlite3.connect("FPA_FOD_20170508.sqlite")
try:
    df = pd.read_sql_query(
        "select OBJECTID,FOD_ID,FIRE_NAME,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,"
        "DISCOVERY_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE,COUNTY,"
        "FIPS_NAME from Fires",
        wf_data,
    )
finally:
    # The data now lives in memory, so release the database handle
    # even if the query raised.
    wf_data.close()

# Separate dataframe holding only the California rows.
ca_df = df[df['STATE'] == 'CA']

In [None]:
# Read the California rainfall CSV (its first column becomes the index),
# then group the rows by their YEAR column for later aggregation.
rain_csv_path = 'CA_avg_rainfall_data.csv'
rain_data_df = pd.read_csv(rain_csv_path, index_col=0)
yearly_rain_df = rain_data_df.groupby(by='YEAR')  # note: a GroupBy object, not a DataFrame

In [None]:
# Total VALUE per year as a one-column DataFrame.
# Selecting 'VALUE' before summing avoids aggregating every other column of the
# rainfall table, which is wasted work and can fail on columns that cannot be summed.
year_rain_totals_df = yearly_rain_df['VALUE'].sum().to_frame()

<<< JOSH SECTION >>>

Two DataFrames created

    df: data from all US fires
    ca_df: data from all California fires
    
Next step: collect the total fire size and the number of fires into a new DataFrame grouped by year
    

In [None]:
# Per-year FIRE_SIZE aggregates: 'sum' is the total size and 'count' the number
# of non-null FIRE_SIZE records, for the whole US and for California alone.
size_stats = ['sum', 'count']
year_df = df.groupby('FIRE_YEAR')['FIRE_SIZE'].agg(size_stats)
year_ca_df = ca_df.groupby('FIRE_YEAR')['FIRE_SIZE'].agg(size_stats)


In [None]:
# Give the aggregate columns descriptive names; both yearly tables share one mapping.
acre_count_names = {"sum": "FIRE_ACRES", "count": "FIRE_COUNT"}
for yearly_table in (year_df, year_ca_df):
    yearly_table.rename(columns=acre_count_names, inplace=True)

In [None]:
# US-wide yearly chart: acres burned (left axis, red) beside fire count
# (right axis, orange), one pair of bars per year.
fig = plt.figure(figsize=(10,6))  # Create matplotlib figure

ax = fig.add_subplot(111)  # Primary axes: acreage
ax2 = ax.twinx()  # Secondary axes sharing the same x-axis: fire count

width = 0.4  # single source of truth for the width of both bar series

# position=0 draws the bar to the right of its tick, position=1 to the left,
# so the two series sit side by side instead of overlapping.
year_df.FIRE_ACRES.plot(kind='bar', color='red', ax=ax, position=0, alpha=0.5, width=width)
year_df.FIRE_COUNT.plot(kind='bar', color='orange', ax=ax2, position=1, alpha=0.5, width=width)

# The second bar plot resets the shared x-limits, which can clip the outermost
# bars; set limits wide enough for a full bar on each side.
ax.set_xlim(-0.5 - width, len(year_df) - 0.5 + width)

ax.set_title('US wildfires by year: acres burned and fire count')
ax.set_xlabel('Year')
ax.set_ylabel('Acres')
ax.legend(loc=0)
ax2.set_ylabel('Count')
ax2.legend(loc=(0.005,0.85))

plt.show()

In [None]:
# California yearly chart: acres burned (left axis, red) beside the yearly
# rainfall total (right axis, blue).
fig_ca = plt.figure(figsize=(10,6))  # Create matplotlib figure

ax = fig_ca.add_subplot(111)  # Primary axes: acreage
ax2 = ax.twinx()  # Secondary axes sharing the same x-axis: rainfall

width = 0.5  # width actually used by both bar series

# Bar plots place bars by position (0, 1, 2, ...), not by index value, so the
# rainfall series must cover exactly the same years, in the same order, as the
# fire table; otherwise a bar pair would show two different years.
# NOTE(review): assumes the rainfall YEAR values are comparable to FIRE_YEAR
# (plain integer years) - verify against the CSV.
rain_by_fire_year = year_rain_totals_df.VALUE.reindex(year_ca_df.index)

year_ca_df.FIRE_ACRES.plot(kind='bar', color='red', ax=ax, position=0, alpha=0.5, width=width)
rain_by_fire_year.plot(kind='bar', color='blue', ax=ax2, position=1, alpha=0.1, width=width)

# The second bar plot resets the shared x-limits, which can clip the outermost
# bars; set limits wide enough for a full bar on each side.
ax.set_xlim(-0.5 - width, len(year_ca_df) - 0.5 + width)

ax.set_title('California wildfires by year: acres burned and rainfall')
ax.set_xlabel('Year')
ax.set_ylabel('Acres')
ax.legend(loc=(0.005,0.9))
ax2.set_ylabel('Rain')
ax2.legend(loc=(0.005,0.85))

plt.show()

Initial Conclusion

The count of fires appears to be flat over time; however, the acreage burned is increasing.

In California, the same conclusion about the count of fires appears to hold; however, the acreage burned appears to be cyclical and to be increasing significantly.

Interesting next steps would be to correlate the spikes in acreage with specific events (weather, etc.)

In [None]:
# California FIRE_SIZE aggregates per discovery day-of-year: total size and
# number of records, with the aggregate columns renamed descriptively.
day_ca_df = (
    ca_df.groupby('DISCOVERY_DOY')['FIRE_SIZE']
    .agg(['sum', 'count'])
    .rename(columns={"sum": "FIRE_ACRES", "count": "FIRE_COUNT"})
)

In [None]:
# Bar chart of the number of California fires per discovery day-of-year.
fig = plt.figure(figsize=(10,10))

# Add set of axes to figure
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # left, bottom, width, height (range 0 to 1)
day_ca_df.FIRE_COUNT.plot(kind='bar', color='red', ax=ax, position=0, alpha=0.5, width=0.4)

# A bar plot labels every bar; with one bar per day-of-year the labels overlap
# into an unreadable band, so keep only every 30th tick.
tick_step = 30
tick_positions = list(range(0, len(day_ca_df), tick_step))
ax.set_xticks(tick_positions)
ax.set_xticklabels([str(day) for day in day_ca_df.index[tick_positions]])

ax.set_xlabel('Discovery day of year')
ax.set_ylabel('Fire Count')
ax.set_title('California fires by discovery day of year')
plt.show()

The next section plots the fire locations geographically on a map.

In [None]:
# Load the state boundary shapes used as the background for the fire maps.
states_shapefile = 'states.shp'
us_map = gpd.read_file(states_shapefile)

In [None]:
# Keep only the fires larger than 1000 acres and attach a shapely Point
# (longitude, latitude) to each row - first for the whole US ...
is_large_us = df['FIRE_SIZE'] > 1000.0
large_df = df[is_large_us]
geometry = list(map(Point, large_df['LONGITUDE'], large_df['LATITUDE']))
gdf = gpd.GeoDataFrame(large_df, geometry=geometry)

# ... then the same for California alone.
is_large_ca = ca_df['FIRE_SIZE'] > 1000.0
large_ca_df = ca_df[is_large_ca]
geometry = list(map(Point, large_ca_df['LONGITUDE'], large_ca_df['LATITUDE']))
ca_gdf = gpd.GeoDataFrame(large_ca_df, geometry=geometry)

# Years to map: 1992 through 2015 inclusive (the stop value is exclusive).
years = np.arange(1992, 2016)

In [None]:
# Draw one California map per year showing that year's fires larger than
# 1000 acres, and save each map as ca_<year>_fires.jpg in the working directory.
ca_outline = us_map[us_map.STATE_ABBR == "CA"]  # loop-invariant, so select it once

for y in years:
    y_df = large_ca_df[large_ca_df['FIRE_YEAR']==y]

    ax = ca_outline.plot(figsize=(15,15), alpha=0.5)
    ax.set_xlim(-126.0, -113.0)
    ax.set_ylim(32.0, 43.0)

    # Only draw markers when the year has large fires; a year without any
    # still gets an (empty) map so the image sequence has no gaps.
    if not y_df.empty:
        year_points = [Point(xy) for xy in zip(y_df['LONGITUDE'], y_df['LATITUDE'])]
        year_gdf = gpd.GeoDataFrame(y_df, geometry=year_points)
        # Marker size scales with the fire size.
        year_gdf.plot(ax=ax, marker='o', color='red', markersize=y_df['FIRE_SIZE']/10, alpha=0.75)

    ax.axis('off')
    ax.set_title('{} Fires > 1000 acres'.format(y))

    filepath = os.path.join('./', 'ca_{}_fires.jpg'.format(y))
    chart = ax.get_figure()
    chart.savefig(filepath, dpi=300)

    # Display the figure, then release it: leaving every 15x15 inch figure
    # open until the end of the loop wastes memory and triggers matplotlib's
    # "too many open figures" warning.
    plt.show()
    plt.close(chart)

In [None]:
# Single California map showing every fire larger than 1000 acres in the data,
# saved as ca_1992-2015_fires.jpg in the working directory.
geometry = [Point(xy) for xy in zip(large_ca_df['LONGITUDE'], large_ca_df['LATITUDE'])]
ca_gdf = gpd.GeoDataFrame(large_ca_df, geometry=geometry)

ax = us_map[us_map.STATE_ABBR == "CA"].plot(figsize=(15,15), alpha=0.5)
ax.set_xlim(-126.0, -113.0)
ax.set_ylim(32.0, 43.0)

# Marker sizes must come from the same rows that are being plotted
# (large_ca_df); sizing them from a single-year subset left over from an
# earlier cell would give markers that do not correspond to their fires.
ca_gdf.plot(ax=ax, marker='o', color='red', markersize=large_ca_df['FIRE_SIZE']/100, alpha=0.75)

ax.axis('off')
ax.set_title('CA Fires > 1000 acres 1992-2015')

filepath = os.path.join('./', 'ca_1992-2015_fires.jpg')
chart = ax.get_figure()
chart.savefig(filepath, dpi=300)

<<< BHARAT SECTION >>> COUNTY PROCESSING

In [None]:
# Number of California fire records per county (FIPS_NAME), sorted ascending so
# the least fire-prone counties come first and the most fire-prone last.
# Note: this counts every row of ca_df; no fire-size filter is applied here.
county_ca_df = (
    ca_df.groupby('FIPS_NAME')['FOD_ID']
    .agg(['count'])
    .rename(columns={"count": "FIRE_COUNT"})
    .sort_values(by=['FIRE_COUNT'])
)
county_ca_df

In [None]:
# Bar chart of the fire count for each California county, in table order.
fig = plt.figure(figsize=(15,15))

# One set of axes filling most of the canvas: left, bottom, width, height (range 0 to 1)
axes_rect = [0.1, 0.1, 0.8, 0.8]
ax = fig.add_axes(axes_rect)

county_counts = county_ca_df.FIRE_COUNT
county_counts.plot(ax=ax, kind='bar', width=0.4, position=0, color='red', alpha=0.5)

ax.set_ylabel('Fire Count')
ax.legend(loc=(0.005,0.9))

plt.show()

In [None]:
# Load the California county boundary shapes and draw them as a quick visual check.
county_shapefile = 'CA_counties.shp'
ca_counties = gpd.read_file(county_shapefile)
ca_counties.plot()

In [None]:
# Move the index back into an ordinary column, leaving a default integer index.
county_ca_df = county_ca_df.reset_index()

In [None]:
# Keep only the two columns needed for the map.
map_columns = ['FIPS_NAME', 'FIRE_COUNT']
county_ca_df = county_ca_df[map_columns]

# Rename the columns to the labels used by the map code; index=str also
# converts the row labels to strings.
friendly_names = {"FIPS_NAME": "County Name", "FIRE_COUNT": "Counts"}
score = county_ca_df.rename(index=str, columns=friendly_names)

# Preview the first rows of the result.
score.head()

In [None]:
# Attach the per-county fire counts to the county shapes by matching the
# shapefile's NAME column to the 'County Name' column of the counts table.
# join() defaults to a left join, so every county shape is kept.
counties_by_name = ca_counties.set_index('NAME')
counts_by_name = score.set_index('County Name')
merged = counties_by_name.join(counts_by_name)

# Restore the county name as a regular column and preview the result.
merged.reset_index(inplace = True)
merged.head()

In [None]:
# Choropleth of California counties shaded by fire count, with a colorbar,
# saved as CAcountyfirepronemap.png in the working directory.

# Column of `merged` to visualise on the map.
variable = 'Counts'

# Colour range for the choropleth, taken from the per-county fire counts.
vmin, vmax = county_ca_df.FIRE_COUNT.min(), county_ca_df.FIRE_COUNT.max()

# Create figure and axes for Matplotlib.
fig, ax = plt.subplots(1, figsize=(15, 10))

# Draw the map. vmin/vmax are passed explicitly so the map uses exactly the
# same colour scale as the colorbar built below; without them the map scales
# itself to whatever values are present in `merged`, which can differ.
merged.plot(column=variable, cmap='Blues', linewidth=0.6, ax=ax, edgecolor='0.6',
            vmin=vmin, vmax=vmax)

# Remove the axis frame and ticks.
ax.axis('off')

# Add a title.
ax.set_title('CA County Fires - Least fire-prone ~ Most fire-prone',
             fontdict={'fontsize': '25',
                       'fontweight' : '3'})

# Annotate the data source in the lower-left corner of the figure.
ax.annotate('CA County Fires, 1992-2015',
            xy=(0.1, .08), xycoords='figure fraction',
            horizontalalignment='left', verticalalignment='top',
            fontsize=10, color='#555555')

# Build a stand-alone mappable carrying the same colormap and range as the
# map, and use it as the legend.
sm = plt.cm.ScalarMappable(cmap='Blues', norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])  # public replacement for assigning the private sm._A
# The mappable is not attached to any axes, so tell the colorbar which axes to
# take space from; newer matplotlib versions refuse to guess.
cbar = fig.colorbar(sm, ax=ax)

# Save the figure as a high-resolution PNG.
fig.savefig('CAcountyfirepronemap.png', dpi=300)
