In [1]:
import pandas as pd

In [18]:
#Load the full WFIGS wildland-fire location history; the discovery timestamp
#column is parsed into datetimes at read time so .dt accessors work on it.
df = pd.read_csv(
    'data/raw_data/WFIGS_-_Wildland_Fire_Locations_Full_History.csv',
    parse_dates=['FireDiscoveryDateTime'],
)

In [19]:
#Extract the calendar month number of each fire's discovery timestamp into a new column.
#NOTE: the parsed timestamps carry a +00:00 (UTC) offset, so this is the UTC month;
#a fire discovered close to a month boundary may belong to the neighbouring month in local time.
df['FireDiscoveryMonth'] = df['FireDiscoveryDateTime'].dt.month

In [20]:
#Preview the columns relevant to the per-month, per-region counts
#(pandas truncates the rendered frame to its first and last rows)
df.loc[
    :,
    [
        'FireDiscoveryDateTime',
        'FireDiscoveryMonth',
        'GACC',
    ],
]

Unnamed: 0,FireDiscoveryDateTime,FireDiscoveryMonth,GACC
0,2020-08-06 18:58:00+00:00,8,NRCC
1,2020-02-28 20:45:40+00:00,2,OSCC
2,2017-10-17 20:20:24+00:00,10,NRCC
3,2019-07-01 19:54:00+00:00,7,OSCC
4,2016-06-20 22:05:59+00:00,6,ONCC
...,...,...,...
154587,2020-03-26 21:59:59+00:00,3,ONCC
154588,2020-11-13 20:43:26+00:00,11,RMCC
154589,2020-07-02 21:07:46+00:00,7,SWCC
154590,2020-07-04 04:44:59+00:00,7,NRCC


In [28]:
#Load the lookup table that maps each GACC code (column 'GACC') to a readable
#region name (column 'GACCName'); it is merged onto the grouped counts later on.
gacc_names = pd.read_csv('data/raw_data/gacc_names.csv')
#Show the whole lookup table as the cell output
gacc_names

Unnamed: 0,GACCName,GACC
0,Alaska,AICC
1,Eastern Area,EACC
2,Great Basin,GBCC
3,Northern California,ONCC
4,Northern Rockies,NRCC
5,Northwest,NWCC
6,Rocky Mountain,RMCC
7,Southern Area,SACC
8,Southern California,OSCC
9,Southwest,SWCC


In [37]:
#Count fires per (discovery month, GACC code), label each code with its region
#name, and keep only the rows whose code is present in gacc_names.
grouped_df = (
    df.groupby(['FireDiscoveryMonth', 'GACC'])['OBJECTID']
    .count()                                        #non-null OBJECTIDs per group
    .reset_index(name='number_fires')               #group keys back to columns, counts named
    .rename(columns={'FireDiscoveryMonth': 'month'})
    .merge(gacc_names, how='left', on='GACC')       #left join keeps every counted group...
    .loc[lambda merged: merged['GACCName'].notna()] #...then unmatched codes are dropped
)

In [40]:
#List the distinct region names that remain in grouped_df after the merge and filter;
#the result is used for the category list in index.html
grouped_df['GACCName'].unique()

array(['Eastern Area', 'Great Basin', 'Northern Rockies', 'Northwest',
       'Northern California', 'Southern California', 'Rocky Mountain',
       'Southern Area', 'Southwest', 'Alaska'], dtype=object)

In [42]:
#Largest number of fires (non-null OBJECTIDs) discovered in any single month;
#the result is used for the axis domain in index.html.
#NOTE(review): this is computed from df, so it also counts fires whose GACC code is
#missing or not listed in gacc_names. Those fires are excluded from grouped_df, which
#makes this value an upper bound on the stacked per-month total rather than the exact
#maximum - confirm that is the intended domain.
df.groupby('FireDiscoveryMonth')['OBJECTID'].count().max()

31980

In [55]:
#Reshape the dataset to the wide layout used by the streamgraph: one row per
#month, one column per region name, each cell holding that region's fire count.
#
#A single pivot replaces the earlier row-by-row DataFrame.append loop:
#DataFrame.append was removed in pandas 2.0, and appending inside a loop
#re-copied the growing frame on every iteration.
#
#Month/region pairs that do not occur in grouped_df come out as NaN.
#pivot raises ValueError if a (month, GACCName) pair occurs more than once,
#which would mean two GACC codes share a region name in gacc_names.
reshaped_gacc_df = (
    grouped_df
    .pivot(index='month', columns='GACCName', values='number_fires')
    .rename_axis(columns=None)  #drop the 'GACCName' label left on the columns axis
    .reset_index()              #turn the month index back into a regular column
    #Uniform float dtype, independent of which cells happen to be NaN.
    #NOTE(review): the append-based loop is believed to have produced float columns
    #as well (e.g. '8.0' in the exported CSV) - confirm against the existing
    #processed file if the exact text format matters to the consumer.
    .astype(float)
)

In [60]:
#Reorder columns so month comes first, followed by the region columns in the
#fixed order listed here; a missing column raises KeyError.
reshaped_gacc_df = reshaped_gacc_df.loc[
    :,
    [
        'month',
        'Eastern Area',
        'Great Basin',
        'Northern California',
        'Northern Rockies',
        'Northwest',
        'Rocky Mountain',
        'Southern Area',
        'Southern California',
        'Southwest',
        'Alaska',
    ],
]

In [62]:
#Write the wide month-by-region table to disk; the row index is not written
reshaped_gacc_df.to_csv(
    path_or_buf="data/processed_data/fires_by_gacc_and_month.csv",
    index=False,
)