# Exercise 7-1: Prepare the Forest Fires data

## Read the data

In [1]:
import pandas as pd

In [2]:
# Load the fires-by-month DataFrame from a pickle file in the working directory.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
fires_by_month = pd.read_pickle('fires_by_month.pkl')

In [3]:
# Preview the first five rows to check the columns that were loaded.
fires_by_month.head()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count
0,AK,1992,5,4202.0,135.0,14
1,AK,1992,6,86401.0,417.0,23
2,AK,1992,7,48516.7,500.0,26
3,AK,1992,8,3305.0,92.0,4
4,AK,1992,9,20.0,1.0,1


## Add and modify columns

In [4]:
# Vectorized column arithmetic: acres burned divided by days burning.
# The division is not guarded, so a row with acres burned but days_burning
# equal to 0 ends up as inf.
fires_by_month['mean_acres_per_day'] = fires_by_month.acres_burned / fires_by_month.days_burning
fires_by_month.head(3)

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day
0,AK,1992,5,4202.0,135.0,14,31.125926
1,AK,1992,6,86401.0,417.0,23,207.196643
2,AK,1992,7,48516.7,500.0,26,97.0334


In [5]:
# Same calculation done row by row (axis=1), returning 0 when days_burning is
# zero or negative so those rows get 0 instead of the inf that plain division
# produces.
fires_by_month['mean_acres_per_day_lambda'] = fires_by_month.apply(
    lambda x: 0 if x.days_burning <= 0 else x.acres_burned / x.days_burning, axis=1)
fires_by_month.head(3)

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,5,4202.0,135.0,14,31.125926,31.125926
1,AK,1992,6,86401.0,417.0,23,207.196643,207.196643
2,AK,1992,7,48516.7,500.0,26,97.0334,97.0334


In [6]:
def convert_month(row):
    """Return the three-letter month abbreviation for a row's ``fire_month``.

    ``row`` is any object exposing a ``fire_month`` attribute that holds a
    month number from 1 to 12 (for example a DataFrame row handed over by
    ``apply(..., axis=1)``).  A value outside 1-12 raises ``KeyError``.
    """
    abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    # Number the abbreviations from 1 so the month number is the lookup key.
    lookup = dict(enumerate(abbreviations, start=1))
    return lookup[row.fire_month]

In [7]:
# Replace each numeric fire_month with its abbreviation, one row at a time.
# convert_month already takes a row, so it is passed to apply() directly.
fires_by_month['fire_month'] = fires_by_month.apply(convert_month, axis=1)
fires_by_month.head(3)

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,May,4202.0,135.0,14,31.125926,31.125926
1,AK,1992,Jun,86401.0,417.0,23,207.196643,207.196643
2,AK,1992,Jul,48516.7,500.0,26,97.0334,97.0334


## Work with indexes

In [8]:
# Move state, fire_year and fire_month out of the columns and into a
# three-level row index, modifying fires_by_month in place.
fires_by_month.set_index(['state','fire_year','fire_month'], inplace=True)
fires_by_month.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
state,fire_year,fire_month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,1992,May,4202.0,135.0,14,31.125926,31.125926
AK,1992,Jun,86401.0,417.0,23,207.196643,207.196643
AK,1992,Jul,48516.7,500.0,26,97.0334,97.0334


In [9]:
# Pivot the fire_month index level into the columns: one row per
# (state, fire_year) and one column per measure/month pair. Months with no
# data for a given state and year are left as missing values.
fires_by_month_wide = fires_by_month.unstack(level='fire_month')
fires_by_month_wide.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,acres_burned,acres_burned,acres_burned,acres_burned,acres_burned,acres_burned,acres_burned,acres_burned,acres_burned,acres_burned,...,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda,mean_acres_per_day_lambda
Unnamed: 0_level_1,fire_month,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,...,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
state,fire_year,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
AK,1992,,3305.0,,,,48516.7,86401.0,,4202.0,,...,,,,97.0334,207.196643,,31.125926,,,20.0
AK,1993,113.0,1132.0,,,,483493.3,197740.2,,4152.0,,...,,,,243.819112,163.286705,,37.071429,,,
AK,1994,,24139.0,,,,50510.0,184428.0,200.0,1127.7,,...,,,,102.246964,126.580645,0.0,49.030435,,333.333333,12.5


In [10]:
# Turn the three index levels back into ordinary columns and restore a
# default integer index.
fires_by_month = fires_by_month.reset_index()
fires_by_month.head()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,May,4202.0,135.0,14,31.125926,31.125926
1,AK,1992,Jun,86401.0,417.0,23,207.196643,207.196643
2,AK,1992,Jul,48516.7,500.0,26,97.0334,97.0334
3,AK,1992,Aug,3305.0,92.0,4,35.923913,35.923913
4,AK,1992,Sep,20.0,1.0,1,20.0,20.0


## Combine data

In [11]:
# Build a one-row DataFrame that has the same columns as fires_by_month.
# NOTE(review): the month is spelled 'June' here, while the existing rows use
# three-letter abbreviations such as 'Jun' - confirm this is intended.
new_fire = pd.DataFrame(data=[['CA',2021,'June',1000,100,1,10,10]], columns=fires_by_month.columns)
new_fire.head()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,CA,2021,June,1000,100,1,10,10


In [12]:
# Append the new row to the end of the data. concat keeps each frame's own
# index labels, so the appended row still carries label 0 afterwards.
fires_by_month = pd.concat([fires_by_month, new_fire])

In [13]:
# Inspect the end of the frame to see the appended row and its index label.
fires_by_month.tail()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
9296,WY,2015,Sep,5977.0,126.0,25,47.436508,47.436508
9297,WY,2015,Oct,10337.8,15.0,6,689.186667,689.186667
9298,WY,2015,Nov,509.3,3.0,4,169.766667,169.766667
9299,WY,2015,Dec,72.0,0.0,2,inf,0.0
0,CA,2021,June,1000.0,100.0,1,10.0,10.0


In [14]:
# Renumber the rows from 0 in place; drop=True discards the old labels instead
# of keeping them as a column, so the appended row gets the next label in
# sequence.
fires_by_month.reset_index(drop=True, inplace=True)
fires_by_month.tail()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
9296,WY,2015,Sep,5977.0,126.0,25,47.436508,47.436508
9297,WY,2015,Oct,10337.8,15.0,6,689.186667,689.186667
9298,WY,2015,Nov,509.3,3.0,4,169.766667,169.766667
9299,WY,2015,Dec,72.0,0.0,2,inf,0.0
9300,CA,2021,June,1000.0,100.0,1,10.0,10.0


## Fix the SettingWithCopyWarning

In [15]:
# Take an independent copy of the Alaska rows so the rounding below changes
# only fires_ak and does not trigger a SettingWithCopyWarning.
fires_ak = fires_by_month.loc[fires_by_month['state'] == 'AK'].copy()
fires_ak['mean_acres_per_day'] = fires_ak['mean_acres_per_day'].round()
fires_ak.head()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,May,4202.0,135.0,14,31.0,31.125926
1,AK,1992,Jun,86401.0,417.0,23,207.0,207.196643
2,AK,1992,Jul,48516.7,500.0,26,97.0,97.0334
3,AK,1992,Aug,3305.0,92.0,4,36.0,35.923913
4,AK,1992,Sep,20.0,1.0,1,20.0,20.0


In [16]:
# Confirm that the original frame still holds the unrounded
# mean_acres_per_day values.
fires_by_month.head()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,May,4202.0,135.0,14,31.125926,31.125926
1,AK,1992,Jun,86401.0,417.0,23,207.196643,207.196643
2,AK,1992,Jul,48516.7,500.0,26,97.0334,97.0334
3,AK,1992,Aug,3305.0,92.0,4,35.923913,35.923913
4,AK,1992,Sep,20.0,1.0,1,20.0,20.0
