The main goal of this notebook is to show some seaborn visualizations of the number of fires in Brazil.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

class color:
    """ANSI escape sequences used to style text printed to the console."""

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours.
    DARKCYAN = '\033[36m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'

    # Resets every attribute set by the codes above.
    END = '\033[0m'

In [None]:
# Load the fire records. The file is read as Latin-1 and '.' is parsed as a
# thousands separator in numeric columns.
data = pd.read_csv(
    '/kaggle/input/forest-fires-in-brazil/amazon.csv',
    encoding='latin1',
    thousands='.',
)
# Preview the first rows.
data.head()

## Analysis by year and month

In [None]:
# Quick overview of the dataset: row count, distinct states and year range.
# `states` is kept as a notebook-level variable because later cells iterate over it.
states = pd.unique(data['state'])
first_year = data['year'].min()
last_year = data['year'].max()

print(f"{color.BOLD}Number of lines : {color.END}{data['year'].count()}")
print(f"{color.BOLD}States in the file : {color.END}")
print(states)
print(f"{color.BOLD}Years from : {color.END}{first_year} to {last_year}")

In [None]:
# Total number of fires per year.
# Only the 'number' column is aggregated: since pandas 2.0, `groupby().sum()`
# no longer drops non-numeric columns, so summing the whole frame would also
# concatenate the text columns of every group.
nb_fires_per_year = data.groupby('year')['number'].sum().reset_index()

In [None]:
# Line chart of the yearly totals.
sns.set()
plt.figure(figsize=(15, 3))

# Tick configuration for the x axis: a MultipleLocator with its default base
# and integer-formatted labels, applied before the line is drawn.
x_axis = plt.gca().xaxis
locator = matplotlib.ticker.MultipleLocator()
formatter = matplotlib.ticker.StrMethodFormatter("{x:.0f}")
x_axis.set_major_locator(locator)
x_axis.set_major_formatter(formatter)

ax = sns.lineplot(data=nb_fires_per_year, x="year", y="number", color='Red')

# The title carries the meaning, so both axis labels are blanked.
ax.set_ylabel('')
ax.set_xlabel('')
title_font = {'fontsize': '17', 'fontweight': 'bold'}
ax.set_title("Number of fires by year in Brazil", fontdict=title_font)

With this graph, it is hard to see a clear increase in the number of fires. The numbers nevertheless remain very high.
Let's look at the number of fires by month.

In [None]:
# Total number of fires per month, in calendar order.
# The month labels are Portuguese, as spelled in the reindex list below.
# June ('Junho') must be listed explicitly: listing 'Julho' twice would drop
# June and show July's total in its place.
month_order = ['Janeiro', 'Fevereiro', 'Março', 'Abril', 'Maio', 'Junho',
               'Julho', 'Agosto', 'Setembro', 'Outubro', 'Novembro', 'Dezembro']

# Only 'number' is summed; the other columns are not meaningful as monthly sums.
nb_fires_per_month = (
    data.groupby('month')['number']
    .sum()
    .reindex(month_order)
    .reset_index()
)
nb_fires_per_month

In [None]:
# Bar chart of the monthly totals.
sns.set()
plt.figure(figsize=(15, 3))

ax = sns.barplot(data=nb_fires_per_month, x="month", y="number", palette="Reds")

# The title carries the meaning, so both axis labels are blanked.
ax.set_ylabel('')
ax.set_xlabel('')
title_font = {'fontsize': '17', 'fontweight': 'bold'}
ax.set_title("Number of fires by month in Brazil since 1998", fontdict=title_font)

May to November corresponds to the driest period in Brazil, which is why there are more fires during these months. However, Amazonia remains a humid area all year round, so this relatively dry period cannot be considered the only factor behind the increase in the number of fires.

## Analysis by states

In [None]:
# Total number of fires per state, sorted from the lowest to the highest total.
# NOTE: despite its name, `fires_states_2017` is not filtered on a year; it
# aggregates every row of `data`. The name is kept for compatibility.
# Only 'number' is summed: since pandas 2.0, `groupby().sum()` no longer drops
# non-numeric columns and would concatenate the text columns of each group.
fires_states_2017 = data.groupby('state')['number'].sum().reset_index()
fires_states_2017 = fires_states_2017.sort_values(by=['number'], ascending=True)

sns.set()
plt.figure(figsize=(30, 3))

ax = sns.barplot(x="state", y="number", data=fires_states_2017, palette="Reds")
# The title carries the meaning, so both axis labels are blanked.
ax.set_ylabel('')
ax.set_xlabel('')
ax.set_title("Number of fires by states in Brazil since 1998",fontdict={'fontsize': '17', 'fontweight' : 'bold'})

In [None]:
# Compare, for each state, the mean yearly number of fires over the first
# three years (1998-2000) with the mean over the last three years (2015-2017).

# Yearly totals indexed by (state, year). Kept as a DataFrame with a single
# 'number' column because a later cell calls `reset_index()` on it.
nb_fires_per_year_and_state = data.groupby(['state', 'year'])[['number']].sum()

first_years = (1998, 1999, 2000)
last_years = (2015, 2016, 2017)

# Rows are collected in a list and turned into a DataFrame once:
# `DataFrame.append` was removed in pandas 2.0, and appending row by row
# copies the whole frame on every iteration.
evolution_rows = []
for state in states:
    # A missing (state, year) pair raises KeyError, as in a direct lookup.
    init_val = int(
        sum(nb_fires_per_year_and_state.loc[(state, year), 'number'] for year in first_years)
        / len(first_years)
    )
    final_val = int(
        sum(nb_fires_per_year_and_state.loc[(state, year), 'number'] for year in last_years)
        / len(last_years)
    )
    diff = final_val - init_val
    evolution_rows.append({
        'state': state,
        'mean_3_first_years': init_val,
        'mean_3_last_years': final_val,
        'diff': diff,
        # A zero baseline has no defined relative change; report NaN instead
        # of raising ZeroDivisionError.
        'diff_percentage': (diff / init_val) * 100 if init_val else float('nan'),
    })

evolution_nb_fire = pd.DataFrame(
    evolution_rows,
    columns=['state', 'mean_3_first_years', 'mean_3_last_years', 'diff', 'diff_percentage'],
)

# Keep only the six states selected for the rest of the analysis.
evolution_nb_fire = evolution_nb_fire.set_index('state').loc[['Acre','Amazonas','Mato Grosso','Roraima','Pará','Tocantins']]
evolution_nb_fire

To get those numbers, I compute the mean number of fires for the years 1998, 1999 and 2000, then for the years 2015, 2016 and 2017, and I subtract the first mean from the second. I only keep the states which are part of Amazonia. It helps us understand the evolution in the different states.

In [None]:
# Flatten the (state, year) index back into regular columns.
fires_states = nb_fires_per_year_and_state.reset_index()

# Split the selected states into two groups so that each group can be
# plotted on its own chart.
in_few_fires_group = fires_states['state'].isin(['Acre','Amazonas','Roraima'])
in_big_fires_group = fires_states['state'].isin(['Mato Grosso','Pará','Tocantins'])

few_fires_states = fires_states[in_few_fires_group]
big_fires_states = fires_states[in_big_fires_group]

In [None]:
# Yearly evolution for the states in `few_fires_states`, one line per state.
sns.set()
plt.figure(figsize=(15, 10))

# Tick configuration for the x axis: a MultipleLocator with its default base
# and integer-formatted labels, applied before the lines are drawn.
x_axis = plt.gca().xaxis
locator = matplotlib.ticker.MultipleLocator()
formatter = matplotlib.ticker.StrMethodFormatter("{x:.0f}")
x_axis.set_major_locator(locator)
x_axis.set_major_formatter(formatter)

ax = sns.lineplot(data=few_fires_states, x="year", y="number", hue="state")

# The title carries the meaning, so both axis labels are blanked.
ax.set_ylabel('')
ax.set_xlabel('')
title_font = {'fontsize': '17', 'fontweight': 'bold'}
ax.set_title("Number of fires per year and states", fontdict=title_font)

The three states have more and more fires every year. The most impressive curve is that of the state of Amazonas. The number of fires went from approximately 1,000 in 1998 to more than 14,000 in 2017!

In [None]:
# Yearly evolution for the states in `big_fires_states`, one line per state.
sns.set()
plt.figure(figsize=(15, 10))

# Tick configuration for the x axis: a MultipleLocator with its default base
# and integer-formatted labels, applied before the lines are drawn.
x_axis = plt.gca().xaxis
locator = matplotlib.ticker.MultipleLocator()
formatter = matplotlib.ticker.StrMethodFormatter("{x:.0f}")
x_axis.set_major_locator(locator)
x_axis.set_major_formatter(formatter)

ax = sns.lineplot(data=big_fires_states, x="year", y="number", hue="state")

title_font = {'fontsize': '17', 'fontweight': 'bold'}
ax.set_title("Number of fires per year and states", fontdict=title_font)
# The title carries the meaning, so both axis labels are blanked.
ax.set_ylabel('')
ax.set_xlabel('')

The number of fires in Mato Grosso decreases. In contrast, we can see an increase for the state of Pará.