In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found in it.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# Any results you write to the current directory are saved as output.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# The file is decoded as latin1: the Portuguese month names it contains
# include accented characters (e.g. 'Março').
amazon_csv = '/kaggle/input/forest-fires-in-brazil/amazon.csv'
data = pd.read_csv(amazon_csv, encoding='latin1')

In [None]:
# First five rows, to get a feel for the columns.
data.head(5)

Things to note
1. Month names are in Portuguese
2. Year as well as date column is given
3. The number of fires is given in the `number` column

In [None]:
# Rows 50-54 by position, to look at a slice away from the top of the file.
data.iloc[50:55]

Things to Note:
1. The date column seems redundant because it carries no month or day-of-month information; every date is the start of its year
2. Also, might make sense to keep the months as integers for plotting
3. Dropping the redundant columns

In [None]:
# (Portuguese, English) month names in calendar order. Both lookup tables are
# derived from this single list so they cannot drift out of sync.
_months_pt_en = [
    ('Janeiro', 'January'), ('Fevereiro', 'February'), ('Março', 'March'),
    ('Abril', 'April'), ('Maio', 'May'), ('Junho', 'June'),
    ('Julho', 'July'), ('Agosto', 'August'), ('Setembro', 'September'),
    ('Outubro', 'October'), ('Novembro', 'November'), ('Dezembro', 'December'),
]

# Portuguese month name -> month number (1-12), used for ordering plot axes.
month_number = {pt: idx for idx, (pt, _en) in enumerate(_months_pt_en, start=1)}

# Portuguese month name -> English month name.
month_english = dict(_months_pt_en)

# Guard on the source column so that re-running this cell is a no-op instead of
# failing once 'month' has already been dropped.
if 'month' in data.columns:
    # Values not present in the lookup tables map to NaN (checked by the NA count below).
    data['Month'] = data['month'].map(month_english)
    data['Month No.'] = data['month'].map(month_number)
    # 'date' and 'month' are redundant from here on; rebind rather than mutate in place.
    data = data.drop(columns=['date', 'month'])

In [None]:
# Sanity check: the last five rows should now show the new Month / Month No. columns.
data.tail(5)

In [None]:
# Count missing values per column (expected: none).
data.isnull().sum()

**PLOTTING STARTS HERE**

**Chart 01: Number of Fires Over Years in Brazil** <br>
<br>
Takeaway:
* There might be a weak cyclic pattern to the fires, but not beyond reasonable doubt
* Exploration with influencing factors in these years, such as air temperature might have been interesting 

In [None]:
# Chart 01: total number of reported fires per year.
fig = plt.figure(figsize = (20,4))
sns.set_style('white') # sets background style as white, other 4 options are whitegrid, dark, darkgrid, ticks
sns.set_context('talk', font_scale = 0.9) # sets the scale/size of the chart. Other 3 options are paper << notebook << talk << poster

# Aggregate first: sns.barplot would otherwise plot the *mean* of the individual
# records per year (its default estimator), not the yearly number of fires.
yearly_totals = data.groupby('year', as_index=False)['number'].sum()
yearly_chart = sns.barplot(x = 'year', y = 'number', data = yearly_totals, color = 'red')

# Property names passed to Axes.set must be lower-case ('title', not 'Title').
# The year range is read from the data so the title always matches the bars shown.
yearly_chart.set(
    xlabel = 'Year',
    ylabel = 'Count of Fires',
    title = 'Number of Fires in Brazil, {}-{}'.format(yearly_totals['year'].min(), yearly_totals['year'].max()),
)
sns.despine()

**Chart 02: Number of Fires by Month** <br>
* Fires increase significantly from July onwards.
* Exploration with avg temperature of these months becomes necessary now

In [None]:
# Chart 02: total number of reported fires per calendar month (all years combined).
fig = plt.figure(figsize = (15,6))
sns.set_style('whitegrid') # see the horizontal lines
sns.set_context('poster', font_scale = 0.6) # see how the font has to be reduced and how all things appear bigger

# The title promises totals, so sum per month before plotting; sns.barplot on the
# raw rows would show the per-record mean with bootstrap error bars instead.
monthly_totals = data.groupby('Month No.', as_index=False)['number'].sum()
monthly_chart = sns.barplot(x = 'Month No.', y = 'number', data = monthly_totals, color = 'orange')
monthly_chart.set(title = "Total Number of Fires, distributed by Month of Year", xlabel = 'Month Number', ylabel = 'Count of Fires')
sns.despine(offset = 20, left = True)

In [None]:
# Average temperature for each calendar month (January first), gathered by hand from the web.
# NOTE(review): sub-zero values in months 1-2 and a peak in month 7 look unusual
# for Brazil — confirm the source of these numbers.
monthly_temps = [-3.1, -0.8, 4.9, 11.4, 17, 22, 24, 22.8, 19.1, 12.7, 5.9, 0.3]
avg_temp = dict(zip(range(1, 13), monthly_temps))

# Attach the month's temperature to every row via its month number.
data['Temp'] = data['Month No.'].map(avg_temp)

**Chart 03: Check if there is a link between avg. monthly temperature and number of fires**
* Not very strong, but it seems that the fires increase as the temperatures increase

In [None]:
# Chart 03: fires (left axis, orange) against average temperature (right axis, red) by month.
fig = plt.figure(figsize = (15,5))
sns.set_style('white')
sns.set_context('notebook', font_scale = 1.2)

# legend=False: there is no hue/size/style mapping, so seaborn has nothing to put in a
# legend; the two series are identified through colour-matched y-axes instead.
# With several rows per month, lineplot draws the mean and a confidence band.
month_chart = sns.lineplot(x = 'Month No.', y = 'number', color = 'orange', data = data, legend = False)
ax2 = month_chart.twinx()  # second y-axis sharing the same month axis
sns.lineplot(x = 'Month No.', y = 'Temp', ax = ax2, color = 'red', data = data, legend = False)

month_chart.set(title = 'Relation between the Temperature and Number of Fires in a Month', xlabel = 'Month', ylabel = 'Count of Fires')
ax2.set_ylabel('Avg. Temperature')  # replace the raw column name 'Temp'

# Colour each y-axis like its line so the reader can tell the two series apart.
month_chart.yaxis.label.set_color('orange')
month_chart.tick_params(axis = 'y', colors = 'orange')
ax2.yaxis.label.set_color('red')
ax2.tick_params(axis = 'y', colors = 'red')

# One tick per calendar month rather than matplotlib's automatic tick spacing.
month_chart.set_xticks(range(1, 13))
sns.despine(left = True)

**Chart 04: See the distribution of fires across months and years**
* Size and intensity indicates the number of fires reported
* This graph is just to get a top level view of the pattern of fires
* Helps us identify data that stand out. For example, fires in months 2-5 are getting worse by the year

In [None]:
# Chart 04: heatmap of summed fire counts, one row per month number and one column per year.
fig = plt.figure(figsize=(15, 8))
sns.set_style('dark')
sns.set_context('talk', font_scale=0.9)

# Month x year grid holding the sum of 'number' for each combination.
fires_by_month_year = pd.pivot_table(
    data,
    index='Month No.',
    columns='year',
    values='number',
    aggfunc='sum',
)
year_month_matrix = sns.heatmap(fires_by_month_year, cmap='Reds')
year_month_matrix.set(title='Fire Matrix - Year vs. Month')
sns.despine()

**Chart 05: Fires Across States**

1. Seems just 3 states contribute 30% of the fires in Brazil

In [None]:
# Lets make a tree map
import squarify

# Per-state share of all reported fires. `a` was referenced here without being
# defined in the visible notebook, so it is built explicitly to survive a fresh kernel.
# Assumes `data` has a 'state' column (the plot labels use a['state']) — TODO confirm.
a = data.groupby('state', as_index=False)['number'].sum()
a = a[a['number'] > 0]  # zero-area tiles cannot be laid out in a treemap
a['Pct Fires'] = 100 * a['number'] / a['number'].sum()
a = a.sort_values('Pct Fires', ascending=False)  # largest tiles first

fig = plt.figure(figsize = (20,10))
sns.set_style('dark')
sns.set_context('talk', font_scale = 0.7)
states_tree_chart = squarify.plot(sizes=a['Pct Fires'].tolist(), label=a['state'].tolist(), color = 'red', alpha=0.8, linewidth = 5)
states_tree_chart.set(title = 'Treemap Showing Share of Different States in Total Fires')