# Deforestation Forecast

In [15]:
# importing libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re

from plotly.offline import init_notebook_mode, iplot, plot
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px

%matplotlib inline

## Exploratory Data Analysis

In [16]:
# Load the yearly deforested-area table (one row per year, one column per state
# plus an 'AMZ LEGAL' total column).
# NOTE: decoding as cp1252 leaves a byte-order-mark prefix visible as 'ï»¿' in the
# first column name; the rename step further down relies on that exact name.
csv_path = '/content/def_area_2004_2019.csv'
def_amazon_data = pd.read_csv(csv_path, encoding="cp1252", parse_dates=True)
def_amazon_data.head(20)

Unnamed: 0,ï»¿Ano/Estados,AC,AM,AP,MA,MT,PA,RO,RR,TO,AMZ LEGAL
0,2004,728,1232,46,755,11814,8870,3858,311,158,27772
1,2005,592,775,33,922,7145,5899,3244,133,271,19014
2,2006,398,788,30,674,4333,5659,2049,231,124,14286
3,2007,184,610,39,631,2678,5526,1611,309,63,11651
4,2008,254,604,100,1271,3258,5607,1136,574,107,12911
5,2009,167,405,70,828,1049,4281,482,121,61,7464
6,2010,259,595,53,712,871,3770,435,256,49,7000
7,2011,280,502,66,396,1120,3008,865,141,40,6418
8,2012,305,523,27,269,757,1741,773,124,52,4571
9,2013,221,583,23,403,1139,2346,932,170,74,5891


In [17]:
# Summarise column dtypes, non-null counts and memory usage of the loaded frame.
def_amazon_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   ï»¿Ano/Estados  16 non-null     int64
 1   AC              16 non-null     int64
 2   AM              16 non-null     int64
 3   AP              16 non-null     int64
 4   MA              16 non-null     int64
 5   MT              16 non-null     int64
 6   PA              16 non-null     int64
 7   RO              16 non-null     int64
 8   RR              16 non-null     int64
 9   TO              16 non-null     int64
 10  AMZ LEGAL       16 non-null     int64
dtypes: int64(11)
memory usage: 1.5 KB


In [18]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for every numeric column.
def_amazon_data.describe()

Unnamed: 0,ï»¿Ano/Estados,AC,AM,AP,MA,MT,PA,RO,RR,TO,AMZ LEGAL
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,2011.5,357.625,776.5625,38.5,519.875,2691.5625,3923.625,1392.4375,243.1875,77.5625,10020.9375
std,4.760952,172.476037,300.648625,23.557023,316.705305,2940.367595,1945.802487,943.567413,150.567468,63.238141,6112.467134
min,2004.0,167.0,405.0,8.0,209.0,757.0,1741.0,435.0,121.0,21.0,4571.0
25%,2007.75,256.25,568.0,24.0,257.75,1108.75,2411.25,842.0,139.0,46.75,6365.25
50%,2011.5,292.5,661.0,30.5,399.5,1525.5,3389.0,1189.5,198.5,57.5,7500.0
75%,2015.25,409.5,1012.0,47.75,722.75,2823.0,5546.25,1434.75,269.25,82.25,11966.0
max,2019.0,728.0,1421.0,100.0,1271.0,11814.0,8870.0,3858.0,617.0,271.0,27772.0


In [20]:
# Total number of cells in the frame (rows x columns).
def_amazon_data.size

176

In [21]:
# (rows, columns) of the frame.
def_amazon_data.shape

(16, 11)

## Data Preprocessing

In [3]:
# Renaming the fields for better understanding.
#
# The first header arrives with a byte-order-mark prefix: 'ï»¿' when the CSV is
# decoded as cp1252, '\ufeff' when it is decoded as plain utf-8. Strip that
# prefix first so the rename does not depend on how the file was decoded.
# A new frame is assigned instead of using inplace=True, so re-running this
# cell is harmless.
def_amazon_data = (
    def_amazon_data
    .rename(columns=lambda name: name.replace('ï»¿', '').replace('\ufeff', ''))
    .rename(columns={
        'Ano/Estados': 'Year',
        'AC': 'Acre',
        'AM': 'Amazonas',
        'AP': 'Amapa',
        'MA': 'Maranhao',
        'MT': 'Mato Grosso',
        'PA': 'Para',
        'RO': 'Rondonia',
        'RR': 'Roraima',
        'TO': 'Tocantins',
        'AMZ LEGAL': 'Total',
    })
)

# rename() silently ignores keys it cannot find, so fail here with a clear
# message rather than in a later plotting cell that looks up "Year".
assert 'Year' in def_amazon_data.columns, "expected a 'Year' column after renaming"

def_amazon_data.head()

Unnamed: 0,Year,Acre,Amazonas,Amapa,Maranhao,Mato Grosso,Para,Rondonia,Roraima,Tocantins,Total
0,2004,728,1232,46,755,11814,8870,3858,311,158,27772
1,2005,592,775,33,922,7145,5899,3244,133,271,19014
2,2006,398,788,30,674,4333,5659,2049,231,124,14286
3,2007,184,610,39,631,2678,5526,1611,309,63,11651
4,2008,254,604,100,1271,3258,5607,1136,574,107,12911


## Analysis

In [4]:
# Bar chart of the overall deforested area for each year, one tick per year.
fig = (
    px.bar(def_amazon_data, x="Year", y="Total")
    .update_layout(title_text='Total Deforested Area per Year')
    .update_xaxes(tickmode='linear')
)
fig.show()

Here we notice that the highest amount of deforestation occurred in 2004, but there is a slight increasing trend over the period 2014–2019.

In [7]:
# Checking deforestation area by state: sum every state's column over all years
# and highlight the states whose total lies above the cross-state average.
states = ["Acre", "Amazonas", "Amapa", "Maranhao", "Mato Grosso", "Para", "Rondonia", "Roraima", "Tocantins"]
n = len(states)
data = [def_amazon_data[state].sum() for state in states]

avg = sum(data) / n
# crimson marks an above-average total, grey everything else
colors = ['crimson' if total > avg else 'lightslategray' for total in data]

state_bars = go.Bar(x=states, y=data, text=data, textposition='auto', marker_color=colors)
fig = go.Figure(data=[state_bars])
fig.update_layout(title_text='Total Deforested Area by State')

fig.show()

Here we see that an above-average amount of deforestation occurred mainly in three states over the 16-year period (2004–2019).

In [8]:
# One small line chart per state, laid out on a 3x3 grid and filled row by row.
fig = make_subplots(rows=3, cols=3, subplot_titles=states)

for position, state in enumerate(states):
    grid_row, grid_col = divmod(position, 3)  # zero-based cell in the grid
    trend = go.Scatter(x=def_amazon_data['Year'], y=def_amazon_data[state], name=state)
    fig.add_trace(trend, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=800)
fig.update_xaxes(tickmode='linear')
fig.show(config={"displayModeBar": False, "showTips": False})


Here we see the deforestation trends over the last 16 years (2004–2019). For the three states with the highest amount of deforestation, the trend lines show that most of it did not occur in recent times, but two of the three (Rondonia and Para) still show an increasing trend. These are warning signs that should be taken into consideration to avoid further damage. Moreover, states such as Roraima, Acre and Amazonas might not have the largest deforested areas, but they show an alarming increase.

In [9]:
# Yearly deforested area with one bar per state, grouped side by side by year.
fig = (
    px.bar(def_amazon_data, x="Year", y=states)
    .update_layout(
        title_text='Total Deforested Area per Year',
        barmode='group',
        legend_title_text='States',
    )
    .update_xaxes(tickmode='linear')
)
fig.show()

We see that Para has the highest deforestation over the years. We also see that Mato Grosso still has a high share of the cumulative deforestation, but the majority of that was contributed by the years 2004 and 2005.

In [10]:
# Side-by-side yearly bars for Mato Grosso, Para and Rondonia.
focus_states = ["Mato Grosso", "Para", "Rondonia"]
data = [
    go.Bar(x=def_amazon_data['Year'], y=def_amazon_data[state], name=state)
    for state in focus_states
]

layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
fig.update_xaxes(tickmode='linear')
iplot(fig)


Here we can look closely at the deforestation trend of the states with the highest deforested areas.

In [11]:
# Line chart with one series per state plus the overall total.
#
# The column list lives under its own name (and the traces under `traces`) so
# that the 9-state `states` list used by the earlier per-state cells is not
# overwritten: re-running the 3x3 subplot cell after this one would otherwise
# try to place a 10th trace outside the grid.
series_names = ["Acre", "Amazonas", "Amapa", "Maranhao", "Mato Grosso", "Para",
                "Rondonia", "Roraima", "Tocantins", "Total"]

traces = [
    go.Scatter(
        x=def_amazon_data["Year"],
        y=def_amazon_data[name],
        mode="lines+markers",
        name=name,
    )
    for name in series_names
]

layout = go.Layout(title="Deforestation area (km²)",
                   xaxis={'title': 'Year', 'range': [2004, 2019]},
                   yaxis={'title': 'Value'})

fig = go.Figure(data=traces, layout=layout)

iplot(fig)

When we compare the trend lines of all the states together, we notice that deforestation peaked in 2008, 2016 and 2019, which is alarming since one of these peaks is very recent.

In [13]:
# Heatmap of deforested area: years on the y-axis, states (plus the overall
# total) on the x-axis.
#
# The column list gets its own name so the 9-state `states` list used by the
# earlier per-state cells is not overwritten, and the colour scale is named
# directly instead of being picked by position from a list of candidates.
heatmap_columns = ["Acre", "Amazonas", "Amapa", "Maranhao", "Mato Grosso", "Para",
                   "Rondonia", "Roraima", "Tocantins", "Total"]
HEATMAP_COLOR_SCALE = "RdBu_r"

fig = px.imshow(def_amazon_data[heatmap_columns],
                x=heatmap_columns,
                y=list(def_amazon_data["Year"]),
                labels=dict(x="State", y="Year", color="Total"),
                color_continuous_scale=HEATMAP_COLOR_SCALE)
fig.update_yaxes(tickmode='linear')
fig.show()

The above heatmap also shows the deforestation trend, consistent with our previous analysis.

# Conclusion
From the preceding analysis we can conclude that the states that most require conservation efforts are Para, Mato Grosso and Rondonia.