In [244]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import os

In [248]:
path='data/'
files = [file for file in os.listdir(path) if '.xlsx' in file]
dfs = [pd.read_excel(os.path.join(path, file), sep=',') for file in files]

In [249]:
land_area = pd.read_csv('data/historical-database-clean.csv')

In [250]:
microregions = dfs[0]
deforestation = dfs[1]
productivity = dfs[2]

In [251]:
city_to_microregion = dict(dfs[0].set_index('City')['Microregion'])
citycode_to_microregion = dict(dfs[0].set_index('city_code')['Microregion'])

### 0. Total land area by microregion

In [266]:
land_area['microregion'] = land_area['city_code'].map(citycode_to_microregion)
region_prod_area = land_area.set_index('year').loc['01/01/2017'].groupby(['microregion', 'product']).sum().sort_index()
region_prod_area.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,area
microregion,product,Unnamed: 2_level_1
Almeirim,Açaí,378.0
Almeirim,Beans,105.0
Almeirim,Cassava,580.0
Almeirim,Cocoa,47.0
Almeirim,Corn,240.0


### 1. deforestation rate by microregion

In [267]:
dfs[1]['Microregion'] = dfs[1]['City'].map(city_to_microregion)
def_df = dfs[1].groupby(['Microregion']).sum().drop('City ID_CodIbge', 1)
def_df = def_df.iloc[:, 1:] / def_df.iloc[:, [0]].values
def_df.columns = def_df.columns.values + ' - rate'
def_df.head()

Unnamed: 0_level_0,Deforestation area 2017 (Km2) - rate,Deforestation area 2018 (Km2) - rate,Deforestation area 2019 (Km2) - rate
Microregion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Almeirim,0.000406,0.000616,0.000794
Altamira,0.003725,0.003791,0.007668
Arari,0.000196,2.4e-05,2.1e-05
Belém,0.001178,0.0007,0.000605
Bragantina,0.000673,0.00024,0.000103


In [286]:
products = land_area['product'].unique()
def_rate = []
for prod in products:
    df = def_df.copy()
    df['Product'] = prod
    def_rate.append(df)
def_rate = pd.concat(def_rate).reset_index(drop=False).set_index(['Microregion', 'Product']).sort_index().mean(1).to_frame()
def_rate.columns = ['average deforestation rate (2017-2019)']

In [288]:
def_rate

Unnamed: 0_level_0,Unnamed: 1_level_0,deforestation rate 2017
Microregion,Product,Unnamed: 2_level_1
Almeirim,Açaí,0.000606
Almeirim,Beans,0.000606
Almeirim,Cassava,0.000606
Almeirim,Cocoa,0.000606
Almeirim,Corn,0.000606
...,...,...
Óbidos,Others,0.000365
Óbidos,Palm oil,0.000365
Óbidos,Rice,0.000365
Óbidos,Sorghum,0.000365


### 2. Productivty by microregion and product

In [273]:
dfs[2]['Microregion'] = dfs[2]['City'].map(city_to_microregion)
total_prod = dfs[2].groupby(['Microregion', 'Product']).sum()[['Production value (R$ thousands)']]

In [274]:
prod_area = pd.concat([total_prod, region_prod_area], 1)
productivity = (prod_area['Production value (R$ thousands)'] / prod_area['area']).rename('productivity (R$/hec)').to_frame().sort_index()
productivity

Unnamed: 0_level_0,Unnamed: 1_level_0,productivity (R$/hec)
Microregion,Product,Unnamed: 2_level_1
Almeirim,Açaí,20.261905
Almeirim,Beans,1.323810
Almeirim,Cassava,5.727586
Almeirim,Cocoa,4.659574
Almeirim,Corn,0.766667
...,...,...
Óbidos,Others,16.139959
Óbidos,Palm oil,
Óbidos,Rice,0.516667
Óbidos,Sorghum,


### 4. product rank

In [221]:
ranks = {
    'Livestock': 5, 'Soy': 5, 'Sorghum': 4, 'Corn': 4,
    'Beans': 3, 'Cassava': 3, 'Rice': 3, 'Others': 2,
    'Palm oil': 1, 'Açaí': 1, 'Cocoa': 1
}

In [275]:
prods_ranks = pd.DataFrame(productivity.index.map(lambda index_value: ranks[index_value[1]]), index=productivity.index, columns=['rank']).sort_index()

In [276]:
prods_ranks

Unnamed: 0_level_0,Unnamed: 1_level_0,rank
Microregion,Product,Unnamed: 2_level_1
Almeirim,Açaí,1
Almeirim,Beans,3
Almeirim,Cassava,3
Almeirim,Cocoa,1
Almeirim,Corn,4
...,...,...
Óbidos,Others,2
Óbidos,Palm oil,1
Óbidos,Rice,3
Óbidos,Sorghum,4


### Calculating microregion-product criteria scores

In [282]:
pd.concat([region_prod_area, def_rate['Deforestation area 2017 (Km2) - rate'], productivity, prods_ranks], 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,area,Deforestation area 2017 (Km2) - rate,productivity (R$/hec),rank
microregion,product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Almeirim,Açaí,378.0,0.000406,20.261905,1
Almeirim,Beans,105.0,0.000406,1.323810,3
Almeirim,Cassava,580.0,0.000406,5.727586,3
Almeirim,Cocoa,47.0,0.000406,4.659574,1
Almeirim,Corn,240.0,0.000406,0.766667,4
...,...,...,...,...,...
Óbidos,Others,986.0,0.000287,16.139959,2
Óbidos,Palm oil,0.0,0.000287,,1
Óbidos,Rice,60.0,0.000287,0.516667,3
Óbidos,Sorghum,0.0,0.000287,,4


# 1. Remove combinations with null values for land area or production (2017 and 2019 respectivly)

# 4. Remove microregions-products from productivity if combination has no value for 'land area' in 2017

# 3. Unit conversion (Km2 to hec)

# 2. Change scale

# CALCULATE SCORE