In [3]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the FAOSTAT country reference table and collect the distinct values of
# its "Country" column; later cells use them to keep only rows whose "Area"
# is one of these countries.
# NOTE(review): the path is absolute and machine-specific — consider a
# configurable data directory so the notebook runs elsewhere.
FAOSTAT_country_data = pd.read_csv(
    "/home/martin/Documents/school/EPFL/ws/ada/disappearing_forests/data/faostat/FAOSTAT_countries.csv",
    engine="python",
)
FAOSTAT_countries = FAOSTAT_country_data.Country.unique()


In [4]:
# Load the two FAOSTAT extracts used by the rest of the notebook:
#   forest_all - the forestry data file
#   land       - the land-use inputs data file
# NOTE(review): both paths are absolute and machine-specific.
forest_all = pd.read_csv(
    "/home/martin/Documents/school/EPFL/ws/ada/disappearing_forests/data/faostat/Forestry_E_All_Data_cleared.csv",
    engine="python",
)

land = pd.read_csv(
    "/home/martin/Documents/school/EPFL/ws/ada/disappearing_forests/data/faostat/Inputs_LandUse_E_All_Data_cleared.csv",
    engine="python",
)

In [5]:
# Keep only roundwood rows whose Element starts with "Production" and whose
# Area is one of the FAOSTAT countries.
is_production = forest_all.Element.str.match("Production")
is_country = forest_all.Area.isin(FAOSTAT_countries)
is_roundwood = forest_all.Item.isin(['Roundwood'])
roundwood_rows = forest_all[is_production & is_country & is_roundwood]

# Total production per country for the year 2015 (the same year the
# forest-area cell filters on).
country_production = roundwood_rows[roundwood_rows.Year == 2015].groupby("Area")["Value"].sum()

# Convert each country's total into a percentage of the summed production and
# order the countries from largest to smallest share.
all_production = country_production.sum()
country_production = (country_production / all_production * 100.0).sort_values(ascending=False)

In [7]:
# Select the 2015 "Forestry" rows of the land-use table for FAOSTAT countries,
# keeping only the columns needed downstream.
# A single .loc selection followed by an explicit .copy() makes forest_area an
# independent frame, so assigning to its "Value" column later does not raise
# SettingWithCopyWarning (the chained land[mask][cols] form did).
forest_area = land.loc[
    land.Item.isin(['Forestry']) &
    land.Area.isin(FAOSTAT_countries) &
    land.Year.isin([2015]),
    ["Area", "Unit", "Value", "Item"],
].copy()

In [8]:
# Express every country's forest area as a percentage of the summed total.
all_forests = forest_area["Value"].sum()
forest_area["Value"] = forest_area["Value"].div(all_forests).mul(100)

In [9]:
# Combine the production shares with the forest-area shares on "Area".
# Both inputs carry a "Value" column, which the merge suffixes as
# Value_x (production) and Value_y (forest area); give them readable names.
# NOTE(review): this cell overwrites its own input, so it cannot be re-run
# on its own without re-running the cells that build country_production.
country_production = pd.merge(
    country_production,
    forest_area,
    left_on="Area",
    right_on="Area",
).rename(columns={"Value_x": "Production", "Value_y": "Forested area"})

In [8]:
# Keep three decimal places in the numeric columns.
country_production = country_production.round(decimals=3)

In [9]:
# Relabel mainland China as "China".
# Use an exact comparison: a prefix regex such as str.match("China") would
# also relabel every other area whose name merely starts with "China" and
# produce duplicate "China" rows.
country_production.loc[country_production["Area"] == "China:mainland", "Area"] = "China"

In [10]:
# Bar chart of the first 30 rows of country_production (ordered by production
# share in an earlier cell): bar height is the production share, bar colour
# is the forest-area share.
import plotly.express as px

top_producers = country_production.head(30)
axis_labels = {
    'Area': 'Country',
    "Production": "Share in World production (%)",
    'Forested area': 'Share in the World forests (%)',
}

fig = px.bar(
    top_producers,
    x='Area',
    y='Production',
    color='Forested area',
    labels=axis_labels,
)

# Drop the x-axis title and fix the figure size.
fig.update_layout(xaxis_title=None, width=1000, height=500)

fig.show()

## 7 countries = 50% of world production
Among the top 30 producers of roundwood in the world, we can find countries from all continents. The first seven countries are responsible for more than half of the production. The forests in these countries combined contain almost sixty percent of all world forests. But how does the production of roundwood influence deforestation?

In [11]:
# Save the figure as a standalone HTML file in the working directory.
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
import plotly.graph_objs as go
# `display` and `HTML` are imported from IPython.display, their public
# location; importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

# plot() writes the HTML file and returns its filename, which becomes the
# cell output.
plot(fig, filename = '2_roundwood_producers.html')


'2_roundwood_producers.html'

In [10]:
# Names of the first 50 areas in the merged table, as a plain Python list.
country_production.head(50)["Area"].tolist()

['United States of America',
 'India',
 'China:mainland',
 'Brazil',
 'Russian Federation',
 'Canada',
 'Indonesia',
 'Ethiopia',
 'Democratic Republic of the Congo',
 'Nigeria',
 'Sweden',
 'Chile',
 'Finland',
 'Germany',
 'France',
 'Uganda',
 'Ghana',
 'Mexico',
 'Myanmar',
 'Poland',
 'Viet Nam',
 'Pakistan',
 'Thailand',
 'Australia',
 'New Zealand',
 'Kenya',
 'South Africa',
 'United Republic of Tanzania',
 'Bangladesh',
 'Turkey',
 'Zambia',
 'Japan',
 'Guatemala',
 'Belarus',
 'Mozambique',
 'Ukraine',
 'Egypt',
 'Austria',
 'Spain',
 'Malaysia',
 'Sudan',
 'Argentina',
 'Czechia',
 'Philippines',
 'Romania',
 'Burkina Faso',
 'Somalia',
 'Madagascar',
 'Cameroon',
 'Uruguay']