## Global Forest Cover


## Part 1: Tree Loss per Year per country

In [34]:
#!pip install openpyxl

In [35]:
# import packages

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns  # for nicer plots
sns.set(style="darkgrid")  # default style

import tensorflow as tf
from tensorflow import keras
from keras import metrics
tf.get_logger().setLevel('INFO')

import os


Set seeds for reproducibility

In [36]:
import random as python_random

# Sets seeds for reproducibility 
def reset_seeds(seed_val=0):
    """Seed NumPy, Python's `random` module and TensorFlow with one value.

    Args:
        seed_val: Seed handed unchanged to each library's seeding function.
    """
    # Same order as before: NumPy, then `random`, then TensorFlow.
    seeders = (np.random.seed, python_random.seed, tf.random.set_seed)
    for apply_seed in seeders:
        apply_seed(seed_val)


reset_seeds()

In [37]:
# Load the 'Country tree cover loss' sheet of the workbook
# (called "sheet 2" in the original note).
gfc_tree_cover_loss = pd.read_excel(
    '../data/GlobalForestWatch/globalforestwatch.xlsx',
    sheet_name='Country tree cover loss',
)

# Preview the rows of a single country to see how the table is laid out.
gfc_tc_loss_afghanistan = gfc_tree_cover_loss.loc[
    gfc_tree_cover_loss['country'] == 'Afghanistan'
]
gfc_tc_loss_afghanistan


Unnamed: 0,country,threshold,area_ha,extent_2000_ha,extent_2010_ha,gain_2000-2020_ha,tc_loss_ha_2001,tc_loss_ha_2002,tc_loss_ha_2003,tc_loss_ha_2004,...,tc_loss_ha_2014,tc_loss_ha_2015,tc_loss_ha_2016,tc_loss_ha_2017,tc_loss_ha_2018,tc_loss_ha_2019,tc_loss_ha_2020,tc_loss_ha_2021,tc_loss_ha_2022,tc_loss_ha_2023
0,Afghanistan,0,64385715,64385715,64385715,10741,103,214,267,225,...,9,0,0,0,32,26,46,47,15,133
1,Afghanistan,10,64385715,432115,126247,10741,92,190,253,207,...,4,0,0,0,28,19,40,37,9,32
2,Afghanistan,15,64385715,302660,106867,10741,91,186,247,205,...,3,0,0,0,28,18,39,32,8,22
3,Afghanistan,20,64385715,284357,105733,10741,89,180,245,203,...,3,0,0,0,28,18,39,32,8,22
4,Afghanistan,25,64385715,254867,72395,10741,89,180,245,202,...,3,0,0,0,27,18,38,28,7,20
5,Afghanistan,30,64385715,205791,71797,10741,88,179,244,201,...,3,0,0,0,26,17,37,26,6,15
6,Afghanistan,50,64385715,148430,46242,10741,78,135,200,159,...,2,0,0,0,19,15,33,23,5,8
7,Afghanistan,75,64385715,75486,18270,10741,47,61,96,61,...,0,0,0,0,10,8,19,9,2,3


> Tree cover loss, tree cover extent, and AGB stock and density are presented for percent canopy cover levels >10%, 15%, 20%, 25%, 30%, 50% and 75% in 2000. Emissions, removals, and net flux are presented only for percent canopy cover levels >30%, 50%, and 75% in 2000, plus areas with tree cover gain between 2000 and 2020 (Potapov et al. 2022) regardless of percent canopy cover. We recommend that you select your desired percent canopy cover level before your analysis and use it consistently throughout analyses. The Global Forest Watch website uses a >30% canopy cover threshold as a default for all statistics.



We will use a canopy cover threshold of 30% consistently throughout our analysis, matching the Global Forest Watch default noted above.



In [38]:
# Count how many rows exist for each canopy-cover threshold value.
gfc_tree_cover_loss['threshold'].value_counts()

threshold
0     236
10    236
15    236
20    236
25    236
30    236
50    236
75    236
Name: count, dtype: int64

In [39]:
# Keep only the rows recorded at the 30% canopy-cover threshold.
gfc_tree_cover_loss_30_pct_threshold = gfc_tree_cover_loss.loc[
    gfc_tree_cover_loss['threshold'] == 30
]

# From here on the working table refers to the filtered rows only.
gfc_tree_cover_loss = gfc_tree_cover_loss_30_pct_threshold

In [40]:
# Show the column labels of the tree cover loss table.
gfc_tree_cover_loss.columns

Index(['country', 'threshold', 'area_ha', 'extent_2000_ha', 'extent_2010_ha',
       'gain_2000-2020_ha', 'tc_loss_ha_2001', 'tc_loss_ha_2002',
       'tc_loss_ha_2003', 'tc_loss_ha_2004', 'tc_loss_ha_2005',
       'tc_loss_ha_2006', 'tc_loss_ha_2007', 'tc_loss_ha_2008',
       'tc_loss_ha_2009', 'tc_loss_ha_2010', 'tc_loss_ha_2011',
       'tc_loss_ha_2012', 'tc_loss_ha_2013', 'tc_loss_ha_2014',
       'tc_loss_ha_2015', 'tc_loss_ha_2016', 'tc_loss_ha_2017',
       'tc_loss_ha_2018', 'tc_loss_ha_2019', 'tc_loss_ha_2020',
       'tc_loss_ha_2021', 'tc_loss_ha_2022', 'tc_loss_ha_2023'],
      dtype='object')

In [41]:
# Reshape the wide table (one `tc_loss_ha_<year>` column per year) into long
# format: one row per country and year, with the loss in `tc_loss_ha`.

# Every per-year loss column carries the 'tc_loss_ha' marker in its name.
year_columns = [col for col in gfc_tree_cover_loss if 'tc_loss_ha' in col]

year_data = pd.melt(
    gfc_tree_cover_loss,
    id_vars=[
        'country', 'threshold', 'area_ha',
        'extent_2000_ha', 'extent_2010_ha', 'gain_2000-2020_ha',
    ],
    value_vars=year_columns,
    var_name='year',
    value_name='tc_loss_ha',
)

# Extract the year as an integer.
# - Raw string: '\d' in a plain literal is an invalid escape sequence and
#   triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
# - expand=False makes str.extract return a Series rather than a one-column
#   DataFrame, which is the right shape for a single-column assignment.
year_data['year'] = year_data['year'].str.extract(r'(\d{4})', expand=False).astype(int)

print(year_data)

gfc_tc_yearly = year_data

# NOTE(review): the row index is written as an unnamed first CSV column;
# confirm downstream readers expect it before switching to index=False.
gfc_tc_yearly.to_csv('../data/GlobalForestWatch/cleaned/gfc_tc_yearly.csv')

                    country  threshold    area_ha  extent_2000_ha  \
0               Afghanistan         30   64385715          205791   
1     Akrotiri and Dhekelia         30      23448             456   
2                   Albania         30    2873537          648680   
3                   Algeria         30  230804377         1223621   
4                   Andorra         30      45196           19004   
...                     ...        ...        ...             ...   
5423         Western Sahara         30   26732060               0   
5424                  Yemen         30   45220898             126   
5425                 Zambia         30   75049202        24050819   
5426               Zimbabwe         30   39068111         1414326   
5427                  Åland         30     150643          107727   

      extent_2010_ha  gain_2000-2020_ha  year  tc_loss_ha  
0              71797              10741  2001          88  
1                383                 42  2001      

In [42]:
# dropna=False counts a missing country as its own value, exactly as len(unique()) does.
print("Total number of countries", gfc_tc_yearly['country'].nunique(dropna=False))

Total number of countries 236


There are 236 unique countries represented in the dataset.

## Part 2: Carbon Emission per Year per country

In [43]:
# Load the 'Country carbon data' sheet of the Global Forest Watch workbook.
gfc_carbon = pd.read_excel(
    '../data/GlobalForestWatch/globalforestwatch.xlsx',
    sheet_name='Country carbon data',
)

# Preview the first five rows.
gfc_carbon.head(5)

Unnamed: 0,country,umd_tree_cover_density_2000__threshold,umd_tree_cover_extent_2000__ha,gfw_aboveground_carbon_stocks_2000__Mg_C,avg_gfw_aboveground_carbon_stocks_2000__Mg_C_ha-1,gfw_forest_carbon_gross_emissions__Mg_CO2e_yr-1,gfw_forest_carbon_gross_removals__Mg_CO2_yr-1,gfw_forest_carbon_net_flux__Mg_CO2e_yr-1,gfw_forest_carbon_gross_emissions_2001__Mg_CO2e,gfw_forest_carbon_gross_emissions_2002__Mg_CO2e,...,gfw_forest_carbon_gross_emissions_2014__Mg_CO2e,gfw_forest_carbon_gross_emissions_2015__Mg_CO2e,gfw_forest_carbon_gross_emissions_2016__Mg_CO2e,gfw_forest_carbon_gross_emissions_2017__Mg_CO2e,gfw_forest_carbon_gross_emissions_2018__Mg_CO2e,gfw_forest_carbon_gross_emissions_2019__Mg_CO2e,gfw_forest_carbon_gross_emissions_2020__Mg_CO2e,gfw_forest_carbon_gross_emissions_2021__Mg_CO2e,gfw_forest_carbon_gross_emissions_2022__Mg_CO2e,gfw_forest_carbon_gross_emissions_2023__Mg_CO2e
0,Afghanistan,0,64385715,25912558,0,,,,,,...,,,,,,,,,,
1,Afghanistan,10,432115,21758845,50,,,,,,...,,,,,,,,,,
2,Afghanistan,15,302660,16568110,55,,,,,,...,,,,,,,,,,
3,Afghanistan,20,284357,15782996,56,,,,,,...,,,,,,,,,,
4,Afghanistan,25,254867,14538150,57,,,,,,...,,,,,,,,,,


In [44]:
# Show the column labels of the carbon table.
gfc_carbon.columns

Index(['country', 'umd_tree_cover_density_2000__threshold',
       'umd_tree_cover_extent_2000__ha',
       'gfw_aboveground_carbon_stocks_2000__Mg_C',
       'avg_gfw_aboveground_carbon_stocks_2000__Mg_C_ha-1',
       'gfw_forest_carbon_gross_emissions__Mg_CO2e_yr-1',
       'gfw_forest_carbon_gross_removals__Mg_CO2_yr-1',
       'gfw_forest_carbon_net_flux__Mg_CO2e_yr-1',
       'gfw_forest_carbon_gross_emissions_2001__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2002__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2003__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2004__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2005__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2006__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2007__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2008__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2009__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2010__Mg_CO2e',
       'gfw_forest_carbon_gross_emissions_2

In [45]:
# Count how many rows exist for each tree-cover-density threshold value.
gfc_carbon['umd_tree_cover_density_2000__threshold'].value_counts()

umd_tree_cover_density_2000__threshold
0     236
10    236
15    236
20    236
25    236
30    236
50    236
75    236
Name: count, dtype: int64

In [46]:
# Keep only the rows recorded at the 30% tree-cover-density threshold.
gfc_carbon_filtered_by_threshold = gfc_carbon.loc[
    gfc_carbon['umd_tree_cover_density_2000__threshold'] == 30
]

# From here on the working table refers to the filtered rows only.
gfc_carbon = gfc_carbon_filtered_by_threshold



In [47]:
# dropna=False counts a missing country as its own value, exactly as len(unique()) does.
print("Total number of countries", gfc_carbon['country'].nunique(dropna=False))

Total number of countries 236


In [48]:
# Reshape the per-year carbon columns into long format (one row per country
# and year) and write the result to CSV.
df = gfc_carbon

# Per-year columns: the name contains '__' and one of the years 2001-2023.
year_columns = [
    col for col in df.columns
    if '__' in col and any(str(year) in col for year in range(2001, 2024))
]

year_data = pd.melt(
    df,
    id_vars=['country'],
    value_vars=year_columns,
    var_name='variable',
    value_name='value',
)

# Extract the year from the 'variable' column.
# - Raw string: '\d' in a plain literal is an invalid escape sequence and
#   triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
# - expand=False makes str.extract return a Series rather than a one-column
#   DataFrame, which is the right shape for a single-column assignment.
year_data['year'] = year_data['variable'].str.extract(r'(\d{4})', expand=False).astype(int)

# Strip the year token so every year of one measurement shares a single name.
year_data['variable'] = year_data['variable'].str.replace(r'_\d{4}__', '__', regex=True)

# Pivot the DataFrame so that each measurement type becomes a separate column.
# NOTE(review): pivot_table aggregates duplicate (country, year) entries
# (mean by default), so this relies on gfc_carbon already holding a single
# threshold row per country -- confirm the threshold filter ran first.
year_data = year_data.pivot_table(
    index=['country', 'year'],
    columns='variable',
    values='value',
).reset_index()

# Store cleaned data to CSV file.
# NOTE(review): the row index is written as an unnamed first CSV column;
# confirm downstream readers expect it before switching to index=False.
year_data.to_csv('../data/GlobalForestWatch/cleaned/gfc_carbon_yearly.csv')

year_data

variable,country,year,gfw_forest_carbon_gross_emissions__Mg_CO2e
0,Afghanistan,2001,23243.0
1,Afghanistan,2002,38860.0
2,Afghanistan,2003,45741.0
3,Afghanistan,2004,33108.0
4,Afghanistan,2005,38775.0
...,...,...,...
5423,Åland,2019,720130.0
5424,Åland,2020,206110.0
5425,Åland,2021,421934.0
5426,Åland,2022,365348.0
