In [9]:
import pandas as pd
import numpy as np
import requests
import warnings
from dotenv import load_dotenv
from datetime import datetime
import os
import json
import matplotlib.pyplot as plt
import googlemaps

# Read variables from a local .env file into the process environment
# (presumably where GOOGLE_API_KEY, read further down, is defined).
load_dotenv()
# Silence pandas' chained-assignment warning. The filter category has to be the
# Warning subclass: SettingWithCopyError is an exception type, so a filter
# registered for it never matches the warning pandas actually emits.
warnings.simplefilter("ignore", pd.errors.SettingWithCopyWarning)

In [63]:
# Load the "Subnational 2 tree cover loss" sheet of the raw workbook.
deforestation = pd.read_excel("../datasets/original/deforestation.xlsx", sheet_name = "Subnational 2 tree cover loss")

# DataFrame.info() prints its summary and returns None, so it is called directly;
# wrapping it in display() rendered a stray "None" below the summary.
deforestation.info()
deforestation.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8520 entries, 0 to 8519
Data columns (total 30 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   country            8520 non-null   object
 1   subnational1       8520 non-null   object
 2   subnational2       8520 non-null   object
 3   threshold          8520 non-null   int64 
 4   area_ha            8520 non-null   int64 
 5   extent_2000_ha     8520 non-null   int64 
 6   extent_2010_ha     8520 non-null   int64 
 7   gain_2000-2020_ha  8520 non-null   int64 
 8   tc_loss_ha_2001    8520 non-null   int64 
 9   tc_loss_ha_2002    8520 non-null   int64 
 10  tc_loss_ha_2003    8520 non-null   int64 
 11  tc_loss_ha_2004    8520 non-null   int64 
 12  tc_loss_ha_2005    8520 non-null   int64 
 13  tc_loss_ha_2006    8520 non-null   int64 
 14  tc_loss_ha_2007    8520 non-null   int64 
 15  tc_loss_ha_2008    8520 non-null   int64 
 16  tc_loss_ha_2009    8520 non-null   int64 


None

Unnamed: 0,country,subnational1,subnational2,threshold,area_ha,extent_2000_ha,extent_2010_ha,gain_2000-2020_ha,tc_loss_ha_2001,tc_loss_ha_2002,...,tc_loss_ha_2013,tc_loss_ha_2014,tc_loss_ha_2015,tc_loss_ha_2016,tc_loss_ha_2017,tc_loss_ha_2018,tc_loss_ha_2019,tc_loss_ha_2020,tc_loss_ha_2021,tc_loss_ha_2022
0,Colombia,Amazonas,El Encanto,0,1027642,1027642,1027642,773,212,182,...,145,242,168,988,322,178,219,392,299,334
1,Colombia,Amazonas,El Encanto,10,1027642,1010468,1010236,773,212,182,...,145,241,165,985,316,176,218,386,295,320
2,Colombia,Amazonas,El Encanto,15,1027642,1010422,1010189,773,212,182,...,145,241,165,985,316,176,218,386,295,320
3,Colombia,Amazonas,El Encanto,20,1027642,1010269,1010088,773,212,182,...,145,241,165,985,316,176,217,386,295,319
4,Colombia,Amazonas,El Encanto,25,1027642,1010200,1010045,773,212,182,...,145,241,165,985,315,176,217,386,294,319


In [64]:
# Reduce the raw table to its identifier columns, keeping only the rows
# recorded at threshold 0, and renumber the index from zero.
deforestation_unique = (
    deforestation
    .loc[
        deforestation["threshold"] == 0,
        deforestation.columns.isin(["country", "subnational1", "subnational2"]),
    ]
    .reset_index(drop = True)
)

deforestation_unique

Unnamed: 0,country,subnational1,subnational2
0,Colombia,Amazonas,El Encanto
1,Colombia,Amazonas,La Chorrera
2,Colombia,Amazonas,La Pedrera
3,Colombia,Amazonas,Leticia
4,Colombia,Amazonas,Mirití-Paraná
...,...,...,...
1060,Colombia,Vichada,La Primavera
1061,Colombia,Vichada,Puerto Carreño
1062,Colombia,Vichada,San Jose de Ocune
1063,Colombia,Vichada,Santa Rita


In [65]:
# Reshape the per-year "tc_loss_ha_<year>" columns into long form: one row per
# (country, subnational1, subnational2, threshold, Year), with the loss value
# stored in "forest_loss_ha".
deforestation_year = (
    pd.wide_to_long(
        deforestation,
        stubnames = "tc_loss_ha_",
        i = ["country", "subnational1", "subnational2", "threshold"],
        j = "Year",
    )
    .reset_index()
    .rename(columns = { "tc_loss_ha_": "forest_loss_ha" })
)

deforestation_year

Unnamed: 0,country,subnational1,subnational2,threshold,Year,gain_2000-2020_ha,extent_2010_ha,area_ha,extent_2000_ha,forest_loss_ha
0,Colombia,Amazonas,El Encanto,0,2001,773,1027642,1027642,1027642,212
1,Colombia,Amazonas,El Encanto,0,2002,773,1027642,1027642,1027642,182
2,Colombia,Amazonas,El Encanto,0,2003,773,1027642,1027642,1027642,88
3,Colombia,Amazonas,El Encanto,0,2004,773,1027642,1027642,1027642,169
4,Colombia,Amazonas,El Encanto,0,2005,773,1027642,1027642,1027642,147
...,...,...,...,...,...,...,...,...,...,...
187435,Colombia,Vichada,Santa Rosalía,75,2018,6493,28283,432087,30129,23
187436,Colombia,Vichada,Santa Rosalía,75,2019,6493,28283,432087,30129,24
187437,Colombia,Vichada,Santa Rosalía,75,2020,6493,28283,432087,30129,38
187438,Colombia,Vichada,Santa Rosalía,75,2021,6493,28283,432087,30129,67


In [10]:
# Google Maps client, authenticated with the GOOGLE_API_KEY environment variable.
maps = googlemaps.Client(key = os.getenv("GOOGLE_API_KEY"))

In [26]:
# Geocode each location once.
#
# The loop walks `deforestation_unique` (one row per location) rather than
# `deforestation`, which repeats every location once per `threshold` value;
# the coordinates are later attached to `deforestation_unique`, so the lists
# must have exactly one entry per row of that frame.
#
# A failed lookup records NaN coordinates instead of being skipped, which keeps
# `lat`/`long` aligned with the frame's rows. The failed queries are collected
# in `missing` for inspection.
lat = []
long = []
missing = []
for _, row in deforestation_unique.iterrows():
    query = "{}, {}, {}".format(row["subnational2"], row["subnational1"], row["country"])
    try:
        location = maps.geocode(query)[0]["geometry"]["location"]
        coordinates = (location["lat"], location["lng"])
    except Exception:
        # `Exception` (not a bare except) so the kernel can still be interrupted
        # during this long-running loop.
        missing.append(query)
        coordinates = (np.nan, np.nan)
    lat.append(coordinates[0])
    long.append(coordinates[1])

In [27]:
# Count only entries that hold a real coordinate, so the report stays accurate
# even if the list contains missing-value (NaN/None) placeholders.
print("Found {} of {}".format(sum(pd.notna(value) for value in long), len(deforestation_unique)))

Found 1065 of 1065


In [68]:
# Attach the geocoded coordinates as new "Lat" / "Long" columns.
deforestation_unique = deforestation_unique.assign(Lat = lat, Long = long)

In [74]:
# Look up the elevation at every (Lat, Long) pair.
#
# A failed lookup records NaN instead of being skipped, so `elev` always has one
# entry per row of `deforestation_unique` and can be attached as a column
# without shifting values onto the wrong rows. The failed points are collected
# in `missing`.
elev = []
missing = []
for _, row in deforestation_unique.iterrows():
    point = (row["Lat"], row["Long"])
    try:
        altitude = maps.elevation(point)[0]["elevation"]
    except Exception:
        # `Exception` (not a bare except) so the kernel can still be interrupted.
        missing.append(point)
        altitude = np.nan
    elev.append(altitude)

# `elev` now includes a placeholder per failure, so subtract them for the report.
print("Found {} of {}".format(len(elev) - len(missing), len(deforestation_unique)))

Found 1065 of 1065


In [75]:
# Attach the looked-up elevations, then show summary statistics and the frame.
deforestation_unique = deforestation_unique.assign(elevation = elev)

display(deforestation_unique.describe())
deforestation_unique

Unnamed: 0,Lat,Long,elevation
count,1065.0,1065.0,1065.0
mean,5.550268,-74.63047,1189.064659
std,2.573126,1.704147,933.219444
min,-4.203165,-81.718525,0.0
25%,4.21655,-75.761865,223.823059
50%,5.503838,-74.755201,1191.455566
75%,6.854309,-73.418021,1909.895264
max,13.381425,-67.046459,3670.898682


Unnamed: 0,country,subnational1,subnational2,Lat,Long,elevation
0,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346
1,Colombia,Amazonas,La Chorrera,-1.378972,-72.804280,161.210556
2,Colombia,Amazonas,La Pedrera,-1.322004,-69.578386,76.290291
3,Colombia,Amazonas,Leticia,-4.203165,-69.935907,78.378479
4,Colombia,Amazonas,Mirití-Paraná,-0.716580,-71.101650,158.658432
...,...,...,...,...,...,...
1060,Colombia,Vichada,La Primavera,5.491592,-70.413995,120.220474
1061,Colombia,Vichada,Puerto Carreño,6.189912,-67.482570,56.308125
1062,Colombia,Vichada,San Jose de Ocune,4.245270,-70.328300,131.086746
1063,Colombia,Vichada,Santa Rita,4.488860,-69.791680,163.811569


In [76]:
# Inner-join the per-location attributes (Lat, Long, elevation) onto the
# long-format yearly table, matching on the three location identifier columns.
df = deforestation_unique.merge(
    deforestation_year,
    how = "inner",
    on = ["country", "subnational1", "subnational2"],
)

df

Unnamed: 0,country,subnational1,subnational2,Lat,Long,elevation,threshold,Year,gain_2000-2020_ha,extent_2010_ha,area_ha,extent_2000_ha,forest_loss_ha
0,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2001,773,1027642,1027642,1027642,212
1,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2002,773,1027642,1027642,1027642,182
2,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2003,773,1027642,1027642,1027642,88
3,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2004,773,1027642,1027642,1027642,169
4,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2005,773,1027642,1027642,1027642,147
...,...,...,...,...,...,...,...,...,...,...,...,...,...
187435,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2018,6493,28283,432087,30129,23
187436,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2019,6493,28283,432087,30129,24
187437,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2020,6493,28283,432087,30129,38
187438,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2021,6493,28283,432087,30129,67


In [77]:
# Write the merged table as a checkpoint CSV.
# `quoting` expects a csv.QUOTE_* constant; the previous `False` only worked
# because it equals csv.QUOTE_MINIMAL (0), which is already the default. Minimal
# quoting is kept so that any field containing a comma is still quoted.
df.to_csv("../datasets/checkpoints/forest_loss.csv", index = False)