## Setup the environment

In [None]:
!mkdir -p ~/.aws
!pip install -qq psycopg2-binary awscli boto3 s3fs

In [2]:
%%writefile ~/.aws/credentials
[default]
aws_access_key_id=
aws_secret_access_key=
region=us-east-1
output=json

Writing /root/.aws/credentials


In [3]:
import boto3
import json

%reload_ext sql

In [5]:
def get_secret(secret_name):
    region_name = "us-east-1"
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name)
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    get_secret_value_response = json.loads(get_secret_value_response['SecretString'])
    return get_secret_value_response

db_credentials = get_secret(secret_name='wysde')

USERNAME = db_credentials["RDS_POSTGRES_USERNAME"]
PASSWORD = db_credentials["RDS_POSTGRES_PASSWORD"]
HOST = db_credentials["RDS_POSTGRES_HOST"]
PORT = 5432
DBNAME = "postgres"
CONN = f"postgresql://{USERNAME}:{PASSWORD}@{HOST}:{PORT}/{DBNAME}"

%sql {CONN}

'Connected: postgres@postgres'

In [None]:
SCHEMA = "deforestation"
%sql CREATE SCHEMA IF NOT EXISTS {SCHEMA}
%sql SET search_path = {SCHEMA}

In [100]:
import pandas as pd

forest_area = pd.read_csv("forest_area.csv")
display(forest_area)
forest_area.info()
forest_area.to_sql("forest_area", CONN, schema=SCHEMA)

land_area = pd.read_csv("land_area.csv")
display(land_area)
land_area.info()
land_area.to_sql("land_area", CONN, schema=SCHEMA)

regions = pd.read_csv("regions.csv")
display(regions)
regions.info()
regions.to_sql("regions", CONN, schema=SCHEMA)

## Initial Forestation View

The first step is to create a view called 'forestation' by joining all three tables. Keep in mind that the column forest_area_sqkm in the forest_area table and the land_area_sqmi in the land_area table are in different units.

In [102]:
%%sql
CREATE VIEW forestation AS SELECT fa.country_code,
  fa.country_name,
  fa.year,
  fa.forest_area_sqkm,
  la.total_area_sq_mi,
  la.total_area_sq_mi * 2.59 AS total_area_sqkm, ROUND(((fa.forest_area_sqkm/(la.total_area_sq_mi * 2.59))*100)::smallint, 2) AS
  forest_as_percent_of_land, r.region,
  r.income_group FROM forest_area fa
JOIN land_area la
ON fa.country_code = la.country_code AND fa.year = la.year JOIN regions r
ON la.country_code = r.country_code;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
Done.


[]

In [103]:
%sql SELECT * FROM forestation LIMIT 10;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
10 rows affected.


country_code,country_name,year,forest_area_sqkm,total_area_sq_mi,total_area_sqkm,forest_as_percent_of_land,region,income_group
ABW,Aruba,2016,4.199999869,69.5,180.005,2.0,Latin America & Caribbean,High income
AFG,Afghanistan,2016,13500.0,252069.5,652860.005,2.0,South Asia,Low income
AGO,Angola,2016,577311.9922,481351.35,1246699.9965,46.0,Sub-Saharan Africa,Lower middle income
ALB,Albania,2016,7705.39978,10579.15,27399.9985,28.0,Europe & Central Asia,Upper middle income
AND,Andorra,2016,160.0,181.47,470.0073,34.0,Europe & Central Asia,High income
ARE,United Arab Emirates,2016,3236.600037,27420.85,71020.0015,5.0,Middle East & North Africa,High income
ARG,Argentina,2016,268151.9922,1056637.07,2736690.0113,10.0,Latin America & Caribbean,High income
ARM,Armenia,2016,3322.000122,10992.28,28470.0052,12.0,Europe & Central Asia,Upper middle income
ASM,American Samoa,2016,175.0,77.22,199.9998,88.0,East Asia & Pacific,Upper middle income
ATG,Antigua and Barbuda,2016,98.00000191,169.88,439.9892,22.0,Latin America & Caribbean,High income


## Part 1 - Global Situation

What was the total forest area (in sq km) of the world in 1990? Please keep in mind that you can use the country record denoted as “World" in the region table.

In [104]:
%%sql
SELECT country_name, forest_area_sqkm
FROM forest_area
WHERE country_name = 'World'
AND year = 1990;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


country_name,forest_area_sqkm
World,41282694.9


What was the total forest area (in sq km) of the world in 2016? Please keep in mind that you can use the country record in the table is denoted as “World.”

In [105]:
%%sql
SELECT country_name, forest_area_sqkm
FROM forest_area
WHERE country_name = 'World'
AND year = 2016;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


country_name,forest_area_sqkm
World,39958245.9


What was the change (in sq km) in the forest area of the world from 1990 to 2016? What was the percent change in forest area of the world between 1990 and 2016?

In [107]:
%%sql
--1990 forest area
WITH t1 AS(
SELECT country_name, forest_area_sqkm AS area_1990
FROM forest_area
WHERE country_name = 'World'
AND year = 1990
),
-- 2016 forest area
t2 AS(
SELECT country_name, forest_area_sqkm AS area_2016
FROM forest_area
WHERE country_name = 'World'
AND year = 2016
)
-- Working out the difference and percent change
SELECT area_2016, area_1990, area_2016-area_1990 AS area_change, ROUND(((area_2016-area_1990)/area_1990 *100)::smallint, 2) AS area_percent_change FROM t1, t2;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


area_2016,area_1990,area_change,area_percent_change
39958245.9,41282694.9,-1324449.0,-3.0


If you compare the amount of forest area lost between 1990 and 2016, to which country's total area in 2016 is it closest to?

In [111]:
%%sql
SELECT country_name, year, ROUND((total_area_sq_mi*2.59)::int,0) AS total_area_sqkm
FROM land_area
WHERE year = 2016
AND (total_area_sq_mi*2.59) <
((SELECT forest_area_sqkm FROM forest_area
WHERE country_name = 'World' AND year = 1990) -
(SELECT forest_area_sqkm FROM forest_area
WHERE country_name = 'World' AND year = 2016))
ORDER BY total_area_sq_mi DESC LIMIT 1;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


country_name,year,total_area_sqkm
Peru,2016,1280000


## Part 2 - Regional Outlook

What was the percent forest of the entire world in 2016? Which region had the HIGHEST percent forest in 2016, and which had the LOWEST, to 2 decimal places?

In [112]:
%%sql
SELECT country_name, forest_as_percent_of_land
FROM forestation
WHERE country_name = 'World'
AND year = 2016;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


country_name,forest_as_percent_of_land
World,31.0


In [114]:
%%sql
SELECT region, ROUND(((SUM(forest_area_sqkm)/SUM(total_area_sqkm))*100)::int, 2) AS region_percent
FROM forestation
WHERE year = 2016
GROUP BY 1 ORDER BY 2 DESC;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
8 rows affected.


region,region_percent
Latin America & Caribbean,46.0
Europe & Central Asia,38.0
North America,36.0
World,31.0
Sub-Saharan Africa,29.0
East Asia & Pacific,26.0
South Asia,18.0
Middle East & North Africa,2.0


What was the percent forest of the entire world in 1990? Which region had the HIGHEST percent forest in 1990, and which had the LOWEST, to 2 decimal places?

In [115]:
%%sql
SELECT country_name, forest_as_percent_of_land
FROM forestation
WHERE country_name = 'World'
AND year = 1990;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


country_name,forest_as_percent_of_land
World,32.0


In [116]:
%%sql
SELECT region, ROUND(((SUM(forest_area_sqkm)/SUM(total_area_sqkm))*100)::int, 2) AS region_percent
FROM forestation
WHERE year = 1990
GROUP BY 1 ORDER BY 2 DESC;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
8 rows affected.


region,region_percent
Latin America & Caribbean,51.0
Europe & Central Asia,37.0
North America,36.0
World,32.0
Sub-Saharan Africa,31.0
East Asia & Pacific,26.0
South Asia,17.0
Middle East & North Africa,2.0


## Part 3 - Country-level detail

In [119]:
%%sql
-- 1990 forest area
WITH t1 AS (
SELECT country_name, year, forest_area_sqkm AS area_1990
FROM forest_area
WHERE forest_area_sqkm IS NOT NULL
AND country_name <> 'World'
AND year = 1990
ORDER BY 3 DESC),
-- 2016 forest area
t2 AS(
SELECT country_name, year, forest_area_sqkm AS area_2016
FROM forest_Area
WHERE forest_area_sqkm IS NOT NULL
AND country_name <> 'World'
AND year = 2016 )
-- Difference between 1990 and 2016
SELECT t1.country_name, area_1990, area_2016, ROUND((area_2016 - area_1990)::int,0) AS area_difference, ROUND(((area_2016-area_1990)/area_1990 *100)::int,2) AS area_difference_percent
FROM t1
JOIN t2
ON t1.country_name = t2.country_name
ORDER BY 4 DESC
LIMIT 5;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
5 rows affected.


country_name,area_1990,area_2016,area_difference,area_difference_percent
China,1571405.938,2098635.0,527229,34.0
United States,3024500.0,3103700.0,79200,3.0
India,639390.0,708603.9844,69214,11.0
Russian Federation,8089500.0,8148895.0,59395,1.0
Vietnam,93630.0,149020.0,55390,59.0


Which 5 countries saw the largest amount decrease in forest area from 1990 to 2016? What was the difference in forest area for each?

In [122]:
%%sql
-- 1990 forest area
WITH t1 AS (
SELECT country_name, region, forest_area_sqkm AS area_1990
FROM forestation
WHERE forest_area_sqkm IS NOT NULL
AND country_name <> 'World'
AND year = 1990 ORDER BY 3 DESC ),
-- 2016 forest area
t2 AS (
SELECT country_name, region, forest_area_sqkm AS area_2016
FROM forestation
WHERE forest_area_sqkm IS NOT NULL
AND country_name <> 'World'
AND year = 2016 ORDER BY 3 DESC )
-- Difference between two tables
SELECT t1.country_name, t1.region, area_1990, area_2016, ROUND((area_1990-area_2016)::int,0) AS area_difference
FROM t1
JOIN t2
ON t1.country_name = t2.country_name
ORDER BY 5 DESC
LIMIT 5;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
5 rows affected.


country_name,region,area_1990,area_2016,area_difference
Brazil,Latin America & Caribbean,5467050.0,4925540.0,541510
Indonesia,East Asia & Pacific,1185450.0,903256.0156,282194
Myanmar,East Asia & Pacific,392180.0,284945.9961,107234
Nigeria,Sub-Saharan Africa,172340.0,65833.99902,106506
Tanzania,Sub-Saharan Africa,559200.0,456880.0,102320


Which 5 countries saw the largest percent decrease in forest area from 1990 to 2016? What was the percent change to 2 decimal places for each?

In [123]:
%%sql
-- 1990 forest area
WITH t1 AS(
SELECT country_name, region, forest_area_sqkm AS area_1990
FROM forestation
WHERE forest_area_sqkm IS NOT NULL
AND country_name <> 'World'
AND year = 1990 ORDER BY 3 DESC ),
-- 2016 forest area
t2 AS(
SELECT country_name, region, forest_area_sqkm AS area_2016
FROM forestation
WHERE forest_area_sqkm IS NOT NULL
AND country_name <> 'World'
AND year = 2016 ORDER BY 3 DESC )
-- Difference between the two
SELECT t1.country_name, t1.region, area_1990, area_2016, ROUND((area_1990-area_2016)::int,0) AS area_difference, ROUND(((area_1990-area_2016)/area_1990*100)::int,0) AS area_percent FROM t1
JOIN t2
ON t1.country_name = t2.country_name
ORDER BY 6 DESC
LIMIT 5;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
5 rows affected.


country_name,region,area_1990,area_2016,area_difference,area_percent
Togo,Sub-Saharan Africa,6850.0,1681.999969,5168,75
Nigeria,Sub-Saharan Africa,172340.0,65833.99902,106506,62
Uganda,Sub-Saharan Africa,47510.0,19418.00049,28092,59
Mauritania,Sub-Saharan Africa,4150.0,2210.0,1940,47
Honduras,Latin America & Caribbean,81360.0,44720.0,36640,45


If countries were grouped by percent forestation in quartiles, which group had the most countries in it in 2016?

In [124]:
%%sql
-- 2016 table
WITH t1 AS(
SELECT country_name, forest_as_percent_of_land
FROM forestation
WHERE year = 2016
AND country_name <> 'World'
)
-- working out the quartiles
SELECT
COUNT(CASE WHEN t1.forest_as_percent_of_land <= 25 THEN 1 ELSE NULL END) AS quartile_1,
COUNT(CASE WHEN t1.forest_as_percent_of_land > 25 AND t1.forest_as_percent_of_land <= 50 THEN 1 ELSE NULL END) AS quartile_2,
COUNT(CASE WHEN t1.forest_as_percent_of_land > 50 AND t1.forest_as_percent_of_land <= 75 THEN 1 ELSE NULL END) AS quartile_3,
COUNT(CASE WHEN t1.forest_as_percent_of_land > 75 THEN 1 ELSE NULL END) AS quartile_4
FROM t1;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


quartile_1,quartile_2,quartile_3,quartile_4
86,73,36,9


List all of the countries that were in the 4th quartile (percent forest > 75%) in 2016.

In [125]:
%%sql
SELECT country_name, region, forest_as_percent_of_land
FROM forestation
WHERE year = 2016
AND country_name <> 'World'
AND forest_as_percent_of_land > 75 ORDER BY 3 DESC;

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
9 rows affected.


country_name,region,forest_as_percent_of_land
Suriname,Latin America & Caribbean,98.0
"Micronesia, Fed. Sts.",East Asia & Pacific,92.0
Gabon,Sub-Saharan Africa,90.0
American Samoa,East Asia & Pacific,88.0
Palau,East Asia & Pacific,88.0
Seychelles,Sub-Saharan Africa,88.0
Guyana,Latin America & Caribbean,84.0
Lao PDR,East Asia & Pacific,82.0
Solomon Islands,East Asia & Pacific,78.0


How many countries had a percent forestation higher than the United States in 2016?

In [126]:
%%sql
SELECT count(*)
FROM forestation
WHERE year = 2016
AND country_name <> 'World'
AND forest_as_percent_of_land > (SELECT forest_as_percent_of_land
FROM forestation
WHERE year = 2016
AND country_name = 'United States');

 * postgresql://postgres:***@database-1.cy8ltogyfgas.us-east-1.rds.amazonaws.com:5432/postgres
1 rows affected.


count
91
