This notebook contains functions that combine World Database of Protected Areas and Ramsar data for a country or continent to create a full protected areas dataset for the desired region and calculates the total length of rivers on protected lands and total area of protected lands that are affected by proposed dams by country for a desired continent. Below you will find the code to analyze Asia, Africa, and South America, you simply need to uncomment your selected continent and comment out deselected continents (running all three continents at once will likely crash your notebook!).

In [1]:
# Imports
import warnings
import os
import sys

import numpy as np
import numpy.ma as ma
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

import earthpy as et
import earthpy.plot as ep
import contextily as ctx

In [2]:
# Check path and set working directory.
wd_path = os.path.join(et.io.HOME, 'earth-analytics', 'data')
if os.path.exists(wd_path):
    os.chdir(wd_path)
else:
    print("Path does not exist")

In [3]:
# Download Data stored on figshare
# Free flowing rivers current DOR
et.data.get_data(url="https://ndownloader.figshare.com/files/23273213")

# Free flowing rivers future DOR
et.data.get_data(url="https://ndownloader.figshare.com/files/23273216")

# World Database of Protected Areas Africa
et.data.get_data(url="https://ndownloader.figshare.com/files/23354894")

# World Database of Protected Areas Asia
et.data.get_data(url="https://ndownloader.figshare.com/files/23354900")

# World Database of Protected Areas South America
et.data.get_data(url="https://ndownloader.figshare.com/files/23355335")

# World Database of Protected Areas Europe
# et.data.get_data(url="https://ndownloader.figshare.com/files/23355098")

# World Database of Protected Areas North America
# et.data.get_data(url="https://ndownloader.figshare.com/files/23355317")

# Ramsar Sites
et.data.get_data(url="https://ndownloader.figshare.com/files/22507082")

# Country boundaries
et.data.get_data(url="https://ndownloader.figshare.com/files/22507058")

# Continent boundaries
et.data.get_data(url="https://ndownloader.figshare.com/files/23392280")

# Continent-country csv
et.data.get_data(url="https://ndownloader.figshare.com/files/23393756")

'/home/jovyan/earth-analytics/data/earthpy-downloads/continent-country.csv'

In [4]:
# Custon Function
def all_pa_continent(wdpa_polys, ramsar_polys, cont_name):
    """ This function takes WDPA polygons for a continent and global ramsar polygons and returns a multipolygon feature 
    of the World Database of Protected Areas merged with the ramsar areas for that continent.

    Parameters
    ----------
    wdpa_polys: gdf
        The feature with the WDPA polygons for the selected continent.
    ramsar_polys: gdf
        The feature with all global ramsar polygons.
    cont_name: str
        The name of the selected continent.

    Returns
    -------
    wdpa_ramsar: gdf
        A gdf of both the ramsar and WDPA protected areas for the continent.
    """
    # Remove ramsar areas from WDPA dataset
    try:
        wdpa_polys.set_index('DESIG', inplace=True)
        wdpa_polys.drop(
            "Ramsar Site, Wetland of International Importance", inplace=True)
    except:
        print('No ramsar areas in WDPA dataset.')

    # Remove duplicates from WDPA dataset (areas tagged by both state and local authorities)
    try:
        wdpa_polys.set_index('NAME', inplace=True)
        wdpa_polys.drop_duplicates(subset=None,
                                   keep='first', inplace=False)
    except:
        print('No duplicates in the WDPA dataset.')

    # Pull out the ramsar areas for the continent or country and merge with protected areas
    ramsar_polys = ramsar_polys[ramsar_polys["continent"] == cont_name]
    wdpa_ramsar = wdpa_polys.append(ramsar_polys, 'sort=True')

    return wdpa_ramsar

In [5]:
# Open continent & country borders & ISOs
country_borders = gpd.read_file(os.path.join(wd_path, "earthpy-downloads", "country-borders",
                                             "99bfd9e7-bb42-4728-87b5-07f8c8ac631c2020328-1-1vef4ev.lu5nk.shp"))

continent_iso = pd.read_csv(os.path.join(wd_path, "earthpy-downloads",
                                         "continent-country.csv"))

continent_borders = gpd.read_file(os.path.join(wd_path, "earthpy-downloads", "continent-poly",
                                               "Continents.shp"))

# Reproject data to World Equidistant Cylindrical, datum WGS84, units meters, EPSG 4087
country_borders = country_borders.to_crs('epsg:4087')
continent_borders = continent_borders.to_crs('epsg:4087')

In [6]:
# Open ramsar areas
ramsar_polys = gpd.read_file(os.path.join(
    "earthpy-downloads", "ramsar-site-data", "ramsar-boundaries",
    "features_publishedPolygon.shp"))

# Rename ramsar columns to match WDPA
try:
    ramsar_polys = ramsar_polys.rename(
        columns={"iso3": "PARENT_ISO", "officialna": "NAME", "area_off": "Shape_Area"})
except:
    print('Ramsar column names already match WDPA dataset.')

# Merge continent names with ramsar data for analyzing by continent
ramsar_polys = pd.merge(ramsar_polys, continent_iso,
                        left_on='PARENT_ISO', right_on='ISO3')

# Data cleaning - take only necessary ramsar columns
ramsar_polys = ramsar_polys[['NAME', 'PARENT_ISO',
                             'Shape_Area', 'continent', 'geometry']]

# Reproject ramsar data to  World Equidistant Cylindrical, datum WGS84, units meters, EPSG 4087
ramsar_polys = ramsar_polys.to_crs('epsg:4087')

In [7]:
# Open current DOR shapefiles
dor_0to5 = gpd.read_file(os.path.join(wd_path,
                                      "earthpy-downloads", "DOR_Binned", "DOR_0to5.shp"))
dor_5to10 = gpd.read_file(os.path.join(wd_path,
                                       "earthpy-downloads", "DOR_Binned", "DOR_5to10.shp"))
dor_10to15 = gpd.read_file(os.path.join(wd_path,
                                        "earthpy-downloads", "DOR_Binned", "DOR_10to15.shp"))
dor_15to20 = gpd.read_file(os.path.join(wd_path,
                                        "earthpy-downloads", "DOR_Binned", "DOR_15to20.shp"))
dor_over20 = gpd.read_file(os.path.join(wd_path,
                                        "earthpy-downloads", "DOR_Binned", "DOR_over20.shp"))

In [8]:
# For loop to (1) calculating difference in DOR between planned and current, (2) pulling only rivers with class > 3,
# (3) projecting rivers, (4) buffer by 1/100 km to become polys for overlay fuction, (5) pull only needed columns

gdf_list = [dor_0to5, dor_5to10, dor_10to15, dor_15to20, dor_over20]
river_list_prj = []

for shp in gdf_list:
    shp['DOR_DIFF'] = shp['DOR_PLA'] - shp['DOR']
    shp = shp[shp.RIV_CLASS > 3]
    shp = shp.to_crs('epsg:4087')
    shp['geometry'] = shp.buffer(10)
    shp = shp[['LENGTH_KM', 'RIV_ORD', 'RIV_CLASS', 'CONTINENT',
               'ISO_NAME', 'BAS_NAME', 'DOR', 'DOR_PLA', 'DOR_DIFF', 'Shape_Leng', 'geometry']]
    river_list_prj.append(shp)

# Re-assign names based on list index
dor_0to5 = river_list_prj[0]
dor_5to10 = river_list_prj[1]
dor_10to15 = river_list_prj[2]
dor_15to20 = river_list_prj[3]
dor_over20 = river_list_prj[4]

# Concatanate all current rivers gdfs for easier analysis
all_rivers = pd.concat([dor_0to5, dor_5to10, dor_10to15,
                        dor_15to20, dor_over20], axis=0)

# Remove rivers that have DOR_DIFF of 0
all_rivers_lg = all_rivers[all_rivers.DOR_DIFF > 0]

In [9]:
# Analyze South America
# Open WDPA polygons
wdpa_sa_polys = gpd.read_file(os.path.join(wd_path,
                                           "earthpy-downloads", "WDPA_S_America", "WDPA_S_America.shp"))

# Data cleaning - remove polygons with no area & take only the needed columns from WDPA dataset
wdpa_sa_polys = wdpa_sa_polys[wdpa_sa_polys.geometry != None]

# Merge continent names with WDPA data for analyzing by continent
wdpa_sa_polys = pd.merge(wdpa_sa_polys, continent_iso,
                        left_on='PARENT_ISO', right_on='ISO3')

# Take only the columns we need
wdpa_sa_polys = wdpa_sa_polys[[
    'NAME', 'DESIG', 'PARENT_ISO', 'Shape_Area', 'continent', 'geometry']]

# Reporject WDPA data
wdpa_sa_polys = wdpa_sa_polys.to_crs('epsg:4087')

# Get the combined WDPA & ramsar areas for selected continent
wdpa_ramsar_sa = all_pa_continent(wdpa_sa_polys, ramsar_polys, "South America")

# Getting river length affected
# Overlay current rivers on protected areas for selected continent to get ONLY rivers the overlap PAs
river_overlap_sa = gpd.overlay(
    wdpa_ramsar_sa, all_rivers_lg, how='intersection')

# Getting protected areas affected
# Overlay projected rivers on pas for selected continent to get ONLY pas that overlap rivers
pa_overlap_sa = gpd.overlay(
    river_overlap_sa, wdpa_ramsar_sa, how='intersection')

# Get a list of countries in each continent for calculating lengths/areas by country later
sa_countries = continent_iso[continent_iso.continent == 'South America']

# Create empty lists
country_sums = []
area_sums = []
countries = []

# Sum up the total river length affected by country in the continent
for country in sa_countries.ISO3:
    country_sums.append((
        river_overlap_sa.loc[river_overlap_sa['PARENT_ISO'] == country, 'LENGTH_KM'].sum()).round(0))
    area_sums.append((
        pa_overlap_sa.loc[pa_overlap_sa['PARENT_ISO_1'] == country, 'Shape_Area_1'].sum()).round(0))
    countries.append(country)

 # Create a pandas dataframe of lengths and areas affected
sa_affected = pd.DataFrame(list(zip(countries, country_sums, area_sums)), columns=[
                           'Country', 'Affected_KM', 'Affected_Area'])

In [22]:
# We need to export the results so we can use it in a different notebook
# sa_affected.to_csv(wd_path, 'dam-project-outputs', 'sa_affected.csv')

IsADirectoryError: [Errno 21] Is a directory: '/home/jovyan/earth-analytics/data'

In [10]:
sa_affected.head()

Unnamed: 0,Country,Affected_KM,Affected_Area
0,ARG,910.0,8752586.0
1,BOL,80.0,41.0
2,BVT,0.0,0.0
3,BRA,2542.0,124350126.0
4,CHL,0.0,0.0


In [11]:
# # Analyze Africa
# wdpa_africa_polys = gpd.read_file(os.path.join(wd_path,
#                                                "earthpy-downloads", "WDPA_Africa", "WDPA_Africa.shp"))

# # Data cleaning - remove polygons with no area & take only the needed columns from WDPA dataset
# wdpa_africa_polys = wdpa_africa_polys[wdpa_africa_polys.geometry != None]

# # Merge continent africames with WDPA data for aafricalyzing by continent
# wdpa_africa_polys = pd.merge(wdpa_africa_polys, continent_iso,
#                         left_on='PARENT_ISO', right_on='ISO3')

# # Take only the columns we need
# wdpa_africa_polys = wdpa_africa_polys[[
#     'NAME', 'DESIG', 'PARENT_ISO', 'GIS_AREA', 'continent', 'geometry']]

# # Reporject WDPA data
# wdpa_africa_polys = wdpa_africa_polys.to_crs('epsg:4087')

# # Get the combined WDPA & ramsar areas for selected continent
# wdpa_ramsar_africa = all_pa_continent(wdpa_africa_polys, ramsar_polys, "Africa")

# # Getting river length affected
# # Overlay current rivers on protected areas for selected continent to get ONLY rivers the overlap PAs
# river_overlap_africa = gpd.overlay(
#     wdpa_ramsar_africa, all_rivers_lg, how='intersection')

# # Getting protected areas affected
# # Overlay projected rivers on pas for selected continent to get ONLY pas that overlap rivers
# pa_overlap_africa = gpd.overlay(
#     river_overlap_africa, wdpa_ramsar_africa, how='intersection')

# # Get a list of countries in each continent for calculating lengths/areas by country later
# africa_countries = continent_iso[continent_iso.continent == 'Africa']

# # Create empty lists
# country_sums = []
# area_sums = []
# countries = []

# # Sum up the total river length affected by country in the continent
# for country in africa_countries.ISO3:
#     country_sums.append((
#         river_overlap_africa.loc[river_overlap_africa['PARENT_ISO'] == country, 'LENGTH_KM'].sum()).round(0))
#     area_sums.append((
#         pa_overlap_africa.loc[pa_overlap_africa['PARENT_ISO_1'] == country, 'Shape_Area_1'].sum()).round(0))
#     countries.append(country)

#  # Create a pandas dataframe of lengths and areas affected
# africa_affected = pd.DataFrame(list(zip(countries, country_sums, area_sums)), columns=[
#                            'Country', 'Affected_KM', 'Affected_Area'])

In [12]:
# # Analyze Asia
# wdpa_asia_polys = gpd.read_file(os.path.join(wd_path,
#                                              "earthpy-downloads", "WDPA_Asia", "WDPA_Asia.shp"))

# # Data cleaning - remove polygons with no area & take only the needed columns from WDPA dataset
# wdpa_asia_polys = wdpa_asia_polys[wdpa_asia_polys.geometry != None]

# # Merge continent asiames with WDPA data for aasialyzing by continent
# wdpa_asia_polys = pd.merge(wdpa_asia_polys, continent_iso,
#                         left_on='PARENT_ISO', right_on='ISO3')

# # Take only the columns we need
# wdpa_asia_polys = wdpa_asia_polys[[
#     'NAME', 'DESIG', 'PARENT_ISO', 'Shape_Area', 'continent', 'geometry']]

# # Reporject WDPA data
# wdpa_asia_polys = wdpa_asia_polys.to_crs('epsg:4087')

# # Get the combined WDPA & ramsar areas for selected continent
# wdpa_ramsar_asia = all_pa_continent(wdpa_asia_polys, ramsar_polys, "Asia")

# # Getting river length affected
# # Overlay current rivers on protected areas for selected continent to get ONLY rivers the overlap PAs
# river_overlap_asia = gpd.overlay(
#     wdpa_ramsar_asia, all_rivers_lg, how='intersection')

# # Getting protected areas affected
# # Overlay projected rivers on pas for selected continent to get ONLY pas that overlap rivers
# pa_overlap_asia = gpd.overlay(
#     river_overlap_asia, wdpa_ramsar_asia, how='intersection')

# # Get a list of countries in each continent for calculating lengths/areas by country later
# asia_countries = continent_iso[continent_iso.continent == 'Asia']

# # Create empty lists
# country_sums = []
# area_sums = []
# countries = []

# # Sum up the total river length affected by country in the continent
# for country in asia_countries.ISO3:
#     country_sums.append((
#         river_overlap_asia.loc[river_overlap_asia['PARENT_ISO'] == country, 'LENGTH_KM'].sum()).round(0))
#     area_sums.append((
#         pa_overlap_asia.loc[pa_overlap_asia['PARENT_ISO_1'] == country, 'Shape_Area_1'].sum()).round(0))
#     countries.append(country)

#  # Create a pandas dataframe of lengths and areas affected
# asia_affected = pd.DataFrame(list(zip(countries, country_sums, area_sums)), columns=[
#                            'Country', 'Affected_KM', 'Affected_Area'])

In [13]:
# # Analyze North America - kernel dies, dataset too big?
# wdpa_na_polys = gpd.read_file(os.path.join(wd_path,
#                                            "earthpy-downloads", "WDPA_N_America", "WDPA_N_America.shp"))
# # Data cleaning - remove polygons with no area & take only the needed columns from WDPA dataset
# wdpa_na_polys = wdpa_na_polys[wdpa_na_polys.geometry != None]

# # Merge continent names with WDPA data for analyzing by continent
# wdpa_na_polys = pd.merge(wdpa_na_polys, continent_iso,
#                         left_on='PARENT_ISO', right_on='ISO3')

# # Take only the columns we need
# wdpa_na_polys = wdpa_na_polys[[
#     'NAME', 'DESIG', 'PARENT_ISO', 'Shape_Area', 'continent', 'geometry']]

# # Reporject WDPA data
# wdpa_na_polys = wdpa_na_polys.to_crs('epsg:4087')

# # Get the combined WDPA & ramsar areas for selected continent
# wdpa_ramsar_na = all_pa_continent(wdpa_na_polys, ramsar_polys, "North America")

# # Getting river length affected
# # Overlay current rivers on protected areas for selected continent to get ONLY rivers the overlap PAs
# river_overlap_na = gpd.overlay(
#     wdpa_ramsar_na, all_rivers_lg, how='intersection')

# # Getting protected areas affected
# # Overlay projected rivers on pas for selected continent to get ONLY pas that overlap rivers
# pa_overlap_na = gpd.overlay(
#     river_overlap_na, wdpa_ramsar_na, how='intersection')

# # Get a list of countries in each continent for calculating lengths/areas by country later
# na_countries = continent_iso[continent_iso.continent == 'North America']

# # Create empty lists
# country_sums = []
# area_sums = []
# countries = []

# # Sum up the total river length affected by country in the continent
# for country in na_countries.ISO3:
#     country_sums.append((
#         river_overlap_na.loc[river_overlap_na['PARENT_ISO'] == country, 'LENGTH_KM'].sum()).round(0))
#     area_sums.append((
#         pa_overlap_na.loc[pa_overlap_na['PARENT_ISO_1'] == country, 'Shape_Area_1'].sum()).round(0))
#     countries.append(country)

#  # Create a pandas dataframe of lengths and areas affected
# na_affected = pd.DataFrame(list(zip(countries, country_sums, area_sums)), columns=[
#                            'Country', 'Affected_KM', 'Affected_Area'])

In [14]:
# # Analyze Europe - kernel dies, dataset too big?
# wdpa_europe_polys = gpd.read_file(os.path.join(wd_path,
#                                                "earthpy-downloads", "WDPA_Europe", "WDPA_Europe.shp"))

# # Data cleaning - remove polygons with no area & take only the needed columns from WDPA dataset
# wdpa_europe_polys = wdpa_europe_polys[wdpa_europe_polys.geometry != None]

# # Merge continent names with WDPA data for analyzing by continent
# wdpa_europe_polys = pd.merge(wdpa_europe_polys, continent_iso,
#                         left_on='PARENT_ISO', right_on='ISO3')

# # Take only the columns we need
# wdpa_europe_polys = wdpa_europe_polys[[
#     'NAME', 'DESIG', 'PARENT_ISO', 'Shape_Area', 'continent', 'geometry']]

# # Reporject WDPA data
# wdpa_europe_polys = wdpa_europe_polys.to_crs('epsg:4087')

# # Get the combined WDPA & rameuroper areas for selected continent
# wdpa_ramsar_europe = all_pa_continent(wdpa_europe_polys, ramsar_polys, "Europe")

# # Getting river length affected
# # Overlay current rivers on protected areas for selected continent to get ONLY rivers the overlap PAs
# river_overlap_europe = gpd.overlay(
#     wdpa_rameuroper_europe, all_rivers_lg, how='intersection')

# # Getting protected areas affected
# # Overlay projected rivers on pas for selected continent to get ONLY pas that overlap rivers
# pa_overlap_europe = gpd.overlay(
#     river_overlap_europe, wdpa_rameuroper_europe, how='intersection')

# # Get a list of countries in each continent for calculating lengths/areas by country later
# europe_countries = continent_iso[continent_iso.continent == 'Europe']

# # Create empty lists
# country_sums = []
# area_sums = []
# countries = []

# # Sum up the total river length affected by country in the continent
# for country in europe_countries.ISO3:
#     country_sums.append((
#         river_overlap_europe.loc[river_overlap_europe['PARENT_ISO'] == country, 'LENGTH_KM'].sum()).round(0))
#     area_sums.append((
#         pa_overlap_europe.loc[pa_overlap_europe['PARENT_ISO_1'] == country, 'Shape_Area_1'].sum()).round(0))
#     countries.append(country)

#  # Create a pandas dataframe of lengths and areas affected
# europe_affected = pd.DataFrame(list(zip(countries, country_sums, area_sums)), columns=[
#                            'Country', 'Affected_KM', 'Affected_Area'])

In [15]:
# # Analyze Australia - runs, however, there are no large rivers overlapping protected areas
# wdpa_aus_polys = gpd.read_file(os.path.join(wd_path,
#                                             "earthpy-downloads", "WDPA_Australia", "WDPA_Australia.shp"))

# # Data cleaning - remove polygons with no area & take only the needed columns from WDPA dataset
# wdpa_aus_polys = wdpa_aus_polys[wdpa_aus_polys.geometry != None]

# # Merge continent ausmes with WDPA data for aauslyzing by continent
# wdpa_aus_polys = pd.merge(wdpa_aus_polys, continent_iso,
#                         left_on='PARENT_ISO', right_on='ISO3')

# # Take only the columns we need
# wdpa_aus_polys = wdpa_aus_polys[[
#     'NAME', 'DESIG', 'PARENT_ISO', 'Shape_Area', 'continent', 'geometry']]

# # Reporject WDPA data
# wdpa_aus_polys = wdpa_aus_polys.to_crs('epsg:4087')

# # Get the combined WDPA & ramsar areas for selected continent
# wdpa_ramsar_aus = all_pa_continent(wdpa_aus_polys, ramsar_polys, "Oceania")

# # Getting river length affected
# # Overlay current rivers on protected areas for selected continent to get ONLY rivers the overlap PAs
# river_overlap_aus = gpd.overlay(
#     wdpa_ramsar_aus, all_rivers_lg, how='intersection')

# # Getting protected areas affected
# # Overlay projected rivers on pas for selected continent to get ONLY pas that overlap rivers
# pa_overlap_aus = gpd.overlay(
#     river_overlap_aus, wdpa_ramsar_aus, how='intersection')

# # Get a list of countries in each continent for calculating lengths/areas by country later
# aus_countries = continent_iso[continent_iso.continent == 'Oceania']

# # Create empty lists
# country_sums = []
# area_sums = []
# countries = []

# # Sum up the total river length affected by country in the continent
# for country in aus_countries.ISO3:
#     country_sums.append((
#         river_overlap_aus.loc[river_overlap_aus['PARENT_ISO'] == country, 'LENGTH_KM'].sum()).round(0))
#     area_sums.append((
#         pa_overlap_aus.loc[pa_overlap_aus['PARENT_ISO_1'] == country, 'Shape_Area_1'].sum()).round(0))
#     countries.append(country)

#  # Create a pandas dataframe of lengths and areas affected
# aus_affected = pd.DataFrame(list(zip(countries, country_sums, area_sums)), columns=[
#                            'Country', 'Affected_KM', 'Affected_Area'])