# Calculating Protected areas at the Ethnologue Polygon Level

In [4]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import matplotlib.patches as mpatches
from matplotlib.font_manager import FontProperties

import mapclassify

from rapidfuzz import process, fuzz

from shapely.geometry import Point
from shapely.geometry import MultiPolygon

import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterstats import zonal_stats
from glob import glob
from rasterio.merge import merge
from rasterio.enums import Resampling
from rasterio.io import MemoryFile

In [5]:
# Set base project path
base_path = Path("C:/Users/juami/Dropbox/RAships/2-Folklore-Nathan-Project/EA-Maps-Nathan-project/Measures_work")

# Set file paths
poscol_path = base_path / "data" / "raw" / "ethnologue" / "ancestral_characteristics_database_language_level" / "Ethnologue_16_shapefile" / "langa_no_overlap_biggest_clean.shp"

data_path = base_path / "data" / "interim"
maps_path = base_path / "maps" / "raw"
protectedland_path = maps_path / "Protected_land"

In [6]:
# Read the shapefiles and CSV files
ethnologue = gpd.read_file(poscol_path)

protected0 = gpd.read_file(protectedland_path / "WDPA_Jun2025_Public_shp_0" / "WDPA_Jun2025_Public_shp-polygons.shp")
protected1 = gpd.read_file(protectedland_path / "WDPA_Jun2025_Public_shp_1" / "WDPA_Jun2025_Public_shp-polygons.shp")   
protected2 = gpd.read_file(protectedland_path / "WDPA_Jun2025_Public_shp_2" / "WDPA_Jun2025_Public_shp-polygons.shp")

# Keep only relevant columns
ethnologue = ethnologue[["ID", "geometry"]]

In [7]:
# Combine the three GeoDataFrames
protected_all = pd.concat([protected0, protected1, protected2], ignore_index=True)

# Ensure it's still a GeoDataFrame
protected_all = gpd.GeoDataFrame(protected_all, geometry='geometry', crs=protected0.crs)

In [8]:
protected_all=protected_all[['WDPAID', 'geometry']]
protected_all.head()

Unnamed: 0,WDPAID,geometry
0,1.0,"POLYGON ((-61.82494 17.18497, -61.82497 17.184..."
1,2.0,"POLYGON ((-61.74007 17.52001, -61.77174 17.526..."
2,3.0,"POLYGON ((-65.98955 -22.47423, -65.99441 -22.4..."
3,4.0,"POLYGON ((-61.83791 -24.20686, -61.83781 -24.2..."
4,6.0,"POLYGON ((-73.1485 -49.27008, -73.14368 -49.27..."


In [10]:
# Making sure same CRS
ethnologue = ethnologue.to_crs(protected_all.crs)

# Keeping intersecting geometries 
intersections = gpd.overlay(ethnologue, protected_all, how='intersection')

# Calculate the area of the intersections in square kilometers
intersections['protected_km2'] = intersections.geometry.area / 1e6

# Group intersections by Ethnologue ID and sum the protected areas
protected_by_ethno = intersections.groupby('ID', as_index=False)['protected_km2'].sum()

#Merging to original
ethnologue_protectedland = ethnologue.merge(protected_by_ethno, on='ID', how='left')
ethnologue_protectedland['protected_km2'] = ethnologue_protectedland['protected_km2'].fillna(0)

  intersections = gpd.overlay(ethnologue, protected_all, how='intersection')

  intersections['protected_km2'] = intersections.geometry.area / 1e6


In [11]:
ethnologue_protectedland.head()

Unnamed: 0,ID,geometry,protected_km2
0,RUS-RUS,"MULTIPOLYGON (((45.11381 43.19152, 45.06952 43...",0.000104
1,ENG-USA,"MULTIPOLYGON (((-75.11965 19.97389, -75.11936 ...",8.7e-05
2,POR-BRA,"MULTIPOLYGON (((-52.13058 -31.97901, -52.12447...",0.00015
3,ENG-AUS,"MULTIPOLYGON (((147.36099 -43.08758, 147.36375...",5.1e-05
4,CMN-CHN,"MULTIPOLYGON (((108.65391 19.06045, 108.63222 ...",4e-06


In [12]:
# Keep only relevant columns
ethnologue_protectedarea = ethnologue_protectedland[["ID", "protected_km2"]]

# Export to CSV
ethnologue_protectedarea.to_csv(protectedland_path / "ethnologue_wdpa.csv", index=False)

print(f"Exported ethnologue_wdpa.csv")

Exported ethnologue_wdpa.csv
