# Calculating HII at the Ethnologue Polygon Level

In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import xarray as xr
import rioxarray
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import matplotlib.patches as mpatches
from matplotlib.font_manager import FontProperties

import mapclassify

from rapidfuzz import process, fuzz

from shapely.geometry import Point

import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterstats import zonal_stats

In [2]:
# Set base project path.
# The Dropbox location below is machine-specific, so it is only the fallback:
# set the MEASURES_WORK_DIR environment variable to run the notebook from
# another checkout. An unset or empty variable falls back to the default.
base_path = Path(
    os.environ.get("MEASURES_WORK_DIR")
    or "C:/Users/juami/Dropbox/RAships/2-Folklore-Nathan-Project/EA-Maps-Nathan-project/Measures_work"
)

# Set file paths (all derived from base_path)
# Ethnologue polygon shapefile
poscol_path = (
    base_path
    / "data"
    / "raw"
    / "ethnologue"
    / "ancestral_characteristics_database_language_level"
    / "Ethnologue_16_shapefile"
    / "langa_no_overlap_biggest_clean.shp"
)

data_path = base_path / "data" / "interim"
maps_path = base_path / "maps" / "raw"
# HII raster (GeoTIFF)
hii_path = maps_path / "HII" / "hii_2001-01-01.tif"

In [3]:
# Read the Ethnologue polygon shapefile and align it with the HII raster
ethnologue = gpd.read_file(poscol_path)

# Copy reprojected to EPSG:6933. Not referenced by the cells visible here;
# kept in case other cells rely on it.
ethnologue_proj = ethnologue.to_crs(epsg=6933)

# Open the raster only long enough to read its CRS. The context manager
# closes the file handle instead of leaving it open for the kernel's lifetime,
# so `hii` is a closed dataset after this block.
with rasterio.open(hii_path) as hii:
    hii_crs = hii.crs

# Reproject the polygons to the raster's CRS so that zonal statistics
# overlay the two layers in the same coordinate system.
ethnologue = ethnologue.to_crs(hii_crs)

In [4]:
# Zonal statistics: mean raster value within each Ethnologue polygon.
# geojson_out=True makes rasterstats return GeoJSON-like features (geometry,
# original attributes and the computed statistic) instead of bare dicts.
stats = zonal_stats(
    ethnologue,
    hii_path,
    stats=["mean"],
    geojson_out=True,
)

In [5]:
# Rebuild a GeoDataFrame from the GeoJSON-like features returned by zonal_stats.
# from_features() cannot infer a CRS from the features, so pass the CRS of the
# polygons that were fed to zonal_stats; otherwise the result has no CRS.
# rename() is chained (no inplace=True) so the cell is one re-runnable expression.
ethnologue_hii = gpd.GeoDataFrame.from_features(stats, crs=ethnologue.crs).rename(
    columns={"mean": "hii"}
)

# Quick look at the per-polygon mean values
print(ethnologue_hii[['hii']].head())

           hii
0   455.000700
1   843.404400
2   471.673553
3   356.541696
4  1026.041199


In [6]:
# Keep only relevant columns: the polygon identifier and its mean HII value
ethnologue_hii = ethnologue_hii[["ID", "hii"]]

# Export to CSV. The destination is bound once so the file that is written
# and the name reported below cannot drift apart.
hii_csv_path = maps_path / "HII" / "ethnologue_hii.csv"
ethnologue_hii.to_csv(hii_csv_path, index=False)

print(f"Exported {hii_csv_path.name}")

Exported ethnologue_hii.csv
