# Calculating GHG (CO2) Emissions at the Ethnologue Polygon Level

In [18]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import xarray as xr
import rioxarray
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import matplotlib.patches as mpatches
from matplotlib.font_manager import FontProperties

import mapclassify

from rapidfuzz import process, fuzz

from shapely.geometry import Point

import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterstats import zonal_stats

In [19]:
# Set base project path.
# The default is the original local Dropbox folder; set the MEASURES_WORK_DIR
# environment variable to point the notebook at another copy of the project
# without editing this cell.
base_path = Path(
    os.environ.get(
        "MEASURES_WORK_DIR",
        "C:/Users/juami/Dropbox/RAships/2-Folklore-Nathan-Project/EA-Maps-Nathan-project/Measures_work",
    )
)

# Set file paths
# Ethnologue polygon shapefile
poscol_path = base_path / "data" / "raw" / "ethnologue" / "ancestral_characteristics_database_language_level" / "Ethnologue_16_shapefile" / "langa_no_overlap_biggest_clean.shp"

data_path = base_path / "data" / "interim"
maps_path = base_path / "maps" / "raw"
# Emissions flux raster (GeoTIFF) that the rest of the notebook aggregates
ghg_path = maps_path / "CO2" / "EDGAR_2025_GHG_CO2_2000_TOTALS_flx.tif"

In [20]:
# Read the Ethnologue polygons and put them in the same CRS as the GHG raster
ethnologue = gpd.read_file(poscol_path)
# Reprojected copy in EPSG:6933; it is not used by the other cells visible here.
ethnologue_proj = ethnologue.to_crs(epsg=6933)

# Only the raster's CRS is needed at this point, so open the file in a context
# manager: the handle is released immediately instead of staying open for the
# rest of the kernel session. `ghg` stays bound (to the closed dataset).
with rasterio.open(ghg_path) as ghg:
    ghg_crs = ghg.crs
ethnologue = ethnologue.to_crs(ghg_crs)

In [21]:
# Read the GHG raster data
ghg_data = rioxarray.open_rasterio(ghg_path)
# Query the resolution once and reuse it; index 0 is used as the longitude
# step and index 1 as the latitude step below.
ghg_resolution = ghg_data.rio.resolution()
print(f"GHG data shape: {ghg_data.shape}")
print(f"GHG data resolution: {ghg_resolution}")
print(f"GHG data CRS: {ghg_data.rio.crs}")

# Calculate pixel area in m² for 0.1 degree resolution
# At equator: 1 degree ≈ 111,320 meters
# 0.1 degree ≈ 11,132 meters
# But we need to account for latitude variation
# (numpy is already imported as np in the imports cell; no re-import needed here)

# Latitude coordinate of each raster row (the y axis of the grid)
lats = ghg_data.y.values
# abs() because the resolution components may be negative (north-up rasters
# report a negative y step).
lat_resolution = abs(ghg_resolution[1])  # degrees
lon_resolution = abs(ghg_resolution[0])  # degrees

print(f"Lat resolution: {lat_resolution}°, Lon resolution: {lon_resolution}°")

GHG data shape: (1, 1800, 3600)
GHG data resolution: (0.1, -0.1)
GHG data CRS: EPSG:4326
Lat resolution: 0.1°, Lon resolution: 0.1°


In [22]:
# Area of one lat/lon grid cell, which shrinks with cos(latitude)
def calculate_pixel_area(lat, lat_res, lon_res):
    """
    Return the approximate area in m² of a grid cell on a spherical Earth.

    lat     -- latitude of the cell in degrees (scalar or NumPy array)
    lat_res -- cell height in degrees
    lon_res -- cell width in degrees

    Uses the small-cell approximation
    area = R² * cos(lat) * lat_res * lon_res, with the angles in radians.
    """
    # Mean Earth radius, meters
    earth_radius_m = 6371000

    # All angles are needed in radians
    phi = np.deg2rad(lat)
    d_phi = np.deg2rad(lat_res)
    d_lambda = np.deg2rad(lon_res)

    # cos(phi) scales the east-west extent as meridians converge poleward
    return earth_radius_m**2 * np.cos(phi) * d_phi * d_lambda

# Create pixel area array for each latitude.
# The area depends only on the row's latitude, so compute one value per row
# in a single vectorised call and broadcast it across every column instead of
# looping over the rows in Python.
row_areas = np.asarray(calculate_pixel_area(lats, lat_resolution, lon_resolution))

# Same shape and dtype as the first raster band, matching the previous
# zeros_like-based allocation.
pixel_areas = np.empty(ghg_data.shape[1:], dtype=ghg_data.dtype)
pixel_areas[:] = row_areas[:, np.newaxis]

print(f"Pixel areas shape: {pixel_areas.shape}")
print(f"Pixel area range: {pixel_areas.min():,.0f} to {pixel_areas.max():,.0f} m²")
# The middle row index is used as the stand-in for the equator.
print(f"Pixel area at equator: {pixel_areas[pixel_areas.shape[0]//2, 0]:,.0f} m²")

Pixel areas shape: (1800, 3600)
Pixel area range: 107,899 to 123,643,072 m²
Pixel area at equator: 123,643,072 m²


In [23]:
# Convert GHG fluxes from kg/m²/s to tonnes/year for each pixel
# NOTE(review): 365.25 days is an average year length; the raster file name
# suggests the data is for 2000 (a leap year) — confirm which year length the
# data provider intends for this conversion.
seconds_per_year = 365.25 * 24 * 3600
kg_to_tonnes = 0.001

# Get GHG flux data (kg/m²/s) from the first (and likely only) band.
# Upcast to float64 before any arithmetic: in the raster's native precision
# the per-pixel products and, above all, the global sum over millions of
# pixels cannot be resolved finely enough, so the low digits of the printed
# total were noise.
ghg_flux = ghg_data[0].values.astype(np.float64)

# Convert to tonnes/m²/year
ghg_flux_tonnes_m2_year = ghg_flux * seconds_per_year * kg_to_tonnes

# Calculate total emissions per pixel (tonnes/year)
ghg_total_tonnes_year = ghg_flux_tonnes_m2_year * pixel_areas

print(f"GHG flux range (kg/m²/s): {np.nanmin(ghg_flux):.2e} to {np.nanmax(ghg_flux):.2e}")
print(f"GHG flux range (tonnes/m²/year): {np.nanmin(ghg_flux_tonnes_m2_year):.2e} to {np.nanmax(ghg_flux_tonnes_m2_year):.2e}")
print(f"Total GHG per pixel range (tonnes/year): {np.nanmin(ghg_total_tonnes_year):.2e} to {np.nanmax(ghg_total_tonnes_year):.2e}")
print(f"Total global GHG (tonnes/year): {np.nansum(ghg_total_tonnes_year):,.0f}")

# Create new xarray with converted data.
# The copy keeps the source raster's coordinates, attributes and dtype; the
# float64 totals are cast to that dtype when assigned into the band.
ghg_converted = ghg_data.copy()
ghg_converted[0] = ghg_total_tonnes_year
ghg_converted.attrs['units'] = 'tonnes/year'
ghg_converted.attrs['long_name'] = 'CO2 emissions per pixel'

GHG flux range (kg/m²/s): 0.00e+00 to 3.51e-05
GHG flux range (tonnes/m²/year): 0.00e+00 to 1.11e+00
Total GHG per pixel range (tonnes/year): 0.00e+00 to 8.93e+07
Total global GHG (tonnes/year): 25,642,084,352


In [24]:
# Write the tonnes/year raster to disk so the zonal statistics step can read it from a file path
temp_raster_path = maps_path.joinpath("CO2", "temp_ghg_tonnes_per_year.tif")
ghg_converted.rio.to_raster(temp_raster_path)


In [25]:

# Perform zonal statistics with the converted data (sum to get total emissions per polygon)
# NOTE(review): with the default rasterization settings, polygons that contain
# no pixel centre presumably come back with a missing sum — verify how many
# polygons are affected before using the result.
stats = zonal_stats(ethnologue, temp_raster_path, stats=["sum"], geojson_out=True)

# Create GeoDataFrame.
# The GeoJSON-like features carry no CRS, so pass the polygons' CRS explicitly;
# otherwise the resulting frame has crs=None. The rename is chained rather
# than done in place so the cell builds the frame in one step.
ethnologue_ghg = gpd.GeoDataFrame.from_features(stats, crs=ethnologue.crs).rename(
    columns={"sum": "ghg"}
)

print(ethnologue_ghg[['ghg']].head())

            ghg
0  1.386498e+09
1  5.349706e+09
2  2.946257e+08
3  3.072212e+08
4  1.984995e+09


In [26]:
# Reduce the table to the polygon identifier and its emissions total
export_columns = ["ID", "ghg"]
ethnologue_ghg = ethnologue_ghg[export_columns]

# Write the result as CSV into the CO2 maps folder (no index column)
csv_path = maps_path / "CO2" / "ethnologue_ghg.csv"
ethnologue_ghg.to_csv(csv_path, index=False)

print("Exported ethnologue_ghg.csv")

Exported ethnologue_ghg.csv
