Calculate tree canopy cover percentage for each LSOA in London by rasterising the LSOA and tree canopy polygons onto a common grid.

In [None]:
# pip install geopandas rasterio numpy pyogrio

import geopandas as gpd
import numpy as np
import rasterio
from rasterio import features
from rasterio.transform import from_bounds

from pathlib import Path  
import os

from pathlib import Path

# --- 1. Settings & Paths ---
# Root of the shared project drive. The default is the original hard-coded
# location; set the WELLCOME_DATA_DIR environment variable to run this notebook
# on a machine where the drive is mounted somewhere else.
wd_main     = Path(os.environ.get("WELLCOME_DATA_DIR", r"G:\Shared drives\Wellcome Trust Project Data"))
wd_shp      = wd_main / "1_preprocess" / "UrbanCoolingModel" / "OfficialWorkingInputs" / "AOIs"

# Kernel working directory and its parent. Note that os.getcwd() is wherever
# the kernel was started, which is not necessarily where the notebook lives.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

# Input layers: LSOA polygons (zones) and tree canopy polygons.
LSOA_PATH = wd_shp / 'Social_Vulnerability_Index_london_q.gpkg'
TREE_PATH = wd_main / "0_source_data" / "uk_shapefile_TCC24" / "TreeCanopyCover24_stitched.shp"

print("Loading datasets...")
# 'engine="pyogrio"' is significantly faster for reading large shapefiles
lsoa = gpd.read_file(LSOA_PATH, engine="pyogrio")
trees = gpd.read_file(TREE_PATH, engine="pyogrio")

Loading datasets...


In [None]:
# OUTPUT_PATH = os.path.join(parent_dir, 'data', "GLA_Tree_Cover_by_LSOA.gpkg")
OUTPUT_PATH = wd_shp / "GLA_Tree_Cover_by_LSOA.gpkg"
RESOLUTION = 10  # raster cell size in CRS units (metres once in EPSG:27700)


# Ensure CRS matches (British National Grid recommended: EPSG:27700)
target_crs = 27700

# A layer without CRS metadata cannot be reprojected; fail with a clear message
# instead of the AttributeError that `None.to_epsg()` would raise.
for layer_name, layer in (("LSOA", lsoa), ("tree canopy", trees)):
    if layer.crs is None:
        raise ValueError(
            f"The {layer_name} layer has no CRS defined; cannot reproject to EPSG:{target_crs}."
        )

# to_epsg() returns None for a CRS with no EPSG match, which also triggers
# the reprojection below.
if lsoa.crs.to_epsg() != target_crs:
    lsoa = lsoa.to_crs(epsg=target_crs)
if trees.crs.to_epsg() != target_crs:
    trees = trees.to_crs(epsg=target_crs)

# --- 2. Setup the Grid ---
print(f"Defining {RESOLUTION}m raster grid...")

if lsoa.empty:
    raise ValueError("The LSOA layer contains no features; cannot define a raster grid.")

# We use the bounds of the LSOA layer to define our canvas
xmin, ymin, xmax, ymax = lsoa.total_bounds

# Calculate grid dimensions. Round UP so the grid fully covers the LSOA extent
# (truncating would make the grid one partial cell short in each direction).
width = max(1, int(np.ceil((xmax - xmin) / RESOLUTION)))
height = max(1, int(np.ceil((ymax - ymin) / RESOLUTION)))

# Create the affine transform for the raster.
# The grid is anchored at the top-left corner (xmin, ymax) and its right/bottom
# edges are snapped to a whole number of cells, so every cell is exactly
# RESOLUTION x RESOLUTION. Passing the raw bounds with integer width/height
# would stretch the cells to a slightly different size.
transform = from_bounds(
    xmin,
    ymax - height * RESOLUTION,
    xmin + width * RESOLUTION,
    ymax,
    width,
    height,
)

# --- 3. Rasterize LSOAs (Create Zone Map) ---
print("Rasterizing LSOA polygons (creating zone map)...")

# Give every LSOA row a sequential integer label (0, 1, 2, ...) held in a
# temporary column; this label is the value burned into the zone raster.
lsoa['temp_id'] = range(len(lsoa))

# Lazily pair each geometry with its label: (geometry, value)
lsoa_shapes = zip(lsoa.geometry, lsoa['temp_id'])

# Burn the labels into the grid. Cells not covered by any LSOA keep the
# fill value -1; int32 leaves ample room for the number of zones.
lsoa_grid = features.rasterize(
    lsoa_shapes,
    out_shape=(height, width),
    fill=-1,
    transform=transform,
    dtype='int32',
)

# --- 4. Rasterize Trees (Create Binary Map) ---
print("Rasterizing Tree polygons...")

# Map-coordinate extent of the raster, derived from the transform itself so it
# always matches the grid being burned into.
grid_left, grid_top = transform * (0, 0)
grid_right, grid_bottom = transform * (width, height)

# Only polygons that intersect the grid extent can burn any cell. Dropping the
# rest with a vectorised bounding-box filter avoids pushing every polygon of
# the source layer through the Python-level shape iterator below.
trees_in_grid = trees.cx[grid_left:grid_right, grid_bottom:grid_top]

# Create a generator where every tree polygon = 1
tree_shapes = ((geom, 1) for geom in trees_in_grid.geometry)

if trees_in_grid.empty:
    # rasterize() raises when given no valid shapes; no overlap simply means
    # there is no tree cover anywhere on the grid.
    tree_grid = np.zeros((height, width), dtype='uint8')
else:
    # Burn into a 0/1 grid (uint8 uses less memory)
    tree_grid = features.rasterize(
        shapes=tree_shapes,
        out_shape=(height, width),
        transform=transform,
        fill=0,
        dtype='uint8'
    )

# --- 5. Calculate Overlap (Numpy Speed) ---
print("Calculating zonal statistics...")

# View both rasters as 1D vectors so that cell i in one lines up with cell i
# in the other.
flat_lsoa = lsoa_grid.reshape(-1)
flat_trees = tree_grid.reshape(-1)

# Keep a cell only when it is a tree cell (value 1) that also falls inside
# some LSOA (zone label other than the -1 fill). Both conditions are needed
# because tree polygons can extend beyond the LSOA coverage.
is_tree = flat_trees == 1
is_in_zone = flat_lsoa != -1
mask = np.logical_and(is_tree, is_in_zone)

# Zone label of every retained tree cell
tree_pixels_in_lsoa = flat_lsoa[mask]

# Tally tree cells per zone label. minlength pads the result so that zones
# with no tree cells still get an explicit count of 0.
zone_count = len(lsoa)
tree_pixel_counts = np.bincount(tree_pixels_in_lsoa, minlength=zone_count)

# --- 6. Merge & Save ---
print("Finalizing data...")

# Area of one raster cell, taken from the transform actually used for
# rasterizing (cell width x cell height) rather than assuming RESOLUTION ** 2,
# so the area stays correct even if the cells are not exactly RESOLUTION wide.
pixel_area_m2 = abs(transform.a * transform.e)

# Add results back to the GeoDataFrame
lsoa['tree_area_m2'] = tree_pixel_counts * pixel_area_m2
lsoa['lsoa_area_m2'] = lsoa.area
# NOTE: the numerator is raster-derived (whole cells) while the denominator is
# the exact polygon area, so values for small LSOAs are approximate and may
# marginally exceed 100.
lsoa['tree_pct'] = (lsoa['tree_area_m2'] / lsoa['lsoa_area_m2']) * 100

# Cleanup temporary ID
lsoa = lsoa.drop(columns=['temp_id'])

print(f"Saving to {OUTPUT_PATH}...")
lsoa.to_file(OUTPUT_PATH, driver="GPKG")

print("Done!")

Defining 1m raster grid...
Rasterizing LSOA polygons (creating zone map)...
Rasterizing Tree polygons...
