## Function 

In [17]:
import pandas as pd
import geopandas as gpd
from pathlib import Path
from rasterstats import zonal_stats

def run_zonal_stats(
    aoi_path,
    raster_path,
    out_dir,
    stats=["mean"],
    nodata=-9999.0,
    suffix=None,
    drop_cols=None
):
    """
    Compute zonal statistics for a single raster and save results to CSV.

    Parameters
    ----------
    aoi_path : str or Path
        Path to AOI shapefile/GeoJSON/GeoPackage.
    raster_path : str or Path
        Path to raster file.
    out_dir : str or Path
        Directory where output CSV will be saved.
    stats : list[str], default ["mean"]
        Zonal statistics to compute.
    nodata : float, default -9999.0
        NoData value in raster.
    suffix : str or None
        Optional suffix to append to stat column names and output file.
        If None, uses raster file stem.
    drop_cols : list[str] or None
        Columns to drop from output DataFrame.

    Returns
    -------
    pd.DataFrame
        DataFrame with zonal statistics.
    """
    aoi = gpd.read_file(aoi_path)
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    raster_path = Path(raster_path)
    suffix = suffix if suffix else raster_path.stem

    # look for folder names of interest
    candidates = ["lulc", "scenario"]
    lc_scenario = next((p for p in raster_path.parts if any(c in p for c in candidates)), None)
    print(lc_scenario)  # → "scenario1"

    # Run zonal stats
    zs_result = zonal_stats(
        aoi,
        str(raster_path),
        nodata=nodata,
        geojson_out=True,
        stats=stats
    )

    zs_gdf = gpd.GeoDataFrame.from_features(zs_result)
    df = pd.DataFrame(zs_gdf)

    # Add scenario/lc column if found
    if lc_scenario:
        df["lc_scenario"] = lc_scenario

    # add raster name column
    df["raster"] = raster_path.name

    # # Rename stats columns
    # rename_dict = {s: f"{s}_{suffix}" for s in stats}
    # df = df.rename(columns=rename_dict)

    

    # Drop unwanted columns
    if drop_cols:
        df = df.drop(columns=drop_cols, errors="ignore")

    # Save to CSV
    out_csv = out_dir / f"{suffix}_zonal_stats.csv"
    df.to_csv(out_csv, index=False)
    print(f"[OK] Zonal stats saved → {out_csv}")

    return df


## Data dir

In [18]:
from pathlib import Path

# Base working directory
base = Path(r"G:\Shared drives\Wellcome Trust Project Data") 


# Set the working directory
wd_main     = base
wd_shp      = base / "1_preprocess" / "UrbanCoolingModel" / "OfficialWorkingInputs" / "AOIs"
dir_ucm_out = base / "2_postprocess_intermediate" / "UCM_official_runs"
figures_dir = base / "3_final" / "UCM_figures"

## Batch temp raster 

In [19]:
aoi_adm = wd_shp / "London_Borough_aoi.shp"  # Administrative boundary (e.g., census tracts)

temp1_path = dir_ucm_out / "current_lulc" / "current_climate" / "intermediate" / "T_air_london_current_scenario_20deg_2uhi.tif"


raster = temp1_path
out_dir = raster.parent
print(out_dir)

drop_cols = ["HECTARES", "NONLD_AREA", "ONS_INNER", "SUB_2009", "SUB_2006", "geometry"]

run_zonal_stats(
        aoi_path=aoi_adm,
        raster_path=raster,
        out_dir=out_dir,
        stats=["mean"],
        drop_cols=drop_cols
    )




G:\Shared drives\Wellcome Trust Project Data\2_postprocess_intermediate\UCM_official_runs\current_lulc\current_climate\intermediate
current_lulc
[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\2_postprocess_intermediate\UCM_official_runs\current_lulc\current_climate\intermediate\T_air_london_current_scenario_20deg_2uhi_zonal_stats.csv


Unnamed: 0,NAME,GSS_CODE,mean,lc_scenario,raster
0,Kingston upon Thames,E09000021,21.433136,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
1,Croydon,E09000008,21.383583,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
2,Bromley,E09000006,21.264494,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
3,Hounslow,E09000018,21.456013,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
4,Ealing,E09000009,21.416098,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
5,Havering,E09000016,21.521243,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
6,Hillingdon,E09000017,21.426215,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
7,Harrow,E09000015,21.431018,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
8,Brent,E09000005,21.513034,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif
9,Barnet,E09000003,21.318097,current_lulc,T_air_london_current_scenario_20deg_2uhi.tif


## loop more rasters

In [20]:

rasters = [
    dir_ucm_out / "current_lulc" / "current_climate" / "intermediate" / "T_air_london_current_scenario_20deg_2uhi.tif",
    dir_ucm_out / "current_lulc" / "future_climate"  / "intermediate" / "T_air_london_future_scenario_25deg_5uhi.tif",

    dir_ucm_out / "scenario1"   / "current_climate"  / "intermediate" / "T_air_london_current_scenario_20deg_2uhi.tif",
    dir_ucm_out / "scenario1"   / "future_climate"  / "intermediate" / "T_air_london_future_scenario_25deg_5uhi.tif",

    dir_ucm_out / "scenario2"   / "current_climate"  / "intermediate" / "T_air_london_current_scenario_20deg_2uhi.tif",
    dir_ucm_out / "scenario2"   / "future_climate"  / "intermediate" / "T_air_london_future_scenario_25deg_5uhi.tif",

    dir_ucm_out / "scenario3"   / "current_climate"  / "intermediate" / "T_air_london_current_scenario_20deg_2uhi.tif",
    dir_ucm_out / "scenario3"   / "future_climate"  / "intermediate" / "T_air_london_future_scenario_25deg_5uhi.tif",
]


all_results = []
for raster in rasters:
    df = run_zonal_stats(
        aoi_path=aoi_adm,
        raster_path=raster,
        out_dir=out_dir,
        stats=["mean"],
        drop_cols=drop_cols
    )
    all_results.append(df)

# # Optionally merge all results side-by-side
# df_combined = pd.concat(all_results, axis=1)
# print(df_combined.head())


# row-wise combine
df_combined = pd.concat(all_results, axis=0, ignore_index=True)


current_lulc
[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\2_postprocess_intermediate\UCM_official_runs\current_lulc\current_climate\intermediate\T_air_london_current_scenario_20deg_2uhi_zonal_stats.csv
current_lulc
[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\2_postprocess_intermediate\UCM_official_runs\current_lulc\current_climate\intermediate\T_air_london_future_scenario_25deg_5uhi_zonal_stats.csv
scenario1
[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\2_postprocess_intermediate\UCM_official_runs\current_lulc\current_climate\intermediate\T_air_london_current_scenario_20deg_2uhi_zonal_stats.csv
scenario1
[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\2_postprocess_intermediate\UCM_official_runs\current_lulc\current_climate\intermediate\T_air_london_future_scenario_25deg_5uhi_zonal_stats.csv
scenario2
[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\2_postprocess_interm

In [21]:


# Save to CSV
out_csv = figures_dir / f"T_air_london_{len(all_results)}_zonal_stats_long.csv"
df_combined.to_csv(out_csv, index=False)
print(f"[OK] Zonal stats saved → {out_csv}")

[OK] Zonal stats saved → G:\Shared drives\Wellcome Trust Project Data\3_final\UCM_figures\T_air_london_8_zonal_stats_long.csv
