# Packages

In [2]:
import geopandas as gpd
import os
import rasterio
import geopandas as gpd
import numpy as np
from rasterio.mask import mask
from rasterio.features import rasterize


In [3]:

# Define file path
gigl_path = r"G:\Shared drives\Wellcome Trust Project Data\0_source_data\GiGL land use data\GiGL_OpenSpace_Sites_All_region\GiGL_OpenSpace_Sites_All_region.shp"

# Read the spatial data
d = gpd.read_file(gigl_path)


# LC input 

### ESA data

In [4]:

# # --- File Paths ---
# lc_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_v200_Mosaic_Mask_proj.tif"

# tree_cover_code = 1  # ESA WorldCover: Tree cover
# shrubland_code = 2   # ESA WorldCover: Shrubland
# pavement_code = 5    # ESA WorldCover: Built-up (pavement)

# # list of source codes you want to convert to Built-up
# codes_to_built = [tree_cover_code, shrubland_code]  # add any others

# # Define labels
# land_cover_labels = {
#     1: 'Tree cover',
#     2: 'Shrubland',
#     3: 'Grassland',
#     4: 'Cropland',
#     5: 'Built-up',
#     6: "Bare / sparse vegetation",
#     7: "Snow and ice",
#     8: "Permanent water bodies",
#     9: "Herbaceous wetland",
#     10: "Moss and lichen"
# }



### UK data

In [5]:
# --- File Paths ---
lc_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2021_london.tif"

tree_cover_code = 1  # UK data: Tree cover
shrubland_code = 2   # UK data: Shrubland
pavement_code = 20    # UK data: Urban (pavement)

# list of source codes you want to convert to Built-up
codes_to_built = [tree_cover_code, shrubland_code, 9, 10]  # add any others

# Define labels
land_cover_labels = {
    1: 'Deciduous woodland',
    2: 'Coniferous woodland',
    3: 'Arable',
    4: 'Improved Grassland',
    5: 'Neutral Grassland',
    6: 'Calcareous Grassland',
    7: 'Acid grassland',
    8: 'Fen, Marsh, and Swamp',
    9: 'Heather',
    10: 'Heather grassland',
    11: 'Bog',
    12: 'Inland Rock',
    13: 'Saltwater',
    14: 'Freshwater',
    15: 'Supralittoral Rock',
    16: 'Supralittoral Sediment',
    17: 'Littoral Rock',
    18: 'Littoral Sediment',
    19: 'Saltmarsh',
    20: 'Urban',
    21: 'Suburban',
}


## Baseline LC data

### % of each LC - baseline

Using ESA LC data

In [17]:
from collections import Counter

# --- Step 1: Load the Raster & apply mask ---
with rasterio.open(lc_path) as src:
    landcover_meta = src.meta.copy()
    landcover_crs  = src.crs
    transform      = src.transform
    arr            = src.read(1)
    src_nodata     = src.nodata

# build a combined mask: zeros and any existing src NoData
mask = (arr == 0)
if src_nodata is not None:
    mask |= (arr == src_nodata)

# masked array where invalid pixels are masked out
landcover_data = np.ma.array(arr, mask=mask)

# ensure 0 is recorded as NoData going forward
landcover_meta.update(nodata=0)
nodata = 0

# --- Count valid land-use classes ---
# .compressed() returns a 1D array with masked values removed
valid_pixels = landcover_data.compressed()

# Counter expects Python ints; cast once for safety
counts = Counter(valid_pixels.astype(int).tolist())

# Total number of valid pixels
total_pixels = sum(counts.values())

# Calculate proportions
proportions = {land_use: count / total_pixels for land_use, count in counts.items()}

# Print result
# Print with labels
print("Land use proportions:")
for lu_type, prop in proportions.items():
    label = land_cover_labels.get(lu_type, f"Unknown ({lu_type})")
    print(f"{label}: {prop:.2%}")

Land use proportions:
Improved Grassland: 28.23%
Arable: 8.93%
Deciduous woodland: 7.98%
Suburban: 27.21%
Urban: 23.07%
Coniferous woodland: 0.78%
Freshwater: 2.45%
Heather grassland: 0.15%
Heather: 0.02%
Calcareous Grassland: 0.64%
Inland Rock: 0.11%
Fen, Marsh, and Swamp: 0.29%
Neutral Grassland: 0.00%
Saltmarsh: 0.06%
Saltwater: 0.07%
Supralittoral Sediment: 0.00%
Littoral Sediment: 0.01%


# LC scenarios 

## Create LC scenario 1 - to pavement 

Turn tree cover and shrub to built-up land

In [15]:

lc_scenario_path = lc_path.replace(".tif", "_scenario1_pavement.tif")

# --- Step 2: Remap classes (1,2) -> 5 while preserving NoData ---
remapped = landcover_data.copy()

# mask to protect NoData
if nodata is not None:
    valid_mask = landcover_data != nodata
else:
    valid_mask = np.ones_like(landcover_data, dtype=bool)

# boolean mask of all pixels to convert
to_built = np.isin(landcover_data, codes_to_built)
remapped = np.where(valid_mask & to_built, pavement_code, landcover_data)

# --- Step 3: Write out the scenario raster ---
# keep original dtype; add compression if you like
landcover_meta.update(
    dtype=remapped.dtype,
    nodata=nodata,
    compress="lzw"
)

with rasterio.open(lc_scenario_path, "w", **landcover_meta) as dst:
    dst.write(remapped, 1)

print(f"Scenario saved to: {lc_scenario_path}")



# --- Optional: quick summary of the change and area ---
changed_pixels = int((valid_mask & to_built).sum())
px_area = abs(transform.a) * abs(transform.e)  # m² per pixel (if in a projected CRS)
changed_area_km2 = changed_pixels * px_area / 1e6 if landcover_crs.is_projected else None

print(f"Pixels converted to Built-up: {changed_pixels:,}")
if changed_area_km2 is not None:
    print(f"Estimated area converted: {changed_area_km2:,.2f} km² "
          f"(assuming projected CRS: {landcover_crs})")
else:
    print("Area estimate skipped (CRS is geographic; reproject raster to a projected CRS for area).")

# --- Optional: update labels for the new scenario (if you keep a legend) ---
land_cover_labels_scenario = {k: v for k, v in land_cover_labels.items()}
land_cover_labels_scenario[tree_cover_code] = "Built-up"
land_cover_labels_scenario[shrubland_code]  = "Built-up"
# Note: The raster now has only class 5 where 1/2 used to be; this relabel helps for plotting legends.



Scenario saved to: G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2021_london_scenario1_pavement.tif
Pixels converted to Built-up: 2,369,817
Estimated area converted: 236.98 km² (assuming projected CRS: EPSG:27700)


### % of each LC - function 

In [8]:
import numpy as np
import pandas as pd

def summarize_classes(arr, labels, nodata=None, px_area_m2=None, sort_by="class"):
    """
    Summarize class counts, proportions, and (optional) areas for a labeled raster.
    - arr: 2D numpy array of class codes
    - labels: dict {code: "label"}
    - nodata: value to exclude (or None)
    - px_area_m2: pixel area in m² (optional; requires projected CRS)
    - sort_by: "class" or "proportion"
    """
    if nodata is not None:
        mask = arr != nodata
    else:
        mask = np.ones_like(arr, dtype=bool)

    vals, counts = np.unique(arr[mask], return_counts=True)
    total = counts.sum()

    # Build DataFrame
    df = pd.DataFrame({
        "class_code": vals,
        "label": [labels.get(int(v), f"Class {int(v)}") for v in vals],
        "count": counts,
        "proportion": counts / total
    })
    df["percent"] = 100 * df["proportion"]

    if px_area_m2 is not None:
        df["area_m2"] = df["count"] * px_area_m2
        df["area_ha"] = df["area_m2"] / 10_000
        df["area_km2"] = df["area_m2"] / 1e6

    if sort_by == "proportion":
        df = df.sort_values("proportion", ascending=False)
    else:
        df = df.sort_values("class_code")

    # Nice rounding for display
    df["proportion"] = df["proportion"].round(6)
    df["percent"] = df["percent"].round(3)
    for col in ("area_m2", "area_ha", "area_km2"):
        if col in df.columns:
            df[col] = df[col].round(2)

    return df.reset_index(drop=True)




### % of each LC - run 

In [None]:


# ---- Compute pixel area if projected (optional) ----
px_area_m2 = None
if landcover_crs and landcover_crs.is_projected:
    # rasterio Affine: transform.a = pixel width, transform.e = pixel height (negative)
    px_area_m2 = abs(transform.a) * abs(transform.e)

# ---- Summaries: BEFORE (original) and AFTER (remapped) ----
summary_before = summarize_classes(
    landcover_data,
    land_cover_labels,
    nodata=nodata,
    px_area_m2=px_area_m2,
    sort_by="class"
)

# Optional: if you defined land_cover_labels_scenario; otherwise reuse land_cover_labels
labels_after = globals().get("land_cover_labels_scenario", land_cover_labels)

summary_after = summarize_classes(
    remapped,
    labels_after,
    nodata=nodata,
    px_area_m2=px_area_m2,
    sort_by="class"
)

print("\n=== Class proportions BEFORE scenario ===")
print(summary_before.to_string(index=False))

print("\n=== Class proportions AFTER scenario ===")
print(summary_after.to_string(index=False))

# ---- Optional: quick change report for Tree->Built-up and Shrubland->Built-up ----
def class_count(df, code):
    row = df.loc[df["class_code"] == code, "count"]
    return int(row.iloc[0]) if len(row) else 0

c_tree_before = class_count(summary_before, tree_cover_code)
c_shrub_before = class_count(summary_before, shrubland_code)
c_built_before = class_count(summary_before, pavement_code)

c_tree_after  = class_count(summary_after, tree_cover_code)
c_shrub_after = class_count(summary_after, shrubland_code)
c_built_after = class_count(summary_after, pavement_code)

print("\n=== Change summary ===")
print(f"Tree cover: {c_tree_before:,}  -> {c_tree_after:,}")
print(f"Shrubland:  {c_shrub_before:,} -> {c_shrub_after:,}")
print(f"Built-up:   {c_built_before:,} -> {c_built_after:,} "
      f"(+{(c_built_after - c_built_before):,})")

# ---- Optional: save to CSV ----
out_csv_before = lc_scenario_path.replace("_BuiltUpScenario.tif", "_class_summary_BEFORE.csv")
out_csv_after  = lc_scenario_path.replace("_BuiltUpScenario.tif", "_class_summary_AFTER.csv")
summary_before.to_csv(out_csv_before, index=False)
summary_after.to_csv(out_csv_after, index=False)
print(f"\nSaved summaries:\n- {out_csv_before}\n- {out_csv_after}")



=== Class proportions BEFORE scenario ===
 class_code                  label    count  proportion  percent      area_m2   area_ha  area_km2
        0.0                Class 0 11115729    0.295292   29.529 1111572900.0 111157.29   1111.57
        1.0     Deciduous woodland  2116821    0.056234    5.623  211682100.0  21168.21    211.68
        2.0    Coniferous woodland   207816    0.005521    0.552   20781600.0   2078.16     20.78
        3.0                 Arable  2368119    0.062910    6.291  236811900.0  23681.19    236.81
        4.0     Improved Grassland  7487745    0.198914   19.891  748774500.0  74877.45    748.77
        5.0      Neutral Grassland       16    0.000000    0.000       1600.0      0.16      0.00
        6.0   Calcareous Grassland   170706    0.004535    0.453   17070600.0   1707.06     17.07
        8.0  Fen, Marsh, and Swamp    76341    0.002028    0.203    7634100.0    763.41      7.63
        9.0                Heather     4485    0.000119    0.012     448500

## Create LC scenario 2 - more tree cover

### Define and generate opportunity land polygons 

In [10]:

# print(sorted(d["POSGrade"].dropna().unique()))

# # Define categories to filter
# PrimaryUse_select = [
#     "Disused quarry/gravel pit", "Disused railway trackbed", "Land reclamation",
#     "Other hard surfaced areas", "Other recreational", "Road island/verge",
#     "Vacant land"
# ]

# # Filter data based on the selected categories
# d_opportunityLC = d[d["PrimaryUse"].isin(PrimaryUse_select)]

# # # Display the filtered data
# # print(d_opportunityLC.head())



# ## Save filtered shp

# output_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_Sites_opportunityLC.shp"

# # Save filtered data as a new shapefile
# d_opportunityLC.to_file(output_path, driver="ESRI Shapefile")

# print(f"Filtered shapefile saved at: {output_path}")



# --- Step 2: Load the Shapefile ---
d_opportunityLC = gpd.read_file(shp_opportunityLC_path)

## total area calculation
# Ensure CRS is projected (replace EPSG:XXXX with a suitable projection for your area, e.g., EPSG:5070 for US)
if d_opportunityLC.crs.is_geographic:
    d_opportunityLC = d_opportunityLC.to_crs(epsg=5070)  # Albers Equal Area for US

# Calculate total area in square meters
total_area_m2 = d_opportunityLC.geometry.area.sum()

# Optionally convert to square kilometers or hectares
total_area_km2 = total_area_m2 / 1e6
total_area_ha = total_area_m2 / 10000

print(f"Total area of opportunity land cover: {total_area_km2:,.2f} km²")
print(f"Total area of opportunity land cover: {total_area_ha:,.2f} hectares")

Total area of opportunity land cover: 33.40 km²
Total area of opportunity land cover: 3,339.98 hectares


### Load opportunity land shapefile

In [11]:
shp_opportunityLC_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\GiGL_OpenSpace_Sites_opportunityLC.shp"

### create the lc scenario

In [12]:
lc_scenario_opp2treecover = lc_path.replace(".tif", "_scenario2_opp2treecover.tif")


# --- Step 3: Reproject Shapefile if Needed ---
# (Skip if either CRS is None; otherwise align to raster CRS)
if d_opportunityLC.crs and (d_opportunityLC.crs != landcover_crs):
    d_opportunityLC = d_opportunityLC.to_crs(landcover_crs)

# Drop empty/invalid geometries (common source of rasterize errors)
d_opportunityLC = d_opportunityLC[d_opportunityLC.geometry.notna() & ~d_opportunityLC.geometry.is_empty]

# --- Step 4: Rasterize the Shapefile ---
shape_mask = rasterize(
    [(geom, 1) for geom in d_opportunityLC.geometry],
    out_shape=landcover_data.shape,
    transform=landcover_meta["transform"],
    fill=0,
    dtype="uint8"
)


# --- Step 5: Apply the Mask to Update Land Cover Values ---

remapped2 = landcover_data.copy()

# Protect NoData if present
if nodata is not None:
    valid_mask = (landcover_data != nodata)
else:
    valid_mask = np.ones_like(landcover_data, dtype=bool)

target_mask = (shape_mask == 1) & valid_mask
remapped2[target_mask] = tree_cover_code



# --- Step 6: Save the Updated Raster ---
landcover_meta.update(dtype=rasterio.uint8, compress="lzw")  # Ensure correct datatype

meta_out = landcover_meta.copy()
meta_out.update(
    dtype=rasterio.uint8,
    compress="lzw",
    nodata=nodata
)


with rasterio.open(lc_scenario_opp2treecover, "w", **meta_out) as dst:
    dst.write(remapped2.astype(rasterio.uint8), 1)

print(f"Updated land cover raster saved at: {lc_scenario_opp2treecover}")


# Scenario labels
land_cover_labels_scenario2 = land_cover_labels.copy()


Updated land cover raster saved at: G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2021_london_scenario2_opp2treecover.tif


### % of LC change - run

In [None]:

# ---- Compute pixel area if projected (optional) ----
px_area_m2 = None
if landcover_crs and landcover_crs.is_projected:
    # rasterio Affine: transform.a = pixel width, transform.e = pixel height (negative)
    px_area_m2 = abs(transform.a) * abs(transform.e)

# ---- Summaries: BEFORE (original) and AFTER (remapped) ----
summary_before = summarize_classes(
    landcover_data,
    land_cover_labels,
    nodata=nodata,
    px_area_m2=px_area_m2,
    sort_by="class"
)

# Optional: if you defined land_cover_labels_scenario; otherwise reuse land_cover_labels
labels_after = globals().get("land_cover_labels_scenario2", land_cover_labels)

summary_after = summarize_classes(
    remapped2,
    labels_after,
    nodata=nodata,
    px_area_m2=px_area_m2,
    sort_by="class"
)

print("\n=== Class proportions BEFORE scenario ===")
print(summary_before.to_string(index=False))

print("\n=== Class proportions AFTER scenario ===")
print(summary_after.to_string(index=False))

# ---- Optional: quick change report for Tree->Built-up and Shrubland->Built-up ----
def class_count(df, code):
    row = df.loc[df["class_code"] == code, "count"]
    return int(row.iloc[0]) if len(row) else 0

c_tree_before = class_count(summary_before, tree_cover_code)
c_shrub_before = class_count(summary_before, shrubland_code)
c_built_before = class_count(summary_before, pavement_code)

c_tree_after  = class_count(summary_after, tree_cover_code)
c_shrub_after = class_count(summary_after, shrubland_code)
c_built_after = class_count(summary_after, pavement_code)

print("\n=== Change summary ===")
print(f"Tree cover: {c_tree_before:,} -> {c_tree_after:,}")
print(f"Shrubland : {c_shrub_before:,} -> {c_shrub_after:,}")
print(f"Built-up  : {c_built_before:,} -> {c_built_after:,} "
      f"(+{(c_built_after - c_built_before):,})")

# ---- Optional: save to CSV ----
out_csv_before = lc_scenario_path.replace("_BuiltUpScenario.tif", "_class_summary_BEFORE.csv")
out_csv_after  = lc_scenario_path.replace("_BuiltUpScenario.tif", "_class_summary_AFTER.csv")
summary_before.to_csv(out_csv_before, index=False)
summary_after.to_csv(out_csv_after, index=False)
print(f"\nSaved summaries:\n- {out_csv_before}\n- {out_csv_after}")


=== Class proportions BEFORE scenario ===
 class_code                  label    count  proportion  percent      area_m2   area_ha  area_km2
        0.0                Class 0 11115729    0.295292   29.529 1111572900.0 111157.29   1111.57
        1.0     Deciduous woodland  2116821    0.056234    5.623  211682100.0  21168.21    211.68
        2.0    Coniferous woodland   207816    0.005521    0.552   20781600.0   2078.16     20.78
        3.0                 Arable  2368119    0.062910    6.291  236811900.0  23681.19    236.81
        4.0     Improved Grassland  7487745    0.198914   19.891  748774500.0  74877.45    748.77
        5.0      Neutral Grassland       16    0.000000    0.000       1600.0      0.16      0.00
        6.0   Calcareous Grassland   170706    0.004535    0.453   17070600.0   1707.06     17.07
        8.0  Fen, Marsh, and Swamp    76341    0.002028    0.203    7634100.0    763.41      7.63
        9.0                Heather     4485    0.000119    0.012     448500