# Packages

In [56]:
import geopandas as gpd
import os
import rasterio
import geopandas as gpd
import numpy as np
from rasterio.mask import mask
from rasterio.features import rasterize


In [57]:

# Location of the GiGL open-space sites shapefile (all regions).
# The path is written as adjacent raw-string literals only to keep lines short;
# Python joins them into one path string at compile time.
gigl_path = (
    r"G:\Shared drives\Wellcome Trust Project Data\0_source_data"
    r"\GiGL land use data\GiGL_OpenSpace_Sites_All_region"
    r"\GiGL_OpenSpace_Sites_All_region.shp"
)

# Load the sites layer into a GeoDataFrame.
d = gpd.read_file(gigl_path)


# LC input 

## !! choose one of the LC inputs

## ESA data

In [58]:

# # --- File Paths ---
# lc_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_v200_Mosaic_Mask_proj.tif"

# tree_cover_code = 1  # ESA WorldCover: Tree cover
# shrubland_code = 2   # ESA WorldCover: Shrubland
# pavement_code = 5    # ESA WorldCover: Built-up (pavement)
# pavement_name = 'Built-up'

# # list of source codes you want to convert to Built-up
# codes_to_built = [tree_cover_code, shrubland_code, 3]  # add any others

# # Define labels
# land_cover_labels = {
#     1: 'Tree cover',
#     2: 'Shrubland',
#     3: 'Grassland',
#     4: 'Cropland',
#     5: 'Built-up',
#     6: "Bare / sparse vegetation",
#     7: "Snow and ice",
#     8: "Permanent water bodies",
#     9: "Herbaceous wetland",
#     10: "Moss and lichen"
# }



## UK data

In [59]:
# --- UK land-cover input: raster path, class codes and legend ---

# Land-cover raster that every scenario below starts from.
lc_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2023_London_10m_clip2aoi_tcc24.tif"

# Class codes referenced by the scenario cells.
lc_code_new_tcc = 100  # code used for 'Tree canopy' in the reclassified data
tree_cover_code = 1    # labelled 'Deciduous woodland' in the legend below
shrubland_code = 2     # labelled 'Coniferous woodland' in the legend below
pavement_code = 20     # labelled 'Urban'; target class of the pavement scenario
pavement_name = "Urban"

# Source classes that the pavement scenario rewrites to `pavement_code`
# (extend this list to convert further classes).
codes_to_built = [
    1, 2,        # woodland classes
    4, 5, 6, 7,  # grassland classes
    9, 10,       # heather classes
    100,         # tree canopy
]

# Legend: class code -> human-readable label.
land_cover_labels = {
    # woodland
    1: "Deciduous woodland",
    2: "Coniferous woodland",
    # farmland and grassland
    3: "Arable",
    4: "Improved Grassland",
    5: "Neutral Grassland",
    6: "Calcareous Grassland",
    7: "Acid grassland",
    # wetland, heath and rock
    8: "Fen, Marsh, and Swamp",
    9: "Heather",
    10: "Heather grassland",
    11: "Bog",
    12: "Inland Rock",
    # water
    13: "Saltwater",
    14: "Freshwater",
    # coastal
    15: "Supralittoral Rock",
    16: "Supralittoral Sediment",
    17: "Littoral Rock",
    18: "Littoral Sediment",
    19: "Saltmarsh",
    # built environment
    20: "Urban",
    21: "Suburban",
    # added tree-canopy class
    100: "Tree canopy",
}



## Baseline LC data

### % of each LC - baseline

Using the land-cover input selected above (currently the UK data; the ESA cell is commented out).

In [60]:
from collections import Counter

# --- Step 1: Load the raster and flag invalid pixels ---
with rasterio.open(lc_path) as src:
    landcover_meta = src.meta.copy()
    landcover_crs  = src.crs
    transform      = src.transform
    arr            = src.read(1)
    src_nodata     = src.nodata

# Invalid pixels are zeros plus the raster's declared NoData value (if any).
# The array is called `invalid_mask` rather than `mask` so that it does not
# overwrite the `mask` function imported from rasterio.mask at the top of the
# notebook.
invalid_mask = (arr == 0)
if src_nodata is not None:
    if isinstance(src_nodata, float) and np.isnan(src_nodata):
        # NaN never compares equal to anything, so it needs an explicit test.
        invalid_mask |= np.isnan(arr)
    else:
        invalid_mask |= (arr == src_nodata)

# Working copy as a plain int32 ndarray in which every invalid pixel is 0;
# 0 is also recorded as the NoData value in the metadata.
nodata = 0
landcover_meta.update(nodata=nodata)
landcover_data = np.where(invalid_mask, nodata, arr).astype(np.int32)

# --- Count valid land-cover classes ---
# Masked view of the raw band; .compressed() returns the unmasked values as 1-D.
landcover_data_arr = np.ma.array(arr, mask=invalid_mask)
valid_pixels = landcover_data_arr.compressed()

# Tally with np.unique on the array itself instead of converting every pixel to
# a Python int first. `counts` stays a Counter keyed by Python ints; classes
# now come out in ascending class-code order.
class_codes, class_counts = np.unique(valid_pixels.astype(int), return_counts=True)
counts = Counter(dict(zip(class_codes.tolist(), class_counts.tolist())))

# Total number of valid pixels
total_pixels = sum(counts.values())

# Share of the valid pixels taken by each class
proportions = {land_use: count / total_pixels for land_use, count in counts.items()}

# Print with labels (codes missing from the legend are reported as "Unknown")
print("Land use proportions:")
for lu_type, prop in proportions.items():
    label = land_cover_labels.get(lu_type, f"Unknown ({lu_type})")
    print(f"{label}: {prop:.2%}")

Land use proportions:
Improved Grassland: 17.65%
Tree canopy: 19.51%
Deciduous woodland: 0.51%
Arable: 3.58%
Urban: 30.80%
Suburban: 25.12%
Coniferous woodland: 0.07%
Freshwater: 2.05%
Calcareous Grassland: 0.26%
Heather: 0.01%
Heather grassland: 0.03%
Inland Rock: 0.08%
Fen, Marsh, and Swamp: 0.20%
Neutral Grassland: 0.00%
Saltmarsh: 0.05%
Saltwater: 0.07%
Supralittoral Sediment: 0.00%
Littoral Sediment: 0.01%


# LC scenarios 

## Create LC scenario 1 - to pavement 

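Convert every class listed in `codes_to_built` to the pavement class (`pavement_code`). With the UK data this covers the woodland, grassland, heather and tree-canopy classes, which all become 'Urban'.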

In [61]:

lc_scenario_path = lc_path.replace(".tif", "_scenario1_pavement.tif")

# --- Step 2: Remap every class in `codes_to_built` to `pavement_code`, preserving NoData ---
# Mask that protects NoData pixels from being rewritten.
if nodata is not None:
    valid_mask = landcover_data != nodata
else:
    valid_mask = np.ones_like(landcover_data, dtype=bool)

# Pixels whose class is in the conversion list.
to_built = np.isin(landcover_data, codes_to_built)
remapped = np.where(valid_mask & to_built, pavement_code, landcover_data)

# --- Step 3: Write out the scenario raster ---
# Work on a copy of the metadata so the shared `landcover_meta` is not changed
# as a side effect of running this cell. The file is written with the dtype of
# `remapped` and LZW compression.
scenario1_meta = landcover_meta.copy()
scenario1_meta.update(
    dtype=remapped.dtype,
    nodata=nodata,
    compress="lzw"
)

with rasterio.open(lc_scenario_path, "w", **scenario1_meta) as dst:
    dst.write(remapped, 1)

print(f"Scenario saved to:\n {lc_scenario_path}")

# --- Quick summary of the converted area ---
changed_pixels = int((valid_mask & to_built).sum())
# Pixel width x height, in CRS units (m² when the CRS is projected in metres).
px_area = abs(transform.a) * abs(transform.e)
# Same guard as the summary cells: a missing CRS is treated like a geographic one.
if landcover_crs and landcover_crs.is_projected:
    changed_area_km2 = changed_pixels * px_area / 1e6
else:
    changed_area_km2 = None

if changed_area_km2 is not None:
    print(f"Estimated area converted: {changed_area_km2:,.2f} km² "
          f"(assuming projected CRS: {landcover_crs})")
else:
    print("Area estimate skipped (CRS is missing or geographic; reproject raster to a projected CRS for area).")

# --- Legend for the scenario: every converted code now carries the pavement label ---
land_cover_labels_scenario = land_cover_labels.copy()
land_cover_labels_scenario.update(dict.fromkeys(codes_to_built, pavement_name))

Scenario saved to:
 G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2023_London_10m_clip2aoi_tcc24_scenario1_pavement.tif
Estimated area converted: 606.67 km² (assuming projected CRS: EPSG:27700)


### % of each LC - function 

In [62]:

# load function to use in notebook
from function_summarize_lc_classes import summarize_lc_classes


### % of each LC - run 

In [63]:


# ---- Pixel footprint: only known when the raster CRS is projected ----
# rasterio Affine: transform.a = pixel width, transform.e = pixel height (negative)
if landcover_crs and landcover_crs.is_projected:
    px_area_m2 = abs(transform.a) * abs(transform.e)
else:
    px_area_m2 = None

# ---- Class summaries for the baseline raster and the scenario raster ----
# Keyword options shared by both calls.
summary_opts = dict(nodata=nodata, px_area_m2=px_area_m2, sort_by="class")

summary_before = summarize_lc_classes(landcover_data, land_cover_labels, **summary_opts)

# Use the scenario legend when it has been defined; otherwise fall back to the baseline legend.
labels_after = globals().get("land_cover_labels_scenario", land_cover_labels)
summary_after = summarize_lc_classes(remapped, labels_after, **summary_opts)

# ---- Show both tables in full ----
print("\n=== Class proportions BEFORE scenario ===")
print(summary_before.to_string(index=False))

print("\n=== Class proportions AFTER scenario ===")
print(summary_after.to_string(index=False))

# ---- Helper for per-class change reports ----
def class_count(df, code):
    """Return the `count` of the first row of `df` whose `class_code` equals `code`.

    Returns 0 when no row matches. The value is converted to a plain int.
    """
    matches = df.loc[df["class_code"] == code, "count"]
    if len(matches) == 0:
        return 0
    return int(matches.iloc[0])

# c_tree_before = class_count(summary_before, tree_cover_code)
# c_shrub_before = class_count(summary_before, shrubland_code)
# c_built_before = class_count(summary_before, pavement_code)

# c_tree_after  = class_count(summary_after, tree_cover_code)
# c_shrub_after = class_count(summary_after, shrubland_code)
# c_built_after = class_count(summary_after, pavement_code)

# print("\n=== Change summary ===")
# print(f"Tree cover: {c_tree_before:,}  -> {c_tree_after:,}")
# print(f"Shrubland:  {c_shrub_before:,} -> {c_shrub_after:,}")
# print(f"Built-up:   {c_built_before:,} -> {c_built_after:,} "
#       f"(+{(c_built_after - c_built_before):,})")

# ---- Write both summary tables to CSV files named after the scenario raster ----
out_csv_before = lc_scenario_path.replace(".tif", "_class_summary_BEFORE.csv")
summary_before.to_csv(out_csv_before, index=False)

out_csv_after = lc_scenario_path.replace(".tif", "_class_summary_AFTER.csv")
summary_after.to_csv(out_csv_after, index=False)

print(f"\nSaved summaries:\n- {out_csv_before}\n- {out_csv_after}")



=== Class proportions BEFORE scenario ===
 class_code                  label   count  proportion  percent     area_m2  area_ha  area_km2
          1     Deciduous woodland   81608    0.005117    0.512   8160800.0   816.08      8.16
          2    Coniferous woodland   11848    0.000743    0.074   1184800.0   118.48      1.18
          3                 Arable  570469    0.035773    3.577  57046900.0  5704.69     57.05
          4     Improved Grassland 2814783    0.176510   17.651 281478300.0 28147.83    281.48
          5      Neutral Grassland      10    0.000001    0.000      1000.0     0.10      0.00
          6   Calcareous Grassland   41369    0.002594    0.259   4136900.0   413.69      4.14
          8  Fen, Marsh, and Swamp   31479    0.001974    0.197   3147900.0   314.79      3.15
          9                Heather     906    0.000057    0.006     90600.0     9.06      0.09
         10      Heather grassland    4781    0.000300    0.030    478100.0    47.81      0.48
       

### change summary 

In [64]:
## Per-class change in area_km2 between the BEFORE and AFTER summaries
import pandas as pd

# Reload the two summary tables from the CSV paths set by the previous cell.
df_before = pd.read_csv(out_csv_before)
df_after  = pd.read_csv(out_csv_after)

# Line up the two area columns on class_code. The outer join keeps classes that
# appear in only one of the tables (their missing side is NaN).
area_cols = ['class_code', 'area_km2']
merged = (
    df_before[area_cols]
    .rename(columns={'area_km2': 'area_km2_before'})
    .merge(
        df_after[area_cols].rename(columns={'area_km2': 'area_km2_after'}),
        on='class_code',
        how='outer',
    )
)

# Force both area columns to numeric; unparseable values become NaN.
for area_col in ('area_km2_before', 'area_km2_after'):
    merged[area_col] = pd.to_numeric(merged[area_col], errors='coerce')

# A class missing on one side counts as zero area when computing the change.
merged = merged.assign(
    area_km2_change=merged['area_km2_after'].fillna(0) - merged['area_km2_before'].fillna(0)
)

# Save the change table under a name derived from the scenario raster path.
out_csv_change = lc_scenario_path.replace(".tif", "_class_summary_CHANGE.csv")
merged.to_csv(out_csv_change, index=False)
print(f"Saved: {out_csv_change}")

print("\n=== Class proportions changed ===")
print(merged.to_string(index=False))

Saved: G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2023_London_10m_clip2aoi_tcc24_scenario1_pavement_class_summary_CHANGE.csv

=== Class proportions changed ===
 class_code  area_km2_before  area_km2_after  area_km2_change
          1             8.16             NaN            -8.16
          2             1.18             NaN            -1.18
          3            57.05           57.05             0.00
          4           281.48             NaN          -281.48
          5             0.00             NaN             0.00
          6             4.14             NaN            -4.14
          8             3.15            3.15             0.00
          9             0.09             NaN            -0.09
         10             0.48             NaN            -0.48
         12             1.29            1.29             0.00
         13             1.10            1.10             0.00
         14         

## Create LC scenario 2 - more tree cover

### Define and generate opportunity land polygons 

In [65]:

# print(sorted(d["POSGrade"].dropna().unique()))

# # Define categories to filter
# PrimaryUse_select = [
#     "Disused quarry/gravel pit", "Disused railway trackbed", "Land reclamation",
#     "Other hard surfaced areas", "Other recreational", "Road island/verge",
#     "Vacant land"
# ]

# # Filter data based on the selected categories
# polygon = d[d["PrimaryUse"].isin(PrimaryUse_select)]

# # # Display the filtered data
# # print(polygon.head())



# ## Save filtered shp

# output_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_Sites_opportunityLC.shp"

# # Save filtered data as a new shapefile
# polygon.to_file(output_path, driver="ESRI Shapefile")

# print(f"Filtered shapefile saved at: {output_path}")



### Load opportunity land shapefile
shp_opportunityLC_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\GiGL_OpenSpace_Sites_opportunityLC.shp"

# --- Step 2: Load the shapefile ---
polygon = gpd.read_file(shp_opportunityLC_path)

## Total area calculation
# Areas are only meaningful in a projected CRS. The land-cover raster used in
# this notebook is a London clip reported as EPSG:27700, so geographic inputs
# are reprojected to British National Grid; the previously used EPSG:5070 is a
# US projection and does not suit this study area.
if polygon.crs is None:
    # Without a CRS nothing can be reprojected; the figure below is only valid
    # if the coordinates are already in metres.
    print("Warning: opportunity shapefile has no CRS; the area below assumes coordinates are in metres.")
elif polygon.crs.is_geographic:
    polygon = polygon.to_crs(epsg=27700)  # British National Grid

# Total area in square metres (units follow the layer's CRS)
total_area_m2 = polygon.geometry.area.sum()

# Convert to square kilometres
total_area_km2 = total_area_m2 / 1e6
# total_area_ha = total_area_m2 / 10000

print(f"Total area of opportunity land cover: {total_area_km2:,.2f} km²")
# print(f"Total area of opportunity land cover: {total_area_ha:,.2f} hectares")

Total area of opportunity land cover: 33.40 km²


### create the lc scenario

In [66]:
lc_scenario_path = lc_path.replace(".tif", "_scenario2_opp2treecover.tif")


# --- Step 3: Reproject the polygons if needed ---
# Align to the raster CRS only when BOTH CRSs are known; if either is missing
# the polygons are used as they are.
if polygon.crs and landcover_crs and (polygon.crs != landcover_crs):
    polygon = polygon.to_crs(landcover_crs)

# Drop missing/empty geometries (common source of rasterize errors)
polygon = polygon[polygon.geometry.notna() & ~polygon.geometry.is_empty]

# --- Step 4: Rasterize the polygons onto the land-cover grid ---
# Burn value 1 inside the polygons, 0 elsewhere.
if len(polygon) > 0:
    shape_mask = rasterize(
        [(geom, 1) for geom in polygon.geometry],
        out_shape=landcover_data.shape,
        transform=landcover_meta["transform"],
        fill=0,
        dtype="uint8"
    )
else:
    # Nothing to burn in: skip rasterize() and use an all-zero mask, so the
    # scenario raster ends up identical to the baseline.
    shape_mask = np.zeros(landcover_data.shape, dtype="uint8")
    print("Warning: no opportunity polygons left after filtering; scenario equals the baseline.")


# --- Step 5: Assign the tree-canopy code inside the polygons ---

remapped2 = landcover_data.copy()

# Protect NoData if present
if nodata is not None:
    valid_mask = (landcover_data != nodata)
else:
    valid_mask = np.ones_like(landcover_data, dtype=bool)

target_mask = (shape_mask == 1) & valid_mask
remapped2[target_mask] = lc_code_new_tcc


# --- Step 6: Save the updated raster as uint8 ---
# Casting to uint8 would silently wrap codes outside 0-255, so check first.
uint8_range = np.iinfo(np.uint8)
if remapped2.min() < uint8_range.min or remapped2.max() > uint8_range.max:
    raise ValueError("Land-cover codes fall outside the uint8 range (0-255); use a wider output dtype.")

# Build the output metadata on a copy so the shared `landcover_meta` is not
# changed as a side effect of running this cell.
meta_out = landcover_meta.copy()
meta_out.update(
    dtype=rasterio.uint8,
    compress="lzw",
    nodata=nodata
)


with rasterio.open(lc_scenario_path, "w", **meta_out) as dst:
    dst.write(remapped2.astype(rasterio.uint8), 1)

print(f"Updated land cover raster saved at: {lc_scenario_path}")


# Scenario labels (no codes are relabelled in this scenario, so this is a plain copy)
land_cover_labels_scenario2 = land_cover_labels.copy()


Updated land cover raster saved at: G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2023_London_10m_clip2aoi_tcc24_scenario2_opp2treecover.tif


### % of LC change - run

In [67]:

# ---- Pixel footprint: only known when the raster CRS is projected ----
# rasterio Affine: transform.a = pixel width, transform.e = pixel height (negative)
if landcover_crs and landcover_crs.is_projected:
    px_area_m2 = abs(transform.a) * abs(transform.e)
else:
    px_area_m2 = None

# ---- Class summaries for the baseline raster and the scenario-2 raster ----
# Keyword options shared by both calls.
summary_opts = dict(nodata=nodata, px_area_m2=px_area_m2, sort_by="class")

summary_before = summarize_lc_classes(landcover_data, land_cover_labels, **summary_opts)

# Use the scenario-2 legend when it has been defined; otherwise fall back to the baseline legend.
labels_after = globals().get("land_cover_labels_scenario2", land_cover_labels)
summary_after = summarize_lc_classes(remapped2, labels_after, **summary_opts)

# ---- Show both tables in full ----
print("\n=== Class proportions BEFORE scenario ===")
print(summary_before.to_string(index=False))

print("\n=== Class proportions AFTER scenario ===")
print(summary_after.to_string(index=False))

# ---- Helper for per-class change reports ----
def class_count(df, code):
    """Return the `count` of the first row of `df` whose `class_code` equals `code`.

    Returns 0 when no row matches. The value is converted to a plain int.
    """
    matches = df.loc[df["class_code"] == code, "count"]
    if len(matches) == 0:
        return 0
    return int(matches.iloc[0])

# c_tree_before = class_count(summary_before, tree_cover_code)
# c_shrub_before = class_count(summary_before, shrubland_code)
# c_built_before = class_count(summary_before, pavement_code)

# c_tree_after  = class_count(summary_after, tree_cover_code)
# c_shrub_after = class_count(summary_after, shrubland_code)
# c_built_after = class_count(summary_after, pavement_code)

# print("\n=== Change summary ===")
# print(f"Tree cover: {c_tree_before:,} -> {c_tree_after:,}")
# print(f"Shrubland : {c_shrub_before:,} -> {c_shrub_after:,}")
# print(f"Built-up  : {c_built_before:,} -> {c_built_after:,} "
#       f"(+{(c_built_after - c_built_before):,})")

# ---- Write both summary tables to CSV files named after the scenario raster ----
out_csv_before = lc_scenario_path.replace(".tif", "_class_summary_BEFORE.csv")
summary_before.to_csv(out_csv_before, index=False)

out_csv_after = lc_scenario_path.replace(".tif", "_class_summary_AFTER.csv")
summary_after.to_csv(out_csv_after, index=False)

print(f"\nSaved summaries:\n- {out_csv_before}\n- {out_csv_after}")


=== Class proportions BEFORE scenario ===
 class_code                  label   count  proportion  percent     area_m2  area_ha  area_km2
          1     Deciduous woodland   81608    0.005117    0.512   8160800.0   816.08      8.16
          2    Coniferous woodland   11848    0.000743    0.074   1184800.0   118.48      1.18
          3                 Arable  570469    0.035773    3.577  57046900.0  5704.69     57.05
          4     Improved Grassland 2814783    0.176510   17.651 281478300.0 28147.83    281.48
          5      Neutral Grassland      10    0.000001    0.000      1000.0     0.10      0.00
          6   Calcareous Grassland   41369    0.002594    0.259   4136900.0   413.69      4.14
          8  Fen, Marsh, and Swamp   31479    0.001974    0.197   3147900.0   314.79      3.15
          9                Heather     906    0.000057    0.006     90600.0     9.06      0.09
         10      Heather grassland    4781    0.000300    0.030    478100.0    47.81      0.48
       

### change summary

In [68]:
## Per-class change in area_km2 between the BEFORE and AFTER summaries
import pandas as pd

# Reload the two summary tables from the CSV paths set by the previous cell.
df_before = pd.read_csv(out_csv_before)
df_after  = pd.read_csv(out_csv_after)

# Line up the two area columns on class_code. The outer join keeps classes that
# appear in only one of the tables (their missing side is NaN).
area_cols = ['class_code', 'area_km2']
merged = (
    df_before[area_cols]
    .rename(columns={'area_km2': 'area_km2_before'})
    .merge(
        df_after[area_cols].rename(columns={'area_km2': 'area_km2_after'}),
        on='class_code',
        how='outer',
    )
)

# Force both area columns to numeric; unparseable values become NaN.
for area_col in ('area_km2_before', 'area_km2_after'):
    merged[area_col] = pd.to_numeric(merged[area_col], errors='coerce')

# A class missing on one side counts as zero area when computing the change.
merged = merged.assign(
    area_km2_change=merged['area_km2_after'].fillna(0) - merged['area_km2_before'].fillna(0)
)

# Save the change table under a name derived from the scenario raster path.
out_csv_change = lc_scenario_path.replace(".tif", "_class_summary_CHANGE.csv")
merged.to_csv(out_csv_change, index=False)
print(f"Saved: {out_csv_change}")

print("\n=== Class proportions changed ===")
print(merged.to_string(index=False))

Saved: G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\EP_preliminary_tests\clipped_lulc\UKECH\LCM2023_London_10m_clip2aoi_tcc24_scenario2_opp2treecover_class_summary_CHANGE.csv

=== Class proportions changed ===
 class_code  area_km2_before  area_km2_after  area_km2_change
          1             8.16            7.49            -0.67
          2             1.18            1.13            -0.05
          3            57.05           54.32            -2.73
          4           281.48          271.89            -9.59
          5             0.00            0.00             0.00
          6             4.14            4.07            -0.07
          8             3.15            2.86            -0.29
          9             0.09            0.09             0.00
         10             0.48            0.47            -0.01
         12             1.29            0.33            -0.96
         13             1.10            1.10             0.00
         14    