In [2]:
import geopandas as gpd

# Location of the GiGL open-space sites shapefile on the shared project drive
file_path = r"G:\Shared drives\Wellcome Trust Project Data\0_source_data\GiGL land use data\GiGL_OpenSpace_Sites_All_region\GiGL_OpenSpace_Sites_All_region.shp"

# Load the site polygons together with their attribute table
d = gpd.read_file(filename=file_path)


### parks

In [3]:

# Explore the attribute table: list the available columns, then the distinct
# non-null categories of the two land-use fields and how many there are.
print(d.columns)

print(sorted(d["PrimaryUse"].dropna().unique()))
print(len(d["PrimaryUse"].dropna().unique()))

print(sorted(d["OtherUses"].dropna().unique()))
print(len(d["OtherUses"].dropna().unique()))


# Site-name keywords (matched case-insensitively, as regex alternatives)
# that mark a site with no recorded primary use as a park.
keywords = ['Park', 'Garden', 'Open Space']

# Attribute columns that are not needed for the rest of the analysis.
col_remove = [
    'Easting', 'Northing', 'Qualifier', 'GridRef', 'Postcode',
    'OwnerType', 'Owner', 'Manager', 'AccessRest', 'AccessDets',
    'OpenTimes', 'UploadOnli', 'GoParksSit', 'GoParksAdd', 'GoParksCom',
    'SiteDesc', 'Website', 'FriendsGrp', 'SurveyName', 'SurveyDate',
    'SurveyRef', 'ModDate', 'ModUser', 'ModComm', 'OtherNames', 'Ward',
    'Address', 'OwnerURL', 'SiteHistor', 'LSMDate', 'LSMParcels',
    'VeriStatus', 'VeriDate', 'VeriBy', 'StatDes', 'NonStatDes',
    'LandscDes', 'ByelawDes', 'SPA', 'SAC', 'Ramsar', 'NNR', 'SSSI', 'LNR',
    'SINC', 'COS', 'POS', 'Features', 'Licence']

# drop() returns a new frame, so the raw data in `d` is left untouched.
df = d.drop(columns=col_remove)


# A PrimaryUse value counts as blank when it is null (NaN/None), empty, or
# whitespace only. Comparing with '' alone would miss null entries, which the
# .dropna() calls above suggest are present in this column.
primary_use_blank = df['PrimaryUse'].fillna('').str.strip().eq('')

# Sites whose name contains any of the keywords (null names never match).
name_has_keyword = df['SiteName'].str.contains('|'.join(keywords), case=False, na=False)

mask = primary_use_blank & name_has_keyword

# Back-fill the blank PrimaryUse of keyword-matching sites with "Parks".
df.loc[mask, 'PrimaryUse'] = 'Parks'

# Display updated DataFrame
print(df.head())

Index(['SiteName', 'SiteID', 'PPG17', 'PrimaryUse', 'OtherUses', 'Borough',
       'AreaHa', 'Easting', 'Northing', 'Qualifier', 'GridRef', 'Postcode',
       'OwnerType', 'Owner', 'Manager', 'Access', 'AccessRest', 'AccessDets',
       'OpenTimes', 'UploadOnli', 'GoParksSit', 'GoParksAdd', 'GoParksCom',
       'SiteDesc', 'Website', 'FriendsGrp', 'SurveyName', 'SurveyDate',
       'SurveyRef', 'ModDate', 'ModUser', 'ModComm', 'OtherNames', 'Ward',
       'Address', 'OwnerURL', 'SiteHistor', 'LSMDate', 'LSMParcels',
       'VeriStatus', 'VeriDate', 'VeriBy', 'StatDes', 'NonStatDes',
       'LandscDes', 'ByelawDes', 'SPA', 'SAC', 'Ramsar', 'NNR', 'SSSI', 'LNR',
       'SINC', 'COS', 'POS', 'ConsArea', 'EnglishHer', 'GreenBelt',
       'GreenChain', 'GreenCor', 'GreenFlag', 'LonCommon', 'LonSquare', 'MOL',
       'POPS', 'POSGrade', 'Features', 'Licence', 'geometry'],
      dtype='object')
['Adventure playground', 'Agriculture', 'Allotments', 'Amenity green space', 'Canal', 'Cemetery/chu

### opportunity land cover

In [None]:


# List the distinct non-null POSGrade values in sorted order
print(sorted(d["POSGrade"].dropna().unique()))

# PrimaryUse categories selected as "opportunity" land cover
PrimaryUse_select = [
    "Disused quarry/gravel pit",
    "Disused railway trackbed",
    "Land reclamation",
    "Other hard surfaced areas",
    "Other recreational",
    "Road island/verge",
    "Vacant land",
]

# Keep only the sites whose PrimaryUse is one of the selected categories
is_opportunity_site = d["PrimaryUse"].isin(PrimaryUse_select)
d_filtered = d[is_opportunity_site]

# Uncomment to preview the selection:
# print(d_filtered.head())



Index(['SiteName', 'SiteID', 'PPG17', 'PrimaryUse', 'OtherUses', 'Borough',
       'AreaHa', 'Easting', 'Northing', 'Qualifier', 'GridRef', 'Postcode',
       'OwnerType', 'Owner', 'Manager', 'Access', 'AccessRest', 'AccessDets',
       'OpenTimes', 'UploadOnli', 'GoParksSit', 'GoParksAdd', 'GoParksCom',
       'SiteDesc', 'Website', 'FriendsGrp', 'SurveyName', 'SurveyDate',
       'SurveyRef', 'ModDate', 'ModUser', 'ModComm', 'OtherNames', 'Ward',
       'Address', 'OwnerURL', 'SiteHistor', 'LSMDate', 'LSMParcels',
       'VeriStatus', 'VeriDate', 'VeriBy', 'StatDes', 'NonStatDes',
       'LandscDes', 'ByelawDes', 'SPA', 'SAC', 'Ramsar', 'NNR', 'SSSI', 'LNR',
       'SINC', 'COS', 'POS', 'ConsArea', 'EnglishHer', 'GreenBelt',
       'GreenChain', 'GreenCor', 'GreenFlag', 'LonCommon', 'LonSquare', 'MOL',
       'POPS', 'POSGrade', 'Features', 'Licence', 'geometry'],
      dtype='object')
['Adventure playground', 'Agriculture', 'Allotments', 'Amenity green space', 'Canal', 'Cemetery/chu

### Save filtered shapefile

In [None]:

# Destination of the filtered opportunity-site layer
output_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_Sites_opportunityLC.shp"

# Write the filtered sites to disk in ESRI Shapefile format
d_filtered.to_file(filename=output_path, driver="ESRI Shapefile")

print(f"Filtered shapefile saved at: {output_path}")


## Create land-use (LU) scenarios

In [9]:
import rasterio
import geopandas as gpd
import numpy as np
from rasterio.mask import mask
from rasterio.features import rasterize

# Burn the filtered open-space polygons into the land-cover raster as a single
# class and save the result as a new GeoTIFF (the input raster is not modified).

# --- File Paths ---
landcover_raster_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_v200_Mosaic_Mask_proj.tif"
filtered_shapefile_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_Sites_opportunityLC.shp"
output_raster_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_update.tif"

# --- Step 1: Load the Raster ---
with rasterio.open(landcover_raster_path) as src:
    landcover_data = src.read(1)  # Read the first band
    landcover_meta = src.meta.copy()  # Copy metadata (reused for the output file)
    landcover_crs = src.crs  # Get raster CRS
    landcover_nodata = src.nodata  # None when the file declares no nodata value

# --- Step 2: Load the Shapefile ---
d_filtered = gpd.read_file(filtered_shapefile_path)

# --- Step 3: Reproject Shapefile if Needed ---
# The polygons must share the raster's CRS for the raster transform to apply.
if d_filtered.crs != landcover_crs:
    d_filtered = d_filtered.to_crs(landcover_crs)

# --- Step 4: Rasterize the Shapefile ---
# Null or empty geometries cannot be burned, so they are left out up front.
shapes = [
    (geom, 1)  # Assign value 1 for overlapping areas
    for geom in d_filtered.geometry
    if geom is not None and not geom.is_empty
]

if shapes:
    shape_mask = rasterize(
        shapes,
        out_shape=landcover_data.shape,
        transform=landcover_meta["transform"],
        fill=0,  # Default value for non-overlapping areas
        dtype=np.uint8
    )
else:
    # rasterize() raises ValueError when it receives no valid shapes; with
    # nothing to burn, the mask is simply all zeros and the raster is unchanged.
    shape_mask = np.zeros(landcover_data.shape, dtype=np.uint8)

# --- Step 5: Apply the Mask to Update Land Cover Values ---
# Class value written into every pixel covered by a polygon.
# NOTE(review): the published ESA WorldCover legend uses 10 for "Tree cover";
# 1 is only correct if this raster was reclassified beforehand - confirm
# against the land-cover lookup table used downstream.
tree_cover_code = 1

update_pixels = shape_mask == 1
if landcover_nodata is not None:
    # Leave nodata pixels alone so the valid-data footprint of the raster
    # does not grow where polygons extend beyond it.
    update_pixels &= landcover_data != landcover_nodata

landcover_data[update_pixels] = tree_cover_code  # Change only the selected pixels

# --- Step 6: Save the Updated Raster ---
landcover_meta.update(dtype=rasterio.uint8, compress="lzw")  # Write as uint8, LZW-compressed

with rasterio.open(output_raster_path, "w", **landcover_meta) as dst:
    dst.write(landcover_data.astype(rasterio.uint8), 1)

print(f"Updated land cover raster saved at: {output_raster_path}")


Updated land cover raster saved at: G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_update.tif
