# Packages

In [1]:
import geopandas as gpd
import os


In [2]:

# Location of the GiGL open-space sites shapefile on the shared drive
file_path = r"G:\Shared drives\Wellcome Trust Project Data\0_source_data\GiGL land use data\GiGL_OpenSpace_Sites_All_region\GiGL_OpenSpace_Sites_All_region.shp"

# Load the layer (geometries plus attribute table) into a GeoDataFrame
d = gpd.read_file(filename=file_path)


## 7 park categories 

In [3]:

# Inspect the raw attribute table: column names, then the number of distinct
# non-null values in the two land-use columns.
print(d.columns)
print(d["PrimaryUse"].nunique())
print(d["OtherUses"].nunique())


# Site-name keywords used to recognise park-like sites
keywords = ['Park', 'Garden', 'Open Space']

# Attribute columns that are dropped before classification
col_remove = [
    'Easting', 'Northing', 'Qualifier', 'GridRef', 'Postcode',
    'OwnerType', 'Owner', 'Manager', 'AccessRest', 'AccessDets',
    'OpenTimes', 'UploadOnli', 'GoParksSit', 'GoParksAdd', 'GoParksCom',
    'SiteDesc', 'Website', 'FriendsGrp', 'SurveyName', 'SurveyDate',
    'SurveyRef', 'ModDate', 'ModUser', 'ModComm', 'OtherNames', 'Ward',
    'Address', 'OwnerURL', 'SiteHistor', 'LSMDate', 'LSMParcels',
    'VeriStatus', 'VeriDate', 'VeriBy', 'StatDes', 'NonStatDes',
    'LandscDes', 'ByelawDes', 'SPA', 'SAC', 'Ramsar', 'NNR', 'SSSI', 'LNR',
    'SINC', 'COS', 'POS', 'Features', 'Licence']

# drop() returns a new frame, so the raw table `d` is left untouched
df = d.drop(columns=col_remove)

print(df.columns)

# A `PrimaryUse` entry counts as blank when it is missing (None/NaN), empty,
# or whitespace only. Comparing against '' alone never matches missing values.
primary_use_blank = df['PrimaryUse'].fillna('').astype(str).str.strip().eq('')

# Rows whose `SiteName` contains any keyword (case-insensitive; missing names never match)
name_matches = df['SiteName'].str.contains('|'.join(keywords), case=False, na=False)

mask = primary_use_blank & name_matches

# Back-fill the blank `PrimaryUse` of park-like sites with "Parks"
df.loc[mask, 'PrimaryUse'] = 'Parks'


print('\n')


# List the distinct `PrimaryUse` values after the back-fill
ls = sorted(df["PrimaryUse"].dropna().unique())
print(ls)




Index(['SiteName', 'SiteID', 'PPG17', 'PrimaryUse', 'OtherUses', 'Borough',
       'AreaHa', 'Easting', 'Northing', 'Qualifier', 'GridRef', 'Postcode',
       'OwnerType', 'Owner', 'Manager', 'Access', 'AccessRest', 'AccessDets',
       'OpenTimes', 'UploadOnli', 'GoParksSit', 'GoParksAdd', 'GoParksCom',
       'SiteDesc', 'Website', 'FriendsGrp', 'SurveyName', 'SurveyDate',
       'SurveyRef', 'ModDate', 'ModUser', 'ModComm', 'OtherNames', 'Ward',
       'Address', 'OwnerURL', 'SiteHistor', 'LSMDate', 'LSMParcels',
       'VeriStatus', 'VeriDate', 'VeriBy', 'StatDes', 'NonStatDes',
       'LandscDes', 'ByelawDes', 'SPA', 'SAC', 'Ramsar', 'NNR', 'SSSI', 'LNR',
       'SINC', 'COS', 'POS', 'ConsArea', 'EnglishHer', 'GreenBelt',
       'GreenChain', 'GreenCor', 'GreenFlag', 'LonCommon', 'LonSquare', 'MOL',
       'POPS', 'POSGrade', 'Features', 'Licence', 'geometry'],
      dtype='object')
42
294
Index(['SiteName', 'SiteID', 'PPG17', 'PrimaryUse', 'OtherUses', 'Borough',
       'AreaHa',

#### Classification function

In [11]:
## Load the classification helpers
code_dir = r'D:\natcap\urban-cooling-health\scripts'
code_path = os.path.join(code_dir, "function_park_classification.py")

# Execute the helper script in the notebook namespace.
# - An explicit UTF-8 decode (BOM tolerated) avoids depending on the platform
#   locale codec when reading Python source.
# - compile() attaches the file name so tracebacks point at the script.
# NOTE(review): this runs whatever code is in the file; only use with a trusted path.
with open(code_path, encoding="utf-8-sig") as f:
    exec(compile(f.read(), code_path, "exec"))


# --- Apply the functions ---
# classify_by_name, classify_by_area and final_classification are not defined
# in this notebook; presumably they come from the helper script above (confirm).
# Each is called once per row (axis=1) and its result is stored as a new column.
df["TextClass"] = df.apply(classify_by_name, axis=1)     # Column B
df["AreaClass"] = df.apply(classify_by_area, axis=1)         # Column D
df["FinalClass"] = df.apply(final_classification, axis=1)      # Column E

print(df.head())

output_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_7class.shp"

# Save the classified sites as a new shapefile (no rows are filtered here)
df.to_file(output_path, driver="ESRI Shapefile")

print(f"Classified shapefile saved at: {output_path}")

                       SiteName      SiteID               PPG17  \
0        Upminster Sewage Works  OS_Hv_0541               Other   
1          Fields N of Fen Lane  OS_Hv_0140  Other Urban Fringe   
2        Top Meadow Corner Farm  OS_Hv_0842  Other Urban Fringe   
3  Field adjacent to The Warren  OS_Hv_0677               Other   
4                 Fairplay Farm  OS_Hv_0130  Other Urban Fringe   

           PrimaryUse OtherUses   Borough    AreaHa      Access ConsArea  \
0  Sewage/water works      None  Havering    8.2868        None       No   
1         Agriculture      None  Havering  169.0764  Restricted       No   
2         Agriculture      None  Havering    8.7391        None       No   
3         Vacant land      None  Havering    0.3646    De facto       No   
4         Agriculture      None  Havering   81.9128        None       No   

  EnglishHer  ... GreenFlag LonCommon LonSquare MOL POPS POSGrade  \
0         No  ...        No        No        No  No   No     None   
1 

# Land cover (LC) scenarios

### Opportunity land cover

In [12]:


# Show the distinct non-null POSGrade values present in the raw table
pos_grades = d["POSGrade"].dropna().unique()
print(sorted(pos_grades))

# PrimaryUse categories kept by the filter below
PrimaryUse_select = [
    "Disused quarry/gravel pit",
    "Disused railway trackbed",
    "Land reclamation",
    "Other hard surfaced areas",
    "Other recreational",
    "Road island/verge",
    "Vacant land",
]

# Keep only the sites whose PrimaryUse is one of the selected categories
is_selected_use = d["PrimaryUse"].isin(PrimaryUse_select)
d_filtered = d[is_selected_use]



['District Park', 'Linear Open Space', 'Local Park and Open Space', 'Metropolitan Park', 'Pocket Park', 'Pocket park', 'Regional Park', 'Small Open Space']


#### Save filtered shp

In [None]:

# Destination of the filtered sites layer
output_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_Sites_opportunityLC.shp"

# Write the filtered sites out in ESRI Shapefile format
d_filtered.to_file(filename=output_path, driver="ESRI Shapefile")

print(f"Filtered shapefile saved at: {output_path}")


## Load LC data

In [None]:
import rasterio
import geopandas as gpd
import numpy as np
from rasterio.mask import mask
from rasterio.features import rasterize

# --- File Paths ---
# Input land-cover raster, the filtered sites layer, and the raster written
# by the scenario step.
landcover_raster_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_v200_Mosaic_Mask_proj.tif"
filtered_shapefile_path = "G:/Shared drives/Wellcome Trust Project Data/1_preprocess/UrbanCoolingModel/GiGL_OpenSpace_Sites_opportunityLC.shp"
output_raster_path = r"G:\Shared drives\Wellcome Trust Project Data\1_preprocess\UrbanCoolingModel\ESA_WorldCover_10m_2021_update.tif"

# --- Step 1: Load the Raster ---
# Grab everything needed later while the dataset is open: the NoData value,
# the CRS, a private copy of the metadata, and band 1 as an array.
with rasterio.open(landcover_raster_path) as src:
    nodata = src.nodata
    landcover_crs = src.crs
    landcover_meta = src.meta.copy()
    landcover_data = src.read(1)


## % of each LC - baseline

In [9]:
from collections import Counter

# Class code -> human-readable label used when printing the statistics.
# NOTE(review): assumes the raster stores classes recoded to 1-10 — confirm
# against the preprocessing step that produced it.
land_cover_labels = {
    1: 'Tree cover',
    2: 'Shrubland',
    3: 'Grassland',
    4: 'Cropland',
    5: 'Built-up',
    6: "Bare / sparse vegetation",
    7: "Snow and ice",
    8: "Permanent water bodies",
    9: "Herbaceous wetland",
    10: "Moss and lichen"
}


# Keep only valid pixels as a flat 1-D array. Boolean indexing already
# returns a flattened copy; when the raster declares no NoData value every
# pixel is valid.
if nodata is None:
    valid_pixels = landcover_data.flatten()
else:
    valid_pixels = landcover_data[landcover_data != nodata]

# Count pixels per class in one vectorised pass (instead of iterating every
# pixel in Python). Keys are plain ints in ascending class-code order, so the
# report order is deterministic and comparable between runs.
class_codes, class_counts = np.unique(valid_pixels, return_counts=True)
counts = Counter(dict(zip(class_codes.tolist(), class_counts.tolist())))

# Total number of valid pixels
total_pixels = sum(counts.values())

# Share of the valid pixels taken by each class
proportions = {land_use: count / total_pixels for land_use, count in counts.items()}

# Print the shares with their labels; codes without a label are reported as "Unknown (<code>)"
print("Land use proportions:")
for lu_type, prop in proportions.items():
    label = land_cover_labels.get(lu_type, f"Unknown ({lu_type})")
    print(f"{label}: {prop:.2%}")

Land use proportions:
Built-up: 43.26%
Tree cover: 33.89%
Grassland: 18.15%
Cropland: 2.51%
Bare / sparse vegetation: 0.07%
Permanent water bodies: 2.09%
Herbaceous wetland: 0.04%
Shrubland: 0.00%


## Create LC scenarios 

In [None]:

# --- Step 2: Load the Shapefile ---
d_filtered = gpd.read_file(filtered_shapefile_path)

# --- Step 3: Reproject Shapefile if Needed ---
# The polygons must share the raster's CRS before they are burned onto its grid
if d_filtered.crs != landcover_crs:
    d_filtered = d_filtered.to_crs(landcover_crs)

# --- Step 4: Rasterize the Shapefile ---
# Burn the polygons onto the raster grid: 1 where a site covers a pixel, 0 elsewhere
shape_mask = rasterize(
    [(geom, 1) for geom in d_filtered.geometry],  # Assign value 1 for overlapping areas
    out_shape=landcover_data.shape,
    transform=landcover_meta["transform"],
    fill=0,  # Default value for non-overlapping areas
    dtype=np.uint8
)

# --- Step 5: Apply the Mask to Update Land Cover Values ---
tree_cover_code = 1  # Class code used for "Tree cover"

# Only reclassify pixels that hold valid data. Overwriting NoData pixels
# would turn them into tree cover and enlarge the valid area that the
# proportion statistics divide by.
update_mask = shape_mask == 1
if nodata is not None:
    update_mask &= landcover_data != nodata
landcover_data[update_mask] = tree_cover_code  # Modifies landcover_data in place

# --- Step 6: Save the Updated Raster ---
landcover_meta.update(dtype=rasterio.uint8, compress="lzw")  # Ensure correct datatype

with rasterio.open(output_raster_path, "w", **landcover_meta) as dst:
    dst.write(landcover_data.astype(rasterio.uint8), 1)

print(f"Updated land cover raster saved at: {output_raster_path}")


## % of each LC - scenario

In [13]:
## Load the land-cover scenario raster written by the scenario step
with rasterio.open(output_raster_path) as src:
    landcover_data = src.read(1)  # Read the first band
    landcover_meta = src.meta.copy()  # Copy metadata
    landcover_crs = src.crs  # Get raster CRS
    nodata = src.nodata


# Keep only valid pixels as a flat 1-D array. Boolean indexing already
# returns a flattened copy; when the raster declares no NoData value every
# pixel is valid.
if nodata is None:
    valid_pixels = landcover_data.flatten()
else:
    valid_pixels = landcover_data[landcover_data != nodata]

# Count pixels per class in one vectorised pass (instead of iterating every
# pixel in Python). Keys are plain ints in ascending class-code order, so the
# report order is deterministic and comparable between runs.
class_codes, class_counts = np.unique(valid_pixels, return_counts=True)
counts = Counter(dict(zip(class_codes.tolist(), class_counts.tolist())))

# Total number of valid pixels
total_pixels = sum(counts.values())

# Share of the valid pixels taken by each class
proportions = {land_use: count / total_pixels for land_use, count in counts.items()}

# Print the shares with their labels; codes without a label are reported as "Unknown (<code>)"
print("Land use proportions:")
for lu_type, prop in proportions.items():
    label = land_cover_labels.get(lu_type, f"Unknown ({lu_type})")
    print(f"{label}: {prop:.2%}")

Land use proportions:
Tree cover: 34.98%
Built-up: 42.85%
Grassland: 17.60%
Cropland: 2.44%
Bare / sparse vegetation: 0.06%
Permanent water bodies: 2.03%
Herbaceous wetland: 0.03%
Shrubland: 0.00%
