# OSM Preprocessing

### Importing Libraries

In [1]:
!pip install osmnx google-cloud-storage rasterio > /dev/null

!python -m osmnx --version

/usr/bin/python3: No module named osmnx.__main__; 'osmnx' is a package and cannot be directly executed


In [2]:
# Standard library
import os

# Third-party (each module imported once)
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import rasterio
from rasterio.features import rasterize

# Colab-specific
from google.colab import drive


# Mount Google Drive so project data persists across Colab sessions.
drive.mount('/content/drive')

# Root directory for raw project data on the mounted drive; created if missing.
work_dir = '/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/'
os.makedirs(work_dir, exist_ok=True)

Mounted at /content/drive


### Preprocessing

#### Area of Interest (Altona, Hamburg) Selection


In [3]:
# Area of Interest (AOI) to be processed
place_name = "Altona, Hamburg, Germany"

# Tags to be used for OSM data
# These tags can be enriched depending on the project's needs.
# NOTE(review): "greenfield" is listed among the green-space land uses — confirm
# that this OSM value really denotes vegetation for this project's purposes.
tags = {
    "building": True,
    "highway": True,
    "leisure": ["park", "garden", "playground"],
    "landuse": ["forest", "grass", "greenfield"],
    "natural": ["water", "wetland"]
}

# Output directory for the generated OSM rasters; created if missing.
PROCESSED_DIR = '/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/osm/'
os.makedirs(PROCESSED_DIR, exist_ok=True)
print(f"OSM data will be saved to '{PROCESSED_DIR}' directory.")

# Reference raster whose grid the OSM masks are aligned to.
# The rasterization loop later in this notebook reads REFERENCE_RASTER_PATH, so
# the constant must be defined here; leaving it commented out makes that cell
# fail with a NameError on a fresh kernel.
REFERENCE_RASTER_PATH = '/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/sentinel2/B04_10m.jp2'
if os.path.exists(REFERENCE_RASTER_PATH):
    print(f"'{REFERENCE_RASTER_PATH}' will be used as the reference raster.")
else:
    # Warn instead of raising so the cells that do not need the reference
    # raster (download, grid definition) can still be run.
    print(f"WARNING: Reference raster file not found: {REFERENCE_RASTER_PATH}. "
          "Rasterization against the reference grid will fail until it exists.")


OSM data will be saved to '/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/osm/' directory.


In [5]:
from shapely.geometry import box

# Target grid: projection and nominal cell size (expressed in CRS units).
# NOTE(review): EPSG:3857 units are not true ground meters away from the
# equator — confirm that a nominal 10-unit cell is what is intended here.
raster_crs = "EPSG:3857"  # Web Mercator projection
raster_resolution = 10  # meters

# Geocode the AOI, project it, and take the extent of its boundary.
boundary = ox.geocode_to_gdf(place_name).to_crs(raster_crs)
bounds = boundary.total_bounds
minx, miny, maxx, maxy = bounds
bbox = box(minx, miny, maxx, maxy)

# Number of cells along each axis. int() truncates, so the extent is spread
# over slightly fewer cells than extent / resolution would give exactly.
width = int((maxx - minx) / raster_resolution)
height = int((maxy - miny) / raster_resolution)

# Affine transform mapping the width x height grid onto the AOI extent.
transform = rasterio.transform.from_bounds(minx, miny, maxx, maxy, width, height)


In [None]:
# Loop through years and generate yearly OSM raster masks
for year in range(2014, 2025):
    print(f"Processing OSM data for year {year}...")

    # Download OSM data (note: same data for each year due to API limitations)
    gdf = ox.features_from_place(place_name, tags=tags)
    gdf = gdf.to_crs(raster_crs)

Processing OSM data for year 2014...


In [None]:
# Downloading OSM data
#print(f"Downloading OSM data for '{place_name}'...")
#gdf = ox.features_from_place(place_name, tags=tags)
#print(f"Download completed. A total of {len(gdf)} geometries were found.")

#print(gdf.head())

Downloading OSM data for 'Altona, Hamburg, Germany'...
Download completed. A total of 87454 geometries were found.
                                geometry crossing            highway  \
element id                                                             
node    131185   POINT (9.8874 53.56527)       no    traffic_signals   
        491108  POINT (9.91555 53.58605)      NaN  motorway_junction   
        491128  POINT (9.89759 53.55653)      NaN  motorway_junction   
        491129  POINT (9.89719 53.55379)      NaN    traffic_signals   
        496093  POINT (9.91162 53.57753)      NaN          milestone   

               traffic_signals:direction TMC:cid_58:tabcd_1:Class  \
element id                                                          
node    131185                   forward                      NaN   
        491108                       NaN                    Point   
        491128                       NaN                    Point   
        491129                     

#### OSM Rasterization Strategy

For simplicity and efficiency, a single binary raster is generated per year (e.g., `osm_2014.tif`, `osm_2015.tif`, ..., `osm_2024.tif`) that combines all relevant OSM features such as buildings, roads, green spaces, and water bodies.

This approach reduces the number of input channels and simplifies data handling during preprocessing and model training.

If needed, semantic separation of OSM layers (e.g., buildings, roads, parks, water) can be implemented later to enhance model interpretability and performance. This would involve generating separate raster layers for each feature type per year.



In [None]:
# Buildings
#buildings = gdf[gdf['building'].notna()]
#print(f"A total of {len(buildings)} buildings were found.")

# Roads (Only select linear geometries)
#roads = gdf[gdf['highway'].notna() & (gdf.geom_type == 'LineString')]
#print(f"A total of {len(roads)} road segments were found.")

# Green Spaces (Parks, forests, grassy areas)
#green_spaces = gdf[
#    (gdf['leisure'].isin(['park', 'garden', 'playground'])) |
#    (gdf['landuse'].isin(['forest', 'grass', 'greenfield']))
#]
#print(f"A total of {len(green_spaces)} green spaces were found.")

# Water Bodies
#water = gdf[gdf['natural'].isin(['water', 'wetland'])]
#print(f"A total of {len(water)} water bodies were found.")


A total of 43780 buildings were found.
A total of 33695 road segments were found.
A total of 1706 green spaces were found.
A total of 319 water bodies were found.


#### Rasterizing Vector layers


In [None]:
def rasterize_with_reference(gdf, reference_raster_path, output_path):
    """
    Rasterizes a GeoDataFrame onto the grid of a reference raster and saves it.

    Every usable geometry is burned with value 1 into a single-band uint8
    array that has the reference raster's shape and transform; all other
    pixels are 0. The array is written to ``output_path`` as an LZW-compressed
    GeoTIFF and also returned.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Vector features to burn. Reprojected to the reference CRS if needed.
    reference_raster_path : str
        Path of the raster whose CRS, transform and shape define the grid.
    output_path : str
        Destination path of the mask raster.

    Returns
    -------
    numpy.ndarray
        The uint8 mask, with the same shape as the reference raster.
    """
    with rasterio.open(reference_raster_path) as src:
        ref_meta = src.meta.copy()
        ref_transform = src.transform
        ref_shape = src.shape
        ref_crs = src.crs

    # Reproject vector data to match reference raster CRS
    if gdf.crs != ref_crs:
        gdf = gdf.to_crs(ref_crs)

    # Skip missing and empty geometries as well as invalid ones: a missing
    # geometry has no `is_valid` attribute and would crash the filter.
    shapes = [
        (geom, 1)
        for geom in gdf.geometry
        if geom is not None and not geom.is_empty and geom.is_valid
    ]

    print(f"Rasterizing unified OSM mask to: {output_path}")
    if shapes:
        rasterized_array = rasterize(
            shapes=shapes,
            out_shape=ref_shape,
            transform=ref_transform,
            fill=0,
            dtype=rasterio.uint8
        )
    else:
        # Nothing to burn: produce an all-zero mask instead of letting
        # rasterize() fail on an empty shape list.
        rasterized_array = np.zeros(ref_shape, dtype=np.uint8)

    # Save raster. The metadata is copied from the reference, so force the
    # GeoTIFF driver (a .jp2 reference would otherwise pass on its own driver
    # to the .tif output) and drop the reference's nodata value, which need
    # not be meaningful, or even representable, for a uint8 0/1 mask.
    ref_meta.update(driver='GTiff', dtype='uint8', count=1, compress='lzw', nodata=None)
    with rasterio.open(output_path, 'w', **ref_meta) as dst:
        dst.write(rasterized_array, 1)

    return rasterized_array



In [None]:
# Download OSM data once: the query is identical for every year (same data for
# each year due to API limitations), so repeating it inside the loop was pure
# overhead.
gdf = ox.features_from_place(place_name, tags=tags)

for year in range(2014, 2025):
    print(f"Processing unified OSM raster for year {year}...")

    # Output path
    output_path = os.path.join(PROCESSED_DIR, f"osm_{year}.tif")

    # Rasterize using reference raster. The result is kept in `osm_mask`, so
    # after the loop it holds the last (2024) mask read by the verification plot.
    osm_mask = rasterize_with_reference(gdf, REFERENCE_RASTER_PATH, output_path)

    print(f"Saved unified OSM raster for {year} to {output_path}")


Old code (earlier per-layer rasterization, kept for reference)

In [None]:
# In this step, each separated vector layer is converted into a raster (GeoTIFF) file
# with the same size and resolution as the reference Sentinel-2 image.

def rasterize_and_save(gdf, reference_raster_path, output_path):
    """
    Rasterizes a GeoDataFrame based on a reference raster and saves it.

    Pixels covered by a geometry get value 1, all others 0. The single-band
    uint8 result has the reference raster's shape and transform, is written
    to ``output_path`` as an LZW-compressed GeoTIFF, and is returned.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Vector layer to burn. Reprojected to the reference CRS if needed.
    reference_raster_path : str
        Path of the raster whose CRS, transform and shape define the grid.
    output_path : str
        Destination path of the mask raster.

    Returns
    -------
    numpy.ndarray
        The uint8 mask, with the same shape as the reference raster.
    """
    # Open the reference raster and get its metadata
    with rasterio.open(reference_raster_path) as src:
        ref_meta = src.meta.copy()
        ref_transform = src.transform
        ref_shape = src.shape
        ref_crs = src.crs

    # Match the CRS of the vector data to the reference raster
    if gdf.crs != ref_crs:
        gdf = gdf.to_crs(ref_crs)

    # Missing or empty geometries cannot be burned; leave them out up front.
    shapes = [
        (geom, 1)  # Assign value 1 to pixels with geometry
        for geom in gdf.geometry
        if geom is not None and not geom.is_empty
    ]

    print(f"Rasterizing for '{output_path}'...")
    # Perform rasterization
    if shapes:
        rasterized_array = rasterize(
            shapes=shapes,
            out_shape=ref_shape,
            transform=ref_transform,
            fill=0,  # Assign value 0 to all other pixels
            dtype=rasterio.uint8
        )
    else:
        # Empty layer: an all-zero mask instead of an error from rasterize().
        rasterized_array = np.zeros(ref_shape, dtype=np.uint8)

    # Save the new raster. The metadata is copied from the reference, so force
    # the GeoTIFF driver (a .jp2 reference would otherwise pass on its own
    # driver) and clear the inherited nodata value for the uint8 0/1 mask.
    ref_meta.update(driver='GTiff', dtype='uint8', count=1, compress='lzw', nodata=None)
    with rasterio.open(output_path, 'w', **ref_meta) as dst:
        dst.write(rasterized_array, 1)

    print(f"'{output_path}' has been successfully created.")
    return rasterized_array

# Rasterize each layer
#building_mask = rasterize_and_save(buildings, REFERENCE_RASTER_PATH, os.path.join(PROCESSED_DIR, 'building_mask.tif'))
#road_mask = rasterize_and_save(roads, REFERENCE_RASTER_PATH, os.path.join(PROCESSED_DIR, 'road_mask.tif'))
#green_space_mask = rasterize_and_save(green_spaces, REFERENCE_RASTER_PATH, os.path.join(PROCESSED_DIR, 'green_space_mask.tif'))
#water_mask = rasterize_and_save(water, REFERENCE_RASTER_PATH, os.path.join(PROCESSED_DIR, 'water_mask.tif'))

#print("\nAll OSM layers have been successfully rasterized and saved.")


### Visualization (For Verification)

In [None]:
# Visualize the raster using matplotlib
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(osm_mask, cmap='viridis')
ax.set_title('Rasterized OSM Mask for Hamburg (2024)')
ax.set_axis_off()
plt.show()

Old visualization (per-layer building mask, kept for reference)

In [None]:
# Visualize one of the raster masks we created to verify the correctness of the process.
# `building_mask` is only produced by the per-layer rasterization, which is
# currently commented out, so the mask is read from disk and the cell is
# skipped when that file does not exist.
building_mask_path = os.path.join(PROCESSED_DIR, 'building_mask.tif')

if os.path.exists(building_mask_path):
    print("Visualizing the building mask (building_mask.tif)...")

    with rasterio.open(building_mask_path) as src:
        building_mask = src.read(1)

    # Visualization using Matplotlib
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    ax.imshow(building_mask, cmap='viridis')  # 1s appear yellow, 0s appear purple
    ax.set_title('Rasterized Building Mask for Hamburg')
    ax.set_axis_off()  # Hide axes
    plt.show()
else:
    print(f"'{building_mask_path}' not found; skipping the building mask visualization.")


#### Summary and Next Steps

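In this notebook, we downloaded OpenStreetMap (OSM) data for the Altona district of Hamburg and generated yearly raster masks from 2014 to 2024 on the grid of a 10-meter resolution reference raster (Sentinel-2 B04 band). Each mask combines relevant urban features such as buildings, roads, green spaces, and water bodies into a single binary raster. Note that the same OSM download is used for every year (the query is not year-specific), so the yearly masks are currently identical.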

These masks were saved in the `data/raw/osm/` directory and are spatially aligned with the satellite data, ready for integration.

In the next notebook, `03_data_processing.ipynb`, we will combine these OSM masks with Land Surface Temperature (LST) and spectral indices (e.g., NDVI, NDBI) to create a multi-channel tensor. This tensor will serve as the final input for the U-Net segmentation model to detect and analyze Urban Heat Island (UHI) patterns.

