# 02 – Building Classification & Green Roof Statistics (Spatial Analysis)

This notebook:
1. Vectorises the binary segmentation raster into green-roof polygons
2. Loads OSM building footprints
3. Intersects polygons with footprints to assign green-roof coverage per building
4. Computes the status-quo share of green-roof buildings (in %) per roof type

In [None]:
import sys
sys.path.insert(0, '..')

import os
import matplotlib.pyplot as plt
import geopandas as gpd

from src.classification.spatial_analysis import (
    load_building_footprints,
    raster_to_vector,
    classify_buildings,
    compute_green_roof_stats,
)

## 1  Paths

In [None]:
# --- Inputs ---------------------------------------------------------------
# Binary segmentation raster produced by notebook 01.
PRED_PATH = '../data/results/prediction.tif'
# Building footprints exported from OSM via Overpass Turbo.
BUILDINGS_PATH = '../data/raw/buildings_berlin.geojson'

# --- Outputs --------------------------------------------------------------
# Per-building classification result (GeoPackage).
OUTPUT_BLDG = '../data/results/classified_buildings.gpkg'
# Aggregated green-roof statistics (CSV).
OUTPUT_STATS = '../data/results/green_roof_stats.csv'

# --- Coordinate reference system -----------------------------------------
# UTM zone 33N, the standard projected CRS for Berlin.
CRS_TARGET = 'EPSG:25833'

## 2  Vectorise the segmentation prediction

In [None]:
# Turn the prediction raster into polygons. When the raster is missing,
# `green_roof_polygons` is set to None so later cells can skip their work.
if not os.path.isfile(PRED_PATH):
    print('Prediction raster not found – run notebook 01 first.')
    green_roof_polygons = None
else:
    green_roof_polygons = raster_to_vector(
        prediction_path=PRED_PATH,
        green_roof_value=1,   # raster value treated as "green roof"
        min_area_m2=10.0,     # polygons below this area are dropped as noise
    )
    print(f'Green-roof polygons: {len(green_roof_polygons)}')
    # NOTE(review): `.area` is expressed in the units of the layer's CRS; the
    # "m²" label assumes raster_to_vector returns a metric projected CRS —
    # TODO confirm.
    print(f'Total green-roof area: {green_roof_polygons.area.sum():.0f} m²')
    print(green_roof_polygons.head())

## 3  Load building footprints from OSM / Overpass Turbo

In [None]:
# Load the building footprints and reproject them to CRS_TARGET. When the
# file is missing, `buildings` is set to None so later cells can skip.
if not os.path.isfile(BUILDINGS_PATH):
    print('Building footprints not found – skipping.')
    buildings = None
else:
    buildings = load_building_footprints(BUILDINGS_PATH, crs_target=CRS_TARGET)
    # Quick sanity check: row count, available attribute columns, first rows.
    print(f'Buildings loaded: {len(buildings)}')
    print(buildings.columns.tolist())
    print(buildings.head())

## 4  Classify buildings (intersect segmentation with footprints)

In [None]:
# Intersect the green-roof polygons with the building footprints.
# `classified` is None when either input layer is unavailable.
if buildings is not None and green_roof_polygons is not None:
    # Only the buildings were explicitly reprojected to CRS_TARGET; the
    # polygons carry whatever CRS the prediction raster had. Overlaying
    # layers in different CRSs yields meaningless intersections, so align
    # the polygons to the buildings' CRS when both are known and differ.
    bldg_crs = getattr(buildings, 'crs', None)
    poly_crs = getattr(green_roof_polygons, 'crs', None)
    roof_polygons = green_roof_polygons  # keep the original layer untouched
    if bldg_crs is not None and poly_crs is not None and poly_crs != bldg_crs:
        roof_polygons = green_roof_polygons.to_crs(bldg_crs)
        print(f'Reprojected green-roof polygons to {bldg_crs.to_string()}')

    classified = classify_buildings(
        buildings=buildings,
        green_roof_polygons=roof_polygons,
        building_tag_col='building',  # OSM attribute column
    )
    print(classified[['roof_type', 'building_area_m2', 'green_roof_area_m2',
                       'green_roof_fraction', 'is_green_roof']].head(10))

    # Save result; `or '.'` keeps makedirs valid if the path has no directory.
    os.makedirs(os.path.dirname(OUTPUT_BLDG) or '.', exist_ok=True)
    classified.to_file(OUTPUT_BLDG, driver='GPKG')
    print(f'Saved classified buildings to: {OUTPUT_BLDG}')
else:
    classified = None
    print('Skipping classification – required inputs are missing.')

## 5  Compute status-quo green-roof statistics

In [None]:
# Aggregate the classified buildings per roof type, persist the table and
# plot the green-roof percentage for each roof type.
if classified is not None:
    stats = compute_green_roof_stats(classified, roof_type_col='roof_type')
    print(stats.to_string(index=False))

    # Create the CSV's own parent directory instead of relying on an earlier
    # cell having created it for a different output file.
    os.makedirs(os.path.dirname(OUTPUT_STATS) or '.', exist_ok=True)
    stats.to_csv(OUTPUT_STATS, index=False)
    print(f'\nStats saved to: {OUTPUT_STATS}')

    # Bar chart: green-roof percentage per roof type
    fig, ax = plt.subplots(figsize=(9, 5))
    ax.bar(stats['roof_type'], stats['green_roof_percentage'], color='seagreen')
    ax.set_xlabel('Roof type')
    ax.set_ylabel('Green roof buildings (%)')
    ax.set_title('Status Quo – Green Roof Percentage by Roof Type')
    ax.set_ylim(0, 100)  # fixed percentage scale so charts stay comparable
    plt.xticks(rotation=30, ha='right')  # slanted labels to avoid overlap
    plt.tight_layout()
    plt.show()
else:
    print('No classified buildings available.')

## 6  Map: buildings coloured by green-roof fraction

In [None]:
# Choropleth map: each building footprint shaded by its green-roof fraction.
if classified is not None:
    fig, ax = plt.subplots(figsize=(10, 10))
    classified.plot(
        column='green_roof_fraction',
        cmap='YlGn',
        legend=True,
        legend_kwds={'label': 'Green Roof Fraction'},
        ax=ax,
        linewidth=0.1,      # thin outlines so small buildings stay readable
        edgecolor='grey',
    )
    ax.set_title('Green Roof Coverage per Building (Status Quo)')
    ax.axis('off')  # hide the coordinate axes
    fig.tight_layout()
    plt.show()
else:
    # Report the skip explicitly, as the other guarded cells do, instead of
    # finishing silently with no output.
    print('No classified buildings available – skipping map.')