In [None]:
'''
PPCA 4.0 RESIDENTIAL & NON-RESIDENTIAL BUILDINGS : CLASSIFICATION BASED ON ATTRIBUTES

Author : Perez, Joan

This script filters out buildings with a footprint area of less than 15 m² and optionally filters out buildings that have no walls, if the 'wall' column
exists. It then creates a column 'type' within the OSM building data with three possible values (0 : NA ; 1 : residential or mixed-use ; 
2 : non-residential). Values are filled using the OSM attribute 'building' : 'apartments', 'barracks', 'house', 'residential', 'bungalow', 
'cabin', 'detached', 'dormitory', 'farm', 'static_caravan', 'semidetached_house' & 'stilt_house' are considered as residential or mixed-use 
buildings; the generic value 'yes' is coded as NA (0) and any other value as non-residential (2). Finally, the classification is refined by
re-assigning NA (0) buildings to non-residential (2) when they intersect non-populated OSM land use areas or carry a 'tourism', 'parking', 'shop'
or 'office' attribute. The final share of classified buildings vs buildings with NA values is printed and mapped.

Requirements:
- A specific working environment (see README on the github page of the project https://github.com/perezjoan/PPCA-codes?tab=readme-ov-file)
- Output file from PPCA 3.0 ('osm_all_buildings_ind' (Polygon), OSM all buildings)
- Output file from PPCA 2.0 ('osm_non_populated_areas' (Polygon), OSM land use data with non-populated areas)

Guide to run the script:
- Fill 0.2 box

Output :
- A geopackage file with a single layer
    * 'osm_all_buildings_res_type_with_null' (Polygon), osm buildings with residential classification and null

Acknowledgement: This resource was produced within the emc2 project, which is funded by ANR (France), FFG (Austria), MUR (Italy) and 
Vinnova (Sweden) under the Driving Urban Transition Partnership, which has been co-funded by the European Commission.

License: Attribution-ShareAlike 4.0 International - CC-BY-SA-4.0 license
'''

In [1]:
# 0.1 : libraries
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

In [2]:
# 0.2 : Box to fill with information

# Name of the case study (inserted into the input and output geopackage file names)
Name = "Nice"

# Projected CRS to which the building and land use layers are reprojected
projected_crs = "EPSG:2154"

In [None]:
# 0.3 Data preparation

# OSM buildings : layer 'osm_all_buildings_ind' of the PPCA 3.0 geopackage
gpkg = f'PPCA3_{Name}.gpkg'
all_building = gpd.read_file(gpkg, layer='osm_all_buildings_ind')

# Non-populated land use areas : layer 'osm_non_populated_areas' of the PPCA 2.0 geopackage
gpkg2 = f'PPCA2_{Name}.gpkg'
non_populated_areas = gpd.read_file(gpkg2, layer='osm_non_populated_areas')

In [3]:
# 1. FILTER SMALL BUILDINGS, BUILDINGS WITH NO WALLS & UNDERGROUND BUILDINGS

# Reproject the buildings to the projected CRS defined in box 0.2
all_building = all_building.to_crs(projected_crs)

# Filter out buildings with a surface footprint less than 15 m²
# NOTE(review): column 'A' is read as-is from the input layer; presumably the footprint area in m²
# computed in PPCA 3.0 - confirm against that script
building_filtered = all_building[all_building['A'] >= 15]

# Filter out buildings with no walls (if the wall column exists)
# Rows with a missing 'wall' value compare as different from 'no' and are therefore kept
if 'wall' in building_filtered.columns:
    building_filtered = building_filtered[building_filtered['wall'] != 'no']

# Remove buildings that have a 0 value in the building:levels column (0 = underground buildings
# https://wiki.openstreetmap.org/wiki/Key:building:levels)
# The OSM key is 'building:levels' (singular). The former spelling 'buildings:levels' is still checked
# so that a file carrying that column name is filtered as before.
# Values are converted to numbers first so that a 0 stored as text ('0') is also removed;
# missing or non-numeric values become NaN, compare as different from 0 and are kept.
for levels_column in ('building:levels', 'buildings:levels'):
    if levels_column in building_filtered.columns:
        levels = pd.to_numeric(building_filtered[levels_column], errors='coerce')
        building_filtered = building_filtered[levels != 0]

In [4]:
# 2. CLASSIFICATION USING OSM ATTRIBUTES
# Define a function to determine the type based on the 'building' column (0 : NA ; 1 : residential or mixed-use ; 2 : non-residential)
# Attribute values : https://wiki.openstreetmap.org/wiki/Key:building
def assign_type(building_type):
    """Convert one OSM 'building' attribute value into a classification code.

    Parameters
    ----------
    building_type : str or missing value
        Value of the 'building' attribute of a single building.

    Returns
    -------
    int
        0 : NA (generic value 'yes', or a missing / non-text value)
        1 : residential or mixed-use
        2 : non-residential (any other text value)
    """
    residential_values = {
        'apartments', 'barracks', 'house', 'residential', 'bungalow', 'cabin', 'detached',
        'dormitory', 'farm', 'static_caravan', 'semidetached_house', 'stilt_house',
    }

    # A missing value (None / NaN) carries no information about the use of the building :
    # code it as NA instead of letting it fall through to the non-residential branch.
    if not isinstance(building_type, str) or building_type == 'yes':
        return 0
    if building_type in residential_values:
        return 1
    return 2

# Work on an independent copy of the filtered buildings, then store the classification code
# computed from the 'building' attribute in the column 'type'
building_filtered = building_filtered.copy()
type_codes = building_filtered['building'].map(assign_type)
building_filtered.loc[:, 'type'] = type_codes

In [5]:
# 3. REFINED CLASSIFICATION
# Spatial join with non-residential landuse areas
non_populated_areas = non_populated_areas.to_crs(projected_crs)
joined_data = gpd.sjoin(building_filtered, non_populated_areas, how="left", predicate="intersects")

# The left join returns one row per (building, land use polygon) match, so a building intersecting
# several polygons appears several times. Keep a single row per building to avoid duplicated
# geometries in the output and inflated counts in the statistics. Every duplicate of a building has
# a matched polygon, so keeping the first one leads to the same 'type' value.
joined_data = joined_data[~joined_data.index.duplicated(keep='first')]

# Buildings with NA in non-residential landuse areas are given the value 2 (non residential)
joined_data.loc[(joined_data['type'] == 0) & joined_data['landuse'].notnull(), 'type'] = 2

# Update 'type' to 2 if 'type' is 0 (NA) and at least one non-null value exists in the following columns : 'tourism', 'parking', 'shop' or 'office'
# Only the columns actually present are used : depending on the case study, some of these OSM
# attributes may be absent from the building layer (same logic as the optional 'wall' column).
usage_columns = [column for column in ('tourism', 'parking', 'shop', 'office') if column in joined_data.columns]
has_usage_value = joined_data[usage_columns].notnull().any(axis=1)
condition = (joined_data['type'] == 0) & has_usage_value
joined_data.loc[condition, 'type'] = 2

In [None]:
# A1. Save Outputs
# Write the classified buildings as a single layer of the PPCA 4.0 geopackage
gpkg = f'PPCA4_{Name}.gpkg'
joined_data.to_file(
    gpkg,
    driver="GPKG",
    layer='osm_all_buildings_res_type_with_null',
)

In [None]:
# A2. Map & Statistics
# Print the percentage and count of each building type
# Percentages are derived from the counts so that the column is only scanned once
feature_counts = joined_data['type'].value_counts()
type_counts = feature_counts / feature_counts.sum() * 100
type_labels = {0: "0 : NA", 1: "1 : Residential or mixed-use"}
print("Percentage and count of building types using attribute values, specialized columns & landuse")
for type_code, percentage in type_counts.items():
    feature_count = feature_counts[type_code]
    # Any code other than 0 or 1 is reported as non-residential
    label = type_labels.get(type_code, "2 : Non-residential")
    print(f"{label}: {percentage:.2f}% ({feature_count} buildings)")

# Remove the columns added by the spatial join. errors='ignore' keeps the cell re-runnable :
# on a second execution the columns are already gone and a plain drop would raise a KeyError.
columns_to_drop = ['index_right', 'element_type_right', 'osmid_right', 'landuse']
joined_data = joined_data.drop(columns=columns_to_drop, errors='ignore')

# Plot with custom colors
# The colormap is built from the types actually present, in sorted order, so that each type always
# gets the same color (0 : red ; 1 : blue ; 2 : green) even when one of the types is absent.
type_colors = {0: 'red', 1: 'blue', 2: 'green'}
present_types = sorted(joined_data['type'].dropna().unique())
colors = [type_colors[type_code] for type_code in present_types] or list(type_colors.values())
fig, ax = plt.subplots(figsize=(10, 10))
joined_data.plot(column='type', categorical=True, legend=True, ax=ax, cmap=ListedColormap(colors))
plt.title('Distribution of Building Types')
plt.show()