In [2]:
import osmium
import re

import geopandas as gpd
import pandas as pd
import numpy as np

In [36]:
# Assumed height of a single building level. _get_height and _get_min_height
# multiply level counts by this value to estimate heights.
# NOTE(review): presumably metres, matching what _feet_to_meters returns - confirm.
LEVEL_HEIGHT = 3.4

# https://wiki.openstreetmap.org/wiki/Simple_3D_buildings#Other_roof_tags
def _feet_to_meters(s):
    r = re.compile("([0-9]*\.?[0-9]+)'([0-9]*\.?[0-9]+)?\"?")
    m = r.findall(s)[0]
    if len(m[0]) > 0 and len(m[1]) > 0:
        m = float(m[0]) + float(m[1]) / 12.0
    elif len(m[0]) > 0:
        m = float(m[0])
    return m * 0.3048

def _get_height(tags):
    if 'height' in tags:
        # already accounts for roof
        if '\'' in tags['height'] or '\"' in tags['height']:
            return _feet_to_meters(tags['height'])
        r = re.compile(r"[-+]?\d*\.\d+|\d+")
        return float(r.findall(tags['height'])[0])
    if 'levels' in tags:
        roof_height = 0
        if 'roof_height' in tags:
            if '\'' in tags['roof_height'] or '\"' in tags['roof_height']:
                roof_height = _feet_to_meters(tags['roof_height'])
            else:
                r = re.compile(r"[-+]?\d*\.\d+|\d+")
                roof_height = float(r.findall(tags['roof_height'])[0])

        # does not account for roof height
        height = float(tags['levels']) * LEVEL_HEIGHT
        if 'roof_levels' in tags and roof_height == 0:
            height += float(tags['roof_levels']) * LEVEL_HEIGHT
        return height
    return 7.0

def _get_x(tags, x):
    if x in tags:
        return tags[x]
    else:
        return np.nan


def _get_min_height(tags):
    if 'min_height' in tags:
        # already accounts for roof
        if '\'' in tags['min_height'] or '\"' in tags['min_height']:
            return _feet_to_meters(tags['min_height'])
        r = re.compile(r"[-+]?\d*\.\d+|\d+")
        return float(r.findall(tags['min_height'])[0])
    if 'min_level' in tags:
        height = float(tags['min_level']) * LEVEL_HEIGHT
        return height
    return 0.0

class BuildingHandler(osmium.SimpleHandler):
    """osmium handler that gathers building footprints and a few of their tags.

    Every accepted area contributes exactly one entry to each of the parallel
    lists created in ``__init__``; ``get_gdf`` assembles them into a
    GeoDataFrame.
    """

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        # Factory that serialises area geometries as WKB.
        self.wkbfab = osmium.geom.WKBFactory()
        # Parallel per-building columns (index i describes the same building).
        self.geometry = []
        self.height = []
        self.min_height = []
        self.roof_material = []
        self.roof_shape = []
        self.building_material = []
        self.building_colour = []

    def get_gdf(self):
        """Return the collected buildings as a GeoDataFrame in EPSG:4326."""
        shapes = gpd.GeoSeries.from_wkb(self.geometry, crs='epsg:4326')
        columns = {
            'geometry': shapes,
            'min_height': self.min_height,
            'height': self.height,
            'roof_material': self.roof_material,
            'roof_shape': self.roof_shape,
            'building_material': self.building_material,
            'building_colour': self.building_colour,
        }
        return gpd.GeoDataFrame(columns, index=shapes.index)

    def area(self, a):
        """Record area ``a`` if its tags describe an above-ground building.

        Presumably invoked by osmium for each area while a file is applied.
        Any error raised while processing the area is printed together with
        the area, and the area is skipped.
        """
        tags = a.tags

        # Qualifiers: the area must be tagged as a building in some form.
        is_building = (
            'building' in tags
            or 'building:part' in tags
            or tags.get('type', None) == 'building'
        )
        if not is_building:
            return

        # Disqualifiers: underground structures and bridges are left out.
        is_excluded = tags.get('location', None) == 'underground' or 'bridge' in tags
        if is_excluded:
            return

        try:
            # Compute every value first so a failure cannot leave the
            # parallel lists with different lengths.
            record = (
                self.wkbfab.create_multipolygon(a),
                _get_height(tags),
                _get_min_height(tags),
                _get_x(tags, "roof:material"),
                _get_x(tags, "roof:shape"),
                _get_x(tags, "building:material"),
                _get_x(tags, "building:colour"),
            )
            targets = (
                self.geometry,
                self.height,
                self.min_height,
                self.roof_material,
                self.roof_shape,
                self.building_material,
                self.building_colour,
            )
            for column, value in zip(targets, record):
                column.append(value)
        except Exception as e:
            print(e)
            print(a)

In [37]:
# Run the handler over the OSM extract and collect the kept buildings
# into a GeoDataFrame.
# NOTE(review): locations=True is presumably required so osmium can assemble
# area geometries - confirm against the pyosmium documentation.
h = BuildingHandler()
h.apply_file('data/osm/rec.osm.pbf', locations=True)
gdf = h.get_gdf()

In [40]:
# Frequency of each building height. 7.0 is the fallback _get_height returns
# when no usable height/levels tag is found, so a dominant 7.0 mostly reflects
# buildings without height data.
gdf.height.value_counts()

7.0      139940
12.5         95
12.0         70
10.0         42
13.0         42
          ...  
110.0         1
2.1           1
32.0          1
31.0          1
88.0          1
Name: height, Length: 89, dtype: int64

In [39]:
# Frequency of each roof:material value. Buildings without the tag hold NaN,
# which value_counts leaves out by default.
gdf.roof_material.value_counts()

concrete    76
grass        2
glass        1
Name: roof_material, dtype: int64

In [42]:
# Frequency of each roof:shape value. Buildings without the tag hold NaN,
# which value_counts leaves out by default.
gdf.roof_shape.value_counts()

gabled       120
skillion      73
flat          38
pyramidal      3
Name: roof_shape, dtype: int64

In [43]:
# Frequency of each building:material value. Buildings without the tag hold
# NaN, which value_counts leaves out by default.
gdf.building_material.value_counts()

concrete        25
cement_block     5
contentor        1
Name: building_material, dtype: int64

In [35]:
# Display the assembled GeoDataFrame.
# NOTE(review): the saved output of this cell has no building_colour column and
# its execution count is lower than that of the cell defining BuildingHandler,
# so it looks stale - re-run the notebook top to bottom to refresh it.
gdf

Unnamed: 0,geometry,min_height,height,roof_material,roof_shape,building_material
0,"MULTIPOLYGON (((-34.87171 -8.06429, -34.87155 ...",0.0,7.0,,gabled,
1,"MULTIPOLYGON (((-34.87398 -8.06525, -34.87389 ...",0.0,7.0,,,
2,"MULTIPOLYGON (((-34.87413 -8.06455, -34.87409 ...",0.0,13.0,,,
3,"MULTIPOLYGON (((-34.87267 -8.06534, -34.87264 ...",0.0,7.0,,,
4,"MULTIPOLYGON (((-34.87218 -8.06516, -34.87202 ...",0.0,7.0,,gabled,
...,...,...,...,...,...,...
140735,"MULTIPOLYGON (((-34.91519 -8.05162, -34.91519 ...",0.0,7.0,,,
140736,"MULTIPOLYGON (((-34.89924 -7.95401, -34.89923 ...",0.0,7.0,,,
140737,"MULTIPOLYGON (((-34.96853 -8.15485, -34.96819 ...",0.0,7.0,,,
140738,"MULTIPOLYGON (((-34.89605 -8.08500, -34.89597 ...",0.0,40.0,,,
