In [1]:
import osmium
import re

import geopandas as gpd
import pandas as pd
import numpy as np

from geopy.geocoders import Nominatim

In [51]:
class RelationHandler(osmium.SimpleHandler):

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.count_bid = 0
        self.area_to_bid = {}
        self.relation_to_bid = {}

    def get_area_to_bid(self):
        return self.area_to_bid
        
    def relation(self, r):
        tags = dict(r.tags)
        
        # Qualifiers
        if not ('building' in tags or 'building:part' in tags or tags.get('type') == 'building'):
            return
        # Disqualifiers
        if (tags.get('location') == 'underground' or 'bridge' in tags):
            return
        
        if r.id not in self.relation_to_bid:
            self.relation_to_bid[r.id] = self.count_bid
            self.count_bid +=1
        
        for member in r.members:
            if member.ref not in self.area_to_bid:
                self.area_to_bid[member.ref] = self.relation_to_bid[r.id]

class AreaHandler(osmium.SimpleHandler):

    def __init__(self, area_to_bid):
        osmium.SimpleHandler.__init__(self)
        self.id = []
        self.orig_id = []
        self.building_id = []
        self.tag = []
        self.geometry = []
        self.height = []
        self.min_height = []
        self.wkbfab = osmium.geom.WKBFactory()
        
        self.area_to_bid = area_to_bid
        self.max_bid = max(area_to_bid, key=area_to_bid.get)

        self.LEVEL_HEIGHT = 3.4

    # https://wiki.openstreetmap.org/wiki/Simple_3D_buildings#Other_roof_tags
    def _feet_to_meters(self, s):
        r = re.compile("([0-9]*\.?[0-9]+)'([0-9]*\.?[0-9]+)?\"?")
        m = r.findall(s)[0]
        if len(m[0]) > 0 and len(m[1]) > 0:
            m = float(m[0]) + float(m[1]) / 12.0
        elif len(m[0]) > 0:
            m = float(m[0])
        return m * 0.3048

    def _get_height(self, tags):
        if 'height' in tags:
            # already accounts for roof
            if '\'' in tags['height'] or '\"' in tags['height']:
                return self._feet_to_meters(tags['height'])
            r = re.compile(r"[-+]?\d*\.\d+|\d+")
            return float(r.findall(tags['height'])[0])
        if 'levels' in tags:
            roof_height = 0
            if 'roof_height' in tags:
                if '\'' in tags['roof_height'] or '\"' in tags['roof_height']:
                    roof_height = self._feet_to_meters(tags['roof_height'])
                else:
                    r = re.compile(r"[-+]?\d*\.\d+|\d+")
                    roof_height = float(r.findall(tags['roof_height'])[0])

            # does not account for roof height
            height = float(tags['levels']) * self.LEVEL_HEIGHT
            if 'roof_levels' in tags and roof_height == 0:
                height += float(tags['roof_levels']) * self.LEVEL_HEIGHT
            return height
        return 7.0

    def _get_min_height(self, tags):
        if 'min_height' in tags:
            # already accounts for roof
            if '\'' in tags['min_height'] or '\"' in tags['min_height']:
                return self._feet_to_meters(tags['min_height'])
            r = re.compile(r"[-+]?\d*\.\d+|\d+")
            return float(r.findall(tags['min_height'])[0])
        if 'min_level' in tags:
            height = float(tags['min_level']) * self.LEVEL_HEIGHT
            return height
        return 0.0
        
    def get_gdf(self):
        geometry = gpd.GeoSeries.from_wkb(self.geometry, crs='epsg:4326')
        height = pd.Series(self.height, dtype='float')
        min_height = pd.Series(self.min_height, dtype='float')
        tag = pd.Series(self.tag)
        iid = pd.Series(self.id, dtype='UInt64')
        orig_id = pd.Series(self.orig_id, dtype='UInt64')
        building_id = pd.Series(self.building_id, dtype='UInt64')
        
        return gpd.GeoDataFrame({
            'id': iid,
            'orig_id': orig_id,
            'building_id': building_id,
            'geometry': geometry,
            'min_height': min_height,
            'height': height,
            'tags': tag
        }, index=geometry.index)
    
    def area(self, a):
        tags = dict(a.tags)
        iid = int(a.id)
        orig_id = int(a.orig_id())
        
        # Qualifiers
        if not ('building' in tags or 'building:part' in tags or tags.get('type') == 'building'):
            return
        # Disqualifiers
        if (tags.get('location') == 'underground' or 'bridge' in tags):
            return
        
        if orig_id in self.area_to_bid:
            building_id = self.area_to_bid[orig_id]
        else:
            building_id = self.max_bid
            self.max_bid+=1
        
        try:
            poly = self.wkbfab.create_multipolygon(a)
            height = self._get_height(tags)
            min_height = self._get_min_height(tags)
            
            self.geometry.append(poly)
            self.height.append(height)
            self.min_height.append(min_height)
            self.tag.append(tags)
            self.id.append(iid)
            self.orig_id.append(orig_id)
            self.building_id.append(building_id)
            
        except Exception as e:
            print(e)
            print(a)

In [52]:
h = RelationHandler()
h.apply_file('data/osm/rec.osm.pbf', locations=True)
area_to_bid = h.get_area_to_bid()
h = AreaHandler(area_to_bid)
h.apply_file('data/osm/rec.osm.pbf', locations=True)

In [53]:
gdf = h.get_gdf()
gdf = gdf.to_crs('epsg:3395')
gdf

Unnamed: 0,id,orig_id,building_id,geometry,min_height,height,tags
0,103632422,51816211,372613649,"MULTIPOLYGON (((-3881901.401 -894700.846, -388...",0.0,7.0,"{'addr:housename': 'Armazém 12', 'building': '..."
1,103632432,51816216,372613650,"MULTIPOLYGON (((-3882153.929 -894808.150, -388...",0.0,7.0,"{'building': 'yes', 'building:levels': '5', 'n..."
2,103632434,51816217,372613651,"MULTIPOLYGON (((-3882170.894 -894729.875, -388...",0.0,13.0,"{'addr:city': 'Recife', 'addr:housenumber': '3..."
3,103632436,51816218,372613652,"MULTIPOLYGON (((-3882008.257 -894818.951, -388...",0.0,7.0,"{'addr:city': 'Recife', 'addr:housenumber': '1..."
4,103632440,51816220,372613653,"MULTIPOLYGON (((-3881952.931 -894798.567, -388...",0.0,7.0,"{'addr:housename': 'Armazém 13', 'building': '..."
...,...,...,...,...,...,...,...
140735,2141551196,1070775598,372754368,"MULTIPOLYGON (((-3886741.183 -893286.228, -388...",0.0,7.0,"{'addr:city': 'Recife', 'addr:housenumber': '1..."
140736,2142328828,1071164414,372754369,"MULTIPOLYGON (((-3884965.670 -882385.388, -388...",0.0,7.0,"{'addr:street': 'Rua Trinta e Nove', 'amenity'..."
140737,2143656474,1071828237,372754370,"MULTIPOLYGON (((-3892679.087 -904817.115, -389...",0.0,7.0,{'building': 'warehouse'}
140738,26060041,13030020,372754371,"MULTIPOLYGON (((-3884610.049 -897014.924, -388...",0.0,40.0,{'alt_name': 'Empresarial Riomar Trade Center ...
