In [3]:
import osmium
import geopandas as gpd

import pandas as pd
import numpy as np

import re

from collections import defaultdict

In [None]:
class RelationHandler(osmium.SimpleHandler):
    """Assign a shared building id to building relations and their members.

    After a pass over the data, ``to_bid`` maps the id of every accepted
    relation, and the ref of each of its members, to a positive building id.
    Ids are handed out sequentially starting at 1; ``count_bid`` always holds
    the next unused id. Looking up an unknown key in ``to_bid`` yields 0,
    which is used as the "not assigned yet" marker.

    NOTE(review): relation ids and member refs are stored in the same mapping
    without recording the member type -- confirm that ids from different OSM
    object types cannot clash for the data being processed.
    """

    def __init__(self):
        super().__init__()
        self.history_relation = []  # not used anywhere in this class
        self.count_bid = 1
        self.to_bid = defaultdict(lambda: 0)

    def relation(self, r):
        """Register relation ``r`` and its members if it describes a building."""
        tags = dict(r.tags)

        is_building = (
            'building' in tags
            or 'building:part' in tags
            or tags.get('type') == 'building'
        )
        is_excluded = tags.get('location') == 'underground' or 'bridge' in tags
        if not is_building or is_excluded:
            return

        # Reuse the id this relation inherited as a member of an earlier
        # relation; otherwise take the next free one.
        relation_bid = self.to_bid[r.id]
        if not relation_bid:
            relation_bid = self.count_bid
            self.to_bid[r.id] = relation_bid
            self.count_bid += 1

        # Members that already belong to a building keep their first id.
        for member in r.members:
            ref = member.ref
            if not self.to_bid[ref]:
                self.to_bid[ref] = relation_bid

    
class AreaHandler(osmium.SimpleHandler):
    """Collect building areas with their geometry, heights and building ids.

    Every area accepted by :meth:`area` appends one entry to each of the
    parallel lists ``id``, ``orig_id``, ``building_id``, ``tag``,
    ``geometry``, ``height`` and ``min_height``. :meth:`get_gdf` assembles
    those lists into a GeoDataFrame.

    Parameters
    ----------
    to_bid : mapping of int -> int
        Building ids that were already assigned, keyed by OSM object id
        (for example ``RelationHandler.to_bid``). May be empty. Areas whose
        original id is not in the mapping get fresh ids above the largest
        value it contains.

    NOTE(review): ``to_bid`` is looked up with the area's original id only,
    without distinguishing whether the area came from a way or a relation --
    confirm that this cannot mix up objects sharing a numeric id.
    """

    # Feet with optional inches, e.g. 12', 12'6" or 12' 6".
    _FEET_RE = re.compile(r"([0-9]*\.?[0-9]+)\s*'\s*([0-9]*\.?[0-9]+)?")
    # Inches only, e.g. 6".
    _INCHES_RE = re.compile(r"([0-9]*\.?[0-9]+)\s*\"")
    # First (optionally signed) decimal or integer number in a string.
    _NUMBER_RE = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")

    def __init__(self, to_bid):
        osmium.SimpleHandler.__init__(self)
        self.id = []
        self.orig_id = []
        self.building_id = []
        self.tag = []
        self.geometry = []
        self.height = []
        self.min_height = []
        self.wkbfab = osmium.geom.WKBFactory()

        self.to_bid = to_bid
        # Next unused building id: one above the largest id already handed
        # out, or 1 when nothing has been assigned yet.
        self.count_bid = max(self.to_bid.values(), default=0) + 1

        # Height assumed for a single building level.
        self.LEVEL_HEIGHT = 3.4

    @staticmethod
    def _first_tag(tags, *keys):
        """Return the value of the first of ``keys`` present in ``tags``, else None."""
        for key in keys:
            if key in tags:
                return tags[key]
        return None

    # https://wiki.openstreetmap.org/wiki/Simple_3D_buildings#Other_roof_tags
    def _feet_to_meters(self, s):
        """Convert a feet/inches string such as ``12'6"`` or ``6"`` to metres.

        Raises
        ------
        ValueError
            If ``s`` contains neither a feet nor an inches quantity.
        """
        feet_match = self._FEET_RE.search(s)
        if feet_match is not None:
            feet = float(feet_match.group(1))
            inches = float(feet_match.group(2)) if feet_match.group(2) else 0.0
        else:
            inch_match = self._INCHES_RE.search(s)
            if inch_match is None:
                raise ValueError(f'cannot parse feet/inches value: {s!r}')
            feet = 0.0
            inches = float(inch_match.group(1))
        return (feet + inches / 12.0) * 0.3048

    def _parse_length(self, value):
        """Parse a length tag value into a float, or return None if unparsable.

        Values containing ``'`` or ``"`` are read as feet/inches and converted
        to metres; otherwise the first number in the string is returned as is.
        """
        if '\'' in value or '\"' in value:
            try:
                return self._feet_to_meters(value)
            except ValueError:
                return None
        found = self._NUMBER_RE.search(value)
        return float(found.group(0)) if found else None

    def _parse_number(self, value):
        """Parse a numeric tag value (e.g. a level count), or return None."""
        try:
            return float(value)
        except ValueError:
            found = self._NUMBER_RE.search(value)
            return float(found.group(0)) if found else None

    def _get_height(self, tags):
        """Return the building height derived from ``tags``.

        Order of precedence: an explicit ``height`` tag; otherwise the level
        count times ``LEVEL_HEIGHT`` plus the roof (roof height if given,
        else roof levels times ``LEVEL_HEIGHT``); otherwise 7.0.
        Both the OSM keys (``building:levels``, ``roof:height``,
        ``roof:levels``) and the underscore spellings are accepted.
        """
        if 'height' in tags:
            # already accounts for roof
            height = self._parse_length(tags['height'])
            if height is not None:
                return height

        raw_levels = self._first_tag(tags, 'building:levels', 'levels')
        levels = None if raw_levels is None else self._parse_number(raw_levels)
        if levels is None:
            return 7.0

        # does not account for roof height, so the roof is added below
        height = levels * self.LEVEL_HEIGHT

        raw_roof_height = self._first_tag(tags, 'roof:height', 'roof_height')
        roof_height = (
            None if raw_roof_height is None
            else self._parse_length(raw_roof_height)
        )
        if roof_height:
            height += roof_height
        else:
            # Fall back to roof levels only when no usable roof height exists.
            raw_roof_levels = self._first_tag(tags, 'roof:levels', 'roof_levels')
            roof_levels = (
                None if raw_roof_levels is None
                else self._parse_number(raw_roof_levels)
            )
            if roof_levels:
                height += roof_levels * self.LEVEL_HEIGHT
        return height

    def _get_min_height(self, tags):
        """Return the height of the building's lower edge derived from ``tags``.

        Uses ``min_height`` if parsable, else the minimum level
        (``building:min_level`` or ``min_level``) times ``LEVEL_HEIGHT``,
        else 0.0.
        """
        if 'min_height' in tags:
            min_height = self._parse_length(tags['min_height'])
            if min_height is not None:
                return min_height

        raw_min_level = self._first_tag(tags, 'building:min_level', 'min_level')
        if raw_min_level is not None:
            min_level = self._parse_number(raw_min_level)
            if min_level is not None:
                return min_level * self.LEVEL_HEIGHT
        return 0.0

    def get_gdf(self):
        """Return the collected areas as a GeoDataFrame in EPSG:4326.

        Columns: ``id``, ``orig_id``, ``building_id``, ``geometry``,
        ``min_height``, ``height`` and ``tags`` (the tag dict of each area).
        """
        geometry = gpd.GeoSeries.from_wkb(self.geometry, crs='epsg:4326')
        height = pd.Series(self.height, dtype='float')
        min_height = pd.Series(self.min_height, dtype='float')
        tag = pd.Series(self.tag)
        iid = pd.Series(self.id, dtype='UInt64')
        orig_id = pd.Series(self.orig_id, dtype='UInt64')
        building_id = pd.Series(self.building_id, dtype='UInt64')

        return gpd.GeoDataFrame({
            'id': iid,
            'orig_id': orig_id,
            'building_id': building_id,
            'geometry': geometry,
            'min_height': min_height,
            'height': height,
            'tags': tag
        }, index=geometry.index)

    def area(self, a):
        """Record area ``a`` if it is a building that is not underground or a bridge."""
        tags = dict(a.tags)

        # Qualifiers
        if not ('building' in tags or 'building:part' in tags or tags.get('type') == 'building'):
            return
        # Disqualifiers
        if (tags.get('location') == 'underground' or 'bridge' in tags):
            return

        # Build the geometry first so that an area whose geometry cannot be
        # created is reported and skipped without consuming a building id.
        try:
            poly = self.wkbfab.create_multipolygon(a)
        except Exception as e:
            print(e)
            print(a)
            return

        iid = int(a.id)
        orig_id = int(a.orig_id())

        # Reuse the id assigned earlier (0 / missing means "none yet"),
        # otherwise hand out the next free one.
        building_id = self.to_bid.get(orig_id)
        if not building_id:
            building_id = self.count_bid
            self.count_bid += 1

        self.geometry.append(poly)
        self.height.append(self._get_height(tags))
        self.min_height.append(self._get_min_height(tags))
        self.tag.append(tags)
        self.id.append(iid)
        self.orig_id.append(orig_id)
        self.building_id.append(building_id)

In [76]:
# Check whether a GeoDataFrame can be divided into square grid cells
# (intended: 1 km x 1 km). The cell side `size` is interpreted in the units of
# gdf_poly's CRS -- NOTE(review): `gdf_poly` and `size` are defined outside
# this cell; confirm gdf_poly is in a metric CRS if kilometre cells are wanted.
# Rest of it should be similar to direct-trust-indicators.

xmin, ymin, xmax, ymax = gdf_poly.total_bounds
cell_width = cell_height = size
cells = []
for x0 in np.arange(xmin, xmax + cell_width, cell_width):
    for y0 in np.arange(ymin, ymax + cell_height, cell_height):
        # box() takes (minx, miny, maxx, maxy): grow each cell towards +x/+y
        # so the grid starts at the lower-left corner of the data bounds.
        x1 = x0 + cell_width
        y1 = y0 + cell_height
        new_cell = box(x0, y0, x1, y1)
        cells.append(new_cell)

# Give the grid the same CRS as the polygons so the overlay compares like
# with like instead of a CRS-less frame against a georeferenced one.
grid = gpd.GeoDataFrame({'geometry': cells}, crs=gdf_poly.crs)
cells = gpd.overlay(gdf_poly, grid, how='intersection', keep_geom_type=True).values