In [46]:
import osmium
import re

import geopandas as gpd
import pandas as pd
import numpy as np

from geopy.geocoders import Nominatim

from collections import defaultdict

In [54]:
class WayHandler(osmium.SimpleHandler):

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.id = []
        self.tag = []
        self.height = []
        self.min_height = []

        self.LEVEL_HEIGHT = 3.4

    # https://wiki.openstreetmap.org/wiki/Simple_3D_buildings#Other_roof_tags
    def _feet_to_meters(self, s):
        r = re.compile("([0-9]*\.?[0-9]+)'([0-9]*\.?[0-9]+)?\"?")
        m = r.findall(s)[0]
        if len(m[0]) > 0 and len(m[1]) > 0:
            m = float(m[0]) + float(m[1]) / 12.0
        elif len(m[0]) > 0:
            m = float(m[0])
        return m * 0.3048

    def _get_height(self, tags):
        if 'height' in tags:
            # already accounts for roof
            if '\'' in tags['height'] or '\"' in tags['height']:
                return self._feet_to_meters(tags['height'])
            r = re.compile(r"[-+]?\d*\.\d+|\d+")
            return float(r.findall(tags['height'])[0])
        if 'levels' in tags:
            roof_height = 0
            if 'roof_height' in tags:
                if '\'' in tags['roof_height'] or '\"' in tags['roof_height']:
                    roof_height = self._feet_to_meters(tags['roof_height'])
                else:
                    r = re.compile(r"[-+]?\d*\.\d+|\d+")
                    roof_height = float(r.findall(tags['roof_height'])[0])

            # does not account for roof height
            height = float(tags['levels']) * self.LEVEL_HEIGHT
            if 'roof_levels' in tags and roof_height == 0:
                height += float(tags['roof_levels']) * self.LEVEL_HEIGHT
            return height

        return 7.0 # Should I return N/A -> Ask professor!!

    def _get_min_height(self, tags):
        if 'min_height' in tags:
            # already accounts for roof
            if '\'' in tags['min_height'] or '\"' in tags['min_height']:
                return self._feet_to_meters(tags['min_height'])
            r = re.compile(r"[-+]?\d*\.\d+|\d+")
            return float(r.findall(tags['min_height'])[0])
        if 'min_level' in tags:
            height = float(tags['min_level']) * self.LEVEL_HEIGHT
            return height
        return 0.0
        
    def get_df(self):
        height = pd.Series(self.height, dtype='float')
        min_height = pd.Series(self.min_height, dtype='float')
        tag = pd.Series(self.tag)
        iid = pd.Series(self.id, dtype='UInt64')
        
        return pd.DataFrame({
            'id': iid,
            'min_height': min_height,
            'height': height,
            'tags': tag
        })
    
    def way(self, w):
        tags = dict(w.tags)
        id = int(w.id)
        
        # Qualifiers
        if not ('building' in tags or 'building:part' in tags or tags.get('type') == 'building'):
            return
        # Disqualifiers
        if (tags.get('location') == 'underground' or 'bridge' in tags):
            return

        # pointList = []
        # for p in list(w.nodes):
        #     pointList.append(geometry.Point(p.lat, p.lon))
        # poly = geometry.Polygon([[p.lat, p.lon] for p in pointList])

        # print(poly.wkt)

        # Manage way and relation together
        
        try:
            height = self._get_height(tags)
            min_height = self._get_min_height(tags)
            self.height.append(height)
            self.min_height.append(min_height)
            self.tag.append(tags)
            self.id.append(id)
            
        except Exception as e:
            print(e)
            print(w)

In [57]:
h = WayHandler()
h.apply_file('data/osm/rec.osm.pbf', locations=True)
df = h.get_df()
df

Unnamed: 0,id,min_height,height,tags
0,51816211,0.0,7.0,"{'addr:housename': 'Armazém 12', 'building': '..."
1,51816216,0.0,7.0,"{'building': 'yes', 'building:levels': '5', 'n..."
2,51816217,0.0,13.0,"{'addr:city': 'Recife', 'addr:housenumber': '3..."
3,51816218,0.0,7.0,"{'addr:city': 'Recife', 'addr:housenumber': '1..."
4,51816220,0.0,7.0,"{'addr:housename': 'Armazém 13', 'building': '..."
...,...,...,...,...
140362,1064791239,0.0,7.0,"{'building': 'apartments', 'building:levels': ..."
140363,1064791240,0.0,7.0,{'building': 'apartments'}
140364,1070775598,0.0,7.0,"{'addr:city': 'Recife', 'addr:housenumber': '1..."
140365,1071164414,0.0,7.0,"{'addr:street': 'Rua Trinta e Nove', 'amenity'..."


In [23]:
# Gather all tags of current recife buildings into a list
all_tags = []
mp = defaultdict(lambda: 0)
for tag in df['tags']:
    for key in tag:
        if(not mp[key] and key != 'height' and key != 'min_height'):
            mp[key] = 1
            all_tags.append(key)
all_tags.sort()

In [25]:
df = df.reindex(columns=[*df.columns.tolist(), *all_tags], fill_value=np.nan) # Add all tags as separate pandas column with default value nan

# Check if tag has value for a building. If so, add it to its corresponding new column
n = len(df)
for i in range(n):
    tags = df.iloc[i]['tags']

    for key in tags:
        df.at[i, key] = tags[key]

In [36]:
df.drop(columns=['tags'], inplace=True)
df.to_csv('all-building-features.csv') # Save in a csv file

In [45]:
# count of how many building has such tags in recife
n = len(df)
tag_counts = df.isnull().sum(axis = 0)
for tag, counts in tag_counts.iteritems():
    print(tag, n - counts)

id 140740
orig_id 140740
building_id 140740
geometry 140740
min_height 140740
height 140740
PrefRecife:escola_codigo 16
PrefRecife:escola_tipo 16
PrefRecife:mec_codigo 16
abandoned 1
abandoned:building 2
access 37
addr:city 2354
addr:flats 14
addr:hamlet 1
addr:housename 31
addr:housenumber 10567
addr:place 14
addr:postcode 1236
addr:street 11535
addr:suburb 2251
addr:unit 14
admin_level 1
advertising 1
aeroway 24
air_conditioning 37
alt_name 170
amenity 1306
animal 18
architect 3
area 11
area:highway 3
artist_name 1
artwork_type 4
atm 13
attraction 8
automated 2
baby_feeding 2
bar 2
barrier 2
bench 2
branch 1
brand 245
brand:wikidata 241
brand:wikipedia 234
brewery 7
building 140708
building:colour 153
building:flats 17
building:levels 1272
building:material 31
building:min_level 4
building:part 356
building_1 4
bus 67
capacity 10
capacity:disabled 4
capacity:persons 1
castle_type 2
changing_table 3
clothes 4
club 8
construction 15
consulate 2
contact:email 1
contact:facebook 2
contac