In [1]:
import osmium
import re
import datetime

import geopandas as gpd
import pandas as pd
import numpy as np

from geopy.geocoders import Nominatim

### Example from: https://oslandia.com/en/2017/07/03/openstreetmap-data-analysis-how-to-parse-the-data-with-python

In [5]:
class TimelineHandler(osmium.SimpleHandler):
    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.elemtimeline = []

    def node(self, n):
        self.elemtimeline.append(["node",
                                  n.id,
                                  n.version,
                                  n.visible,
                                  pd.Timestamp(n.timestamp),
                                  n.uid,
                                  n.changeset,
                                  len(n.tags)])

In [6]:
tlhandler = TimelineHandler()
tlhandler.apply_file("data/osm/rec_historical.osm.pbf")
colnames = ['type', 'id', 'version', 'visible', 'ts', 'uid', 'chgset', 'ntags']
elements = pd.DataFrame(tlhandler.elemtimeline, columns=colnames)
elements = elements.sort_values(by=['type', 'id', 'ts'])
elements.head(10)

Unnamed: 0,type,id,version,visible,ts,uid,chgset,ntags
0,node,100179056,1,True,2007-11-06 17:34:48+00:00,6270,422103,1
1,node,100179056,2,True,2008-12-21 23:07:37+00:00,1596,547324,0
2,node,100179056,3,False,2008-12-21 23:09:11+00:00,1596,547324,0
3,node,100179087,1,True,2007-11-06 17:34:49+00:00,6270,422103,1
4,node,100179087,2,True,2008-12-21 22:29:23+00:00,1596,547324,0
5,node,100179087,3,True,2008-12-21 22:29:28+00:00,1596,547324,0
6,node,100179087,4,True,2008-12-21 22:30:03+00:00,1596,547324,0
7,node,100179087,5,True,2008-12-21 22:30:20+00:00,1596,547324,0
8,node,100179087,6,True,2008-12-21 22:30:29+00:00,1596,547324,0
9,node,100179087,7,True,2008-12-21 22:35:02+00:00,1596,547324,0


In [24]:
elements.iloc[0] # What are the metadata available with node/way/relation that we can work with.

type                            node
id                         100179056
version                            1
visible                         True
ts         2007-11-06 17:34:48+00:00
uid                             6270
chgset                        422103
ntags                              1
Name: 0, dtype: object

In [7]:
# class HistoryHandler(osmium.SimpleHandler):
#     def __init__(self):
#         osmium.SimpleHandler.__init__(self)
#         self.count_bid = 0
#         self.area_to_bid = {}
#         self.relation_to_bid = {}

#     def get_area_to_bid(self):
#         return self.area_to_bid

#     def relation(self, r):
#         tags = dict(r.tags)
        
#         # Qualifiers
#         if not ('building' in tags or 'building:part' in tags or tags.get('type') == 'building'):
#             return
#         # Disqualifiers
#         if (tags.get('location') == 'underground' or 'bridge' in tags):
#             return

#         # print(r)
        
#         if r.id not in self.relation_to_bid:
#             self.relation_to_bid[r.id] = self.count_bid
#             self.count_bid +=1
        
#         for member in r.members:
#             if member.ref not in self.area_to_bid:
#                 self.area_to_bid[member.ref] = self.relation_to_bid[r.id]

In [8]:
# h = HistoryHandler()
# h.apply_file("data/osm/rec_historical.osm.pbf")
# area_to_bid = h.get_area_to_bid()

In [20]:
class OSMHandler(osmium.SimpleHandler):

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.history_data = []
    
    def way(self, w):
        tags = dict(w.tags)
        
        # Qualifiers
        if not ('building' in tags or 'building:part' in tags or tags.get('type') == 'building'):
            return
        # Disqualifiers
        if (tags.get('location') == 'underground' or 'bridge' in tags):
            return
        
        self.history_data.append([w.id,
                                  w.version,
                                  w.visible,
                                  pd.Timestamp(w.timestamp),
                                  w.uid,
                                  w.changeset,
                                  len(w.tags),
                                  tags])

In [21]:
handler = OSMHandler()
handler.apply_file("data/osm/rec_historical.osm.pbf")
colnames = ['id', 'version', 'visible', 'ts', 'uid', 'chgset', 'ntags', 'tags']
elements = pd.DataFrame(handler.history_data, columns=colnames)
elements = elements.sort_values(by=['id', 'ts'])
elements.head(10)

# Some cases to be handled:
# In history data there are erroneous buildings which were later rectified or removed:
# For example way 29163144 after version 28, check https://www.openstreetmap.org/way/29163144/history
# Another example way 44118744 was deleted on version 4 10 years ago, check https://www.openstreetmap.org/way/44118744

Unnamed: 0,id,version,visible,ts,uid,chgset,ntags,tags
0,29163144,27,True,2015-09-30 03:22:51+00:00,1772173,34338585,1,{'building': 'yes'}
1,29163144,28,True,2015-09-30 04:03:06+00:00,1772173,34338585,3,"{'building': 'yes', 'natural': 'coastline', 's..."
2,44118744,2,True,2011-03-10 18:03:41+00:00,186193,7516216,2,"{'amenity': 'public_building', 'building': 'yes'}"
3,44118744,3,True,2012-04-12 12:23:22+00:00,614513,11275702,2,"{'amenity': 'public_building', 'building': 'yes'}"
4,51816211,2,True,2010-03-06 22:52:16+00:00,148877,4055921,2,"{'building': 'yes', 'tourism': 'museum'}"
5,51816211,3,True,2015-09-21 21:17:57+00:00,1772173,34171819,3,"{'building': 'yes', 'roof:shape': 'gabled', 't..."
6,51816211,4,True,2015-12-14 12:31:36+00:00,31385,35944309,4,"{'addr:housename': 'Armazém 12', 'building': '..."
7,51816211,5,True,2022-02-17 13:07:05+00:00,8107931,117520835,3,"{'addr:housename': 'Armazém 12', 'building': '..."
8,51816216,2,True,2010-03-06 22:52:16+00:00,148877,4055921,3,"{'building': 'yes', 'name': 'Livraria Cultura'..."
9,51816216,3,True,2015-09-21 20:03:44+00:00,1772173,34170456,3,"{'building': 'yes', 'name': 'Livraria Cultura'..."


In [24]:
# Also, the latest version number is also available on the latest pbf file

In [None]:
# resolve the issue of relation id and way id! Take help from professor!!