In [None]:
# Install the required packages with the interpreter that runs this kernel
# (sys.executable), so pip targets the same Python the notebook is using.
import sys
!{sys.executable} -m pip install osmium pandas

In [None]:
import osmium as osm
import pandas as pd

In [None]:
class TimelineHandler(osm.SimpleHandler):
    """Osmium handler that records one metadata row per element it receives.

    Each node, way and relation handed to the handler is appended to
    ``self.elemtimeline`` as a list of the form
    ``[type, id, version, visible, timestamp, uid, changeset, tag count]``.
    """

    def __init__(self):
        super(TimelineHandler, self).__init__()
        # Accumulated rows, in the order the callbacks were invoked.
        self.elemtimeline = []

    def element(self, e, elem_type):
        """Append the metadata of element ``e``, labelled with ``elem_type``."""
        record = [
            elem_type,
            e.id,
            e.version,
            e.visible,
            pd.Timestamp(e.timestamp),
            e.uid,
            e.changeset,
            len(e.tags),  # only the number of tags is kept, not the tags
        ]
        self.elemtimeline.append(record)

    def node(self, n):
        """Record ``n`` as a "node" row."""
        self.element(n, "node")

    def way(self, w):
        """Record ``w`` as a "way" row."""
        self.element(w, "way")

    def relation(self, r):
        """Record ``r`` as a "relation" row."""
        self.element(r, "relation")

In [None]:
# Run the handler over the history extract; it accumulates one row per
# element it is handed.
tlhandler = TimelineHandler()
tlhandler.apply_file("data/ottgat.osh.pbf")

# Turn the accumulated rows into a table ordered by type, id and timestamp.
colnames = ['type', 'id', 'version', 'visible', 'ts', 'uid', 'chgset', 'ntags']
elements = (pd.DataFrame(tlhandler.elemtimeline, columns=colnames)
            .sort_values(by=['type', 'id', 'ts']))

# Export the timeline, writing timestamps at second resolution.
elements.to_csv("output/ottgat_elem.csv", date_format='%Y-%m-%d %H:%M:%S')

In [None]:
def datedelems(history, date):
    """Return, for each element, its highest version dated at or before ``date``.

    Parameters
    ----------
    history : pandas.DataFrame
        Element versions; must contain the columns ``type``, ``id``,
        ``version`` and ``ts``.
    date
        Upper bound (inclusive) compared against the ``ts`` column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``history`` whose ``version`` is the maximum found among
        rows with ``ts <= date`` for the same ``(type, id)`` pair.
    """
    # Keep only the versions that already existed at the requested date.
    known_versions = history.query("ts <= @date")
    # Highest version number per element among those.
    latest = known_versions.groupby(['type', 'id'])['version'].max()
    latest = latest.reset_index()
    # Join back on the full history to recover the remaining columns.
    return latest.merge(history, on=['type', 'id', 'version'])

def osm_stats(osm_history, timestamp):
    """Summarise the data as it stood at ``timestamp``.

    The history is first reduced with ``datedelems`` to the latest version of
    each element at ``timestamp``; the counts are computed on that result.

    Returns
    -------
    list
        ``[n_nodes, n_ways, n_relations, n_users, n_chgsets]`` where the first
        three are row counts per element type and the last two are the numbers
        of distinct ``uid`` and ``chgset`` values.
    """
    snapshot = datedelems(osm_history, timestamp)
    elem_types = snapshot['type']
    # Row count for each element type, in the fixed order node/way/relation.
    per_type = [len(snapshot[elem_types == kind])
                for kind in ("node", "way", "relation")]
    distinct_users = snapshot.uid.nunique()
    distinct_chgsets = snapshot.chgset.nunique()
    return per_type + [distinct_users, distinct_chgsets]

def osm_chronology(history, start_date, end_date):
    """Compute ``osm_stats`` at every month end between two dates.

    Parameters
    ----------
    history : pandas.DataFrame
        Element history, passed unchanged to ``osm_stats``.
    start_date, end_date
        Bounds handed to ``pandas.date_range``.

    Returns
    -------
    pandas.DataFrame
        One row per month-end date in the range (used as the index), with the
        columns ``n_nodes``, ``n_ways``, ``n_relations``, ``n_users`` and
        ``n_chgsets``.
    """
    # Use the MonthEnd offset object rather than the "1M" string: the "M"
    # alias is deprecated in recent pandas in favour of "ME", whereas the
    # offset object yields the same month-end dates on every pandas version.
    timerange = pd.date_range(start_date, end_date,
                              freq=pd.offsets.MonthEnd()).values
    # Dates are passed as strings, as osm_stats is called elsewhere with one.
    rows = [osm_stats(history, str(date)) for date in timerange]
    return pd.DataFrame(rows, index=timerange,
                        columns=['n_nodes', 'n_ways', 'n_relations',
                                 'n_users', 'n_chgsets'])

In [None]:
# Statistics for the data as of 2009-03-30, returned as a list:
# [nodes, ways, relations, distinct uids, distinct changesets].
osmstats = osm_stats(elements, "2009-03-30")

In [None]:
# One row of statistics per month end between 2007-01-01 and 2017-10-01.
chrono_data = osm_chronology(elements, "2007-01-01", "2017-10-01")

In [None]:
# Call print() as a function: the bare ``print chrono_data`` statement is a
# SyntaxError on Python 3, while the parenthesised form with a single argument
# prints the same text on Python 2 and Python 3.
print(chrono_data)

# Save the monthly statistics to CSV.
chrono_data.to_csv("output/ottgat_chrono.csv")