In [1]:
# Install the third-party packages this notebook imports (osmium, pandas).
# pip is run through sys.executable, i.e. the interpreter executing this
# cell, rather than whichever `pip` happens to be first on the shell PATH.
import sys
!{sys.executable} -m pip install osmium pandas

[33mDEPRECATION: Python 2.7 will reach the end of its life on January 1st, 2020. Please upgrade your Python as Python 2.7 won't be maintained after that date. A future version of pip will drop support for Python 2.7.[0m
Collecting osmium
[?25l  Downloading https://files.pythonhosted.org/packages/5e/7b/c758044c288b1e3f06247764754ed952c4f1136d7ee74a5db371db624641/osmium-2.15.2-cp27-cp27mu-manylinux1_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 118kB/s eta 0:00:01
[?25hCollecting pandas
[?25l  Downloading https://files.pythonhosted.org/packages/db/83/7d4008ffc2988066ff37f6a0bb6d7b60822367dcb36ba5e39aa7801fda54/pandas-0.24.2-cp27-cp27mu-manylinux1_x86_64.whl (10.1MB)
[K     |████████████████████████████████| 10.1MB 213kB/s eta 0:00:01
[?25hCollecting pytz>=2011k (from pandas)
[?25l  Downloading https://files.pythonhosted.org/packages/3d/73/fe30c2daaaa0713420d0382b16fbb761409f532c56bdcc514bf7b6262bb6/pytz-2019.1-py2.py3-none-any.whl (510kB)
[K     |██████████

In [2]:
import osmium as osm
import pandas as pd

In [3]:
class TimelineHandler(osm.SimpleHandler):
    """Collect one summary row per element version read from an OSM file.

    Rows accumulate in ``self.elemtimeline`` as plain lists laid out as
    ``[elem_type, id, version, visible, timestamp, uid, changeset, n_tags]``.
    """

    def __init__(self):
        osm.SimpleHandler.__init__(self)
        # Filled by element(); one inner list per element version seen.
        self.elemtimeline = []

    def element(self, e, elem_type):
        """Append the summary row describing ``e`` to the timeline.

        Only scalar values are copied out of ``e`` (plus the number of tags);
        the timestamp is converted to a ``pd.Timestamp``.

        :param e: element object exposing ``id``, ``version``, ``visible``,
            ``timestamp``, ``uid``, ``changeset`` and ``tags``
        :param elem_type: label stored in the first column of the row
        """
        row = [
            elem_type,
            e.id,
            e.version,
            e.visible,
            pd.Timestamp(e.timestamp),
            e.uid,
            e.changeset,
            len(e.tags),
        ]
        self.elemtimeline.append(row)

    def node(self, n):
        """Record ``n`` with the label "node"."""
        self.element(n, "node")

    def way(self, w):
        """Record ``w`` with the label "way"."""
        self.element(w, "way")

    def relation(self, r):
        """Record ``r`` with the label "relation"."""
        self.element(r, "relation")

In [4]:
# Run the history extract through the handler; every element version it
# reports ends up as one row in tlhandler.elemtimeline.
tlhandler = TimelineHandler()
tlhandler.apply_file("data/ottgat.osh.pbf")

# Column labels, in the order TimelineHandler.element() fills each row.
colnames = ['type', 'id', 'version', 'visible', 'ts', 'uid', 'chgset', 'ntags']

# Build the table and order it by element, then by timestamp.
elements = (pd.DataFrame(tlhandler.elemtimeline, columns=colnames)
            .sort_values(by=['type', 'id', 'ts']))

# Keep a CSV copy of the full timeline.
elements.to_csv("output/ottgat_elem.csv", date_format='%Y-%m-%d %H:%M:%S')

In [5]:
def datedelems(history, date):
    """Return, for each element, its most recent version as of ``date``.

    Rows of ``history`` with ``ts <= date`` are grouped by ``(type, id)`` and
    the highest ``version`` of each group is kept. Those keys are then
    inner-joined back onto ``history`` to recover the remaining columns.

    :param history: DataFrame with at least the columns ``type``, ``id``,
        ``version`` and ``ts``
    :param date: upper bound (inclusive) compared against the ``ts`` column
    :return: DataFrame whose first columns are ``type``, ``id``, ``version``,
        followed by the other columns of ``history``
    """
    element_key = ['type', 'id']
    # `date` is referenced from the query string through the @ prefix.
    up_to_date = history.query("ts <= @date")
    latest_versions = (up_to_date.groupby(element_key)['version']
                       .max()
                       .reset_index())
    return pd.merge(latest_versions, history, on=element_key + ['version'])

def osm_stats(osm_history, timestamp):
    """Summarise the latest element versions known at ``timestamp``.

    The snapshot comes from ``datedelems``; no filtering on the ``visible``
    column is applied here, so every row of the snapshot is counted.

    :param osm_history: element history DataFrame (columns ``type``, ``id``,
        ``version``, ``ts``, ``uid``, ``chgset``)
    :param timestamp: date passed through to ``datedelems``
    :return: list ``[n_nodes, n_ways, n_relations, n_users, n_chgsets]``
        where the last two are distinct ``uid`` / ``chgset`` counts
    """
    snapshot = datedelems(osm_history, timestamp)
    # Row count per element type, in the order the caller expects them.
    type_counts = [len(snapshot[snapshot['type'] == kind])
                   for kind in ("node", "way", "relation")]
    distinct_users = snapshot['uid'].nunique()
    distinct_chgsets = snapshot['chgset'].nunique()
    return type_counts + [distinct_users, distinct_chgsets]

def osm_chronology(history, start_date, end_date):
    """Compute ``osm_stats`` at each date of a "1M"-frequency range.

    :param history: element history DataFrame handed to ``osm_stats``
    :param start_date: left bound given to ``pd.date_range``
    :param end_date: right bound given to ``pd.date_range``
    :return: DataFrame indexed by the generated dates with the columns
        ``n_nodes``, ``n_ways``, ``n_relations``, ``n_users``, ``n_chgsets``
    """
    stat_columns = ['n_nodes', 'n_ways', 'n_relations',
                    'n_users', 'n_chgsets']
    sample_dates = pd.date_range(start_date, end_date, freq="1M").values
    rows = []
    for sample_date in sample_dates:
        # Each date is handed to osm_stats in its string form.
        rows.append(osm_stats(history, str(sample_date)))
    return pd.DataFrame(rows, index=sample_dates, columns=stat_columns)

In [6]:
# Statistics for a single date. osm_stats returns the list
# [nb_nodes, nb_ways, nb_relations, nb_users, nb_chgsets].
osmstats = osm_stats(elements, "2009-03-30")

In [7]:
# Same statistics computed at every date osm_chronology generates between
# the two bounds (pd.date_range with freq="1M"); one row per date.
chrono_data = osm_chronology(elements, "2007-01-01", "2017-10-01")

In [8]:
# Show the chronology table. The parenthesised call form prints the same
# text under Python 2 and is also valid Python 3, unlike the bare `print x`
# statement, which is a SyntaxError there.
print(chrono_data)

# Keep a CSV copy of the chronology.
chrono_data.to_csv("output/ottgat_chrono.csv")

            n_nodes  n_ways  n_relations  n_users  n_chgsets
2007-01-31     1444     332            0        5         39
2007-02-28     1473     339            0        5         43
2007-03-31     1837     426            0        6         50
2007-04-30     1951     445            0        7         53
2007-05-31     2290     477            0        9         55
2007-06-30     3764     523            0       10         61
2007-07-31    15399    1451            0       14         88
2007-08-31    23207    3318            8       15        120
2007-09-30    26682    3615           10       17        141
2007-10-31    36023    4314           12       22        181
2007-11-30    41313    5020           12       27        223
2007-12-31    49781    5637           12       26        274
2008-01-31    50855    5739           12       28        297
2008-02-29    53335    6137           13       31        318
2008-03-31    57990    6644           24       38        362
2008-04-30    58096    6