In [10]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to the local helper modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Environment setup (run once in a shell, not inside this notebook):
# conda create --prefix ./envs
# conda activate ./envs
# conda install ipykernel pandas joblib matplotlib seaborn geopandas tqdm
# conda install conda-forge::osmium-tool
# pip install osmium geopy

In [15]:
import pandas as pd
from joblib import Parallel, delayed
import pickle

from tqdm.notebook import tqdm_notebook
import time
import numpy as np
import matplotlib.pyplot as plt
import json
# import point shapely
from shapely.geometry import Point

from store_osm_data import *
from extract_history_data import *
from extract_latest_data import *
from util import *

In [None]:
# Prepare the 'rio' dataset from the South America extract, selecting the
# area by place name.
# NOTE(review): store_osm_data is defined in a local module not shown here;
# its exact outputs should be confirmed there.
store_osm_data(
    'rio',
    'south-america-latest.osm.pbf',
    geo_loc='Rio De Janeiro, Brazil',
)
# Alternatively, pass an explicit list of four coordinates instead of a place
# name (ordering is defined by store_osm_data -- confirm there):
# store_osm_data('rec', 'south-america-latest.osm.pbf', [-8.155187,-7.928967,-35.018648,-34.851540])

In [None]:
# Parse the Rio extract and cache the handler's collected records as a pickle.
# NOTE(review): LatestHandler must already be in scope when this cell runs
# (via one of the star imports or the class cell further down) -- confirm
# which definition is intended.
# NOTE(review): apply_file is called without locations=True here, unlike the
# later cell that builds geometries -- confirm this is intentional.
l = LatestHandler()
l.apply_file("data/osm/latest/" + "rio.osm.pbf")

# The context manager guarantees the file is flushed and closed even if
# pickle.dump raises part-way through.
with open("data/parsed_osm/latest/" + "rio.pickle", "wb") as pickle_out:
    pickle.dump(l.latest, pickle_out)

In [34]:
class LatestHandler(osmium.SimpleHandler):
    """Collect highway ways and their node references from an OSM file.

    Attributes filled while the handler is applied to a file:
        latest: list of records
            ``[id, tags, 'W', 'Highway', wkb_linestring, timestamp, node_ids]``,
            one per way that carries a ``highway`` tag.
        nodes: list of ``[node_id, lon, lat]`` rows, one per node reference of
            every stored way (a node referenced by several ways appears once
            per reference).
    """

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.wkbfab = osmium.geom.WKBFactory()
        self.latest = []
        self.nodes = []

    def read_parsed_data(self, city):
        """Load the pickled records for ``city`` into ``self.latest``.

        Reads ``data/parsed_osm/latest/<city>.pickle``.
        """
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load pickles produced by this project.
        # The context manager closes the file handle (the previous version
        # left it open).
        with open("data/parsed_osm/latest/%s.pickle" % (city), "rb") as pickle_in:
            self.latest = pickle.load(pickle_in)

    def get_gdf(self, data):
        """Convert a list of way records into a GeoDataFrame.

        Args:
            data: sequence of records laid out like the entries of
                ``self.latest``.

        Returns:
            GeoDataFrame with columns ``id``, ``tags``, ``osm_type``,
            ``subtype``, ``geometry`` (EPSG:4326), ``ts`` (timezone-naive)
            and ``nodes``.
        """
        ids = pd.Series([row[0] for row in data], dtype='UInt64')
        tags = pd.Series([row[1] for row in data])
        osm_type = pd.Series([row[2] for row in data], dtype='string')
        subtype = pd.Series([row[3] for row in data], dtype='string')
        geometry = gpd.GeoSeries.from_wkb([row[4] for row in data], crs='epsg:4326')

        timestamps = [row[5] for row in data]
        if timestamps:
            # Drop the timezone information, keeping the wall-clock values.
            ts = pd.Series(timestamps).dt.tz_localize(None)
        else:
            # An empty Series is not datetime-typed, so `.dt` would raise;
            # build an empty datetime column directly instead.
            ts = pd.Series([], dtype='datetime64[ns]')

        n_ids = pd.Series([row[6] for row in data])

        return gpd.GeoDataFrame({
            'id': ids,
            'tags': tags,
            'osm_type': osm_type,
            'subtype': subtype,
            'geometry': geometry,
            'ts': ts,
            'nodes': n_ids
        })

    def filter_data(self, qualifier):
        """Return the records of ``self.latest`` accepted by ``qualifier``.

        ``qualifier`` is called as ``qualifier(tags, osm_type)`` for each
        record and must return a truthy value to keep it.
        """
        return [record for record in self.latest if qualifier(record[1], record[2])]

    def way(self, w):
        """Handler callback: store ``w`` if it has a ``highway`` tag.

        On success one record is appended to ``self.latest`` and one row per
        node reference is appended to ``self.nodes``. If anything fails (for
        example a node without a valid location, or a linestring that cannot
        be built) the error and the way are printed and nothing is stored for
        that way.
        """
        osm_id = w.id
        tags = dict(w.tags)

        if 'highway' not in tags:
            return

        try:
            n_ids = []
            way_nodes = []
            for node in w.nodes:
                # Read the NodeRef attributes directly instead of parsing
                # the "id@lon/lat" text form of the node.
                n_ids.append(node.ref)
                way_nodes.append([node.ref, node.lon, node.lat])

            line = self.wkbfab.create_linestring(w)
            record = [
                osm_id,     # Ids
                tags,       # Tags
                'W',        # OSM Type
                'Highway',  # Subtype
                line,       # Geometry
                pd.Timestamp(w.timestamp),
                n_ids
            ]
        except Exception as e:
            # Best-effort parsing: report the problematic way and move on.
            print(e)
            print(w)
            return

        # Commit only after the whole way was processed, so a failing way
        # never leaves partial node rows behind in self.nodes.
        self.latest.append(record)
        self.nodes.extend(way_nodes)

In [35]:
# Run the handler over the Rio extract with locations=True.
osm_file = 'data/osm/latest/rio.osm.pbf'
l = LatestHandler()
l.apply_file(osm_file, locations=True)

# Keep the records accepted by highway_qualifier and turn them into a
# GeoDataFrame in a single step.
street = l.get_gdf(l.filter_data(highway_qualifier))

In [36]:
# Display the street GeoDataFrame built in the previous cell.
street

Unnamed: 0,id,tags,osm_type,subtype,geometry,ts,nodes
0,4217292,"{'highway': 'residential', 'lanes': '2', 'lit'...",W,Highway,"LINESTRING (-43.20285 -22.98436, -43.20285 -22...",2024-02-02 14:48:25,"[25038509, 1587555238, 1587555157, 25038500, 2..."
1,4217293,"{'highway': 'residential', 'lanes': '2', 'lit'...",W,Highway,"LINESTRING (-43.20518 -22.98645, -43.20515 -22...",2023-01-20 09:48:27,"[25038522, 3738891781, 8853165343, 3738891775,..."
2,4217294,"{'highway': 'residential', 'lanes': '2', 'lit'...",W,Highway,"LINESTRING (-43.20049 -22.98095, -43.20051 -22...",2023-01-20 09:50:52,"[2296646450, 25038487]"
3,4217295,"{'bicycle': 'yes', 'cycleway:left': 'lane', 'c...",W,Highway,"LINESTRING (-43.19874 -22.98678, -43.19876 -22...",2023-01-20 09:18:13,"[25038525, 8905208766, 1587555355, 1587555333]"
4,4217297,"{'highway': 'residential', 'lanes': '2', 'name...",W,Highway,"LINESTRING (-43.20657 -22.98051, -43.20664 -22...",2023-01-20 09:47:34,"[50935728, 3358843444, 1587555101, 7904767809,..."
...,...,...,...,...,...,...,...
118697,1258907218,"{'alt_name': 'Travessa Quarenta e Nove', 'high...",W,Highway,"LINESTRING (-43.23685 -22.86790, -43.23612 -22...",2024-03-08 02:05:44,"[6462844341, 6501199724]"
118698,1258907219,"{'alt_name': 'Travessa Quarenta e Oito', 'high...",W,Highway,"LINESTRING (-43.23765 -22.86830, -43.23689 -22...",2024-03-08 02:05:44,"[6324551097, 6462844342]"
118699,1258907220,"{'alt_name': 'Travessa Cinquenta', 'highway': ...",W,Highway,"LINESTRING (-43.23689 -22.86823, -43.23615 -22...",2024-03-08 02:05:44,"[6462844342, 6501199725]"
118700,1258907221,"{'alt_name': 'Travessa Cinquenta e Dois', 'hig...",W,Highway,"LINESTRING (-43.23615 -22.86816, -43.23540 -22...",2024-03-08 02:05:44,"[6501199725, 6501179201]"


In [38]:
# Turn the handler's [id, lon, lat] rows (l.nodes) into a point GeoDataFrame
# in EPSG:4326, keeping only the id and geometry columns.
nodes = (
    pd.DataFrame(l.nodes, columns=['id', 'lon', 'lat'])
    .pipe(lambda frame: gpd.GeoDataFrame(
        frame, geometry=gpd.points_from_xy(frame.lon, frame.lat)))
    .set_crs('epsg:4326')
    .drop(columns=['lon', 'lat'])
)

In [43]:
# Display the nodes GeoDataFrame built in the previous cell.
nodes

Unnamed: 0,id,geometry
0,25038509,POINT (-43.20285 -22.98436)
1,1587555238,POINT (-43.20285 -22.98422)
2,1587555157,POINT (-43.20279 -22.98341)
3,25038500,POINT (-43.20278 -22.98329)
4,25038494,POINT (-43.20271 -22.98221)
...,...,...
776938,6501199725,POINT (-43.23615 -22.86816)
776939,6501199725,POINT (-43.23615 -22.86816)
776940,6501179201,POINT (-43.23540 -22.86810)
776941,8974447542,POINT (-43.34897 -22.87390)
