## Mapping Waypoints onto the Berlin street network
This notebook maps the waypoints in the data onto the chosen Berlin street network. Here, we take the Berlin GHS as the relevant shape.
The notebook maps all waypoints within the Berlin GHS onto their nearest edge and adds both that edge and the distance to it to the data frame. Waypoints outside the GHS are dropped.

In [10]:
# Imports, options
import geopandas as gpd
import multiprocessing as mp
import numpy as np
import os
import osmnx as ox
import pandas as pd
import time

from geopandas.tools import sjoin
from config import rpath
# Make the project root (rpath from the local config module) the working
# directory, so the relative "data/..." paths used below resolve against it.
os.chdir(rpath)

# Ask geopandas to use its PyGEOS backend for geometry operations.
# NOTE(review): this option only exists in geopandas versions that ship
# PyGEOS support -- confirm against the pinned geopandas version.
gpd.options.use_pygeos = True

In [16]:
# Get Waypoint Files
path = "data/Berlin_2017/data/waypoints"

# Snapshot the directory once. The original re-ran os.listdir(path) for every
# candidate file, which is a quadratic number of directory scans.
listing = os.listdir(path)
present = set(listing)

# Collect the raw waypoint files that still need processing. Skipped are:
#   * files ending in "edges.csv"  -- includes the "-with-edges.csv" outputs
#     written by read_and_map_wpts,
#   * files ending in "small.csv"  -- presumably reduced sample files; verify,
#   * files whose "-with-edges.csv" output already exists in the directory.
# file[:-4] strips a four-character extension (".csv"), matching how
# read_and_map_wpts builds its output file name.
files = []
for file in listing:
    if file.endswith(("edges.csv", "small.csv")):
        continue
    if file[:-4] + "-with-edges.csv" in present:
        continue
    files.append(os.path.join(path, file))
files.sort()

# Column names assigned to the waypoint CSVs when they are read ...
colnames = ['TripID', 'WaypointSequence', 'CaptureDate', 'lat', 'lon', 'SegmentID',
            'ZoneName', 'Frc', 'DeviceID', 'RawSpeed', 'RawSpeedMetric']
# ... and the subset of those columns that is kept for the mapping step.
keepcols = ['TripID', 'WaypointSequence', 'CaptureDate', 'lat', 'lon', 'DeviceID', 'RawSpeed']

In [7]:
# Get OSMNX network and Berlin GHS shape
# Street network: read the stored graph, then reproject it to EPSG:25833.
G = ox.projection.project_graph(
    ox.io.load_graphml("data/graphs/berlin-2851.graphml"),
    to_crs=25833,
)

# GHS layer: keep only the rows whose UC_NM_MN is "Berlin", reduced to the
# name and geometry columns, and reproject to the same EPSG:25833 as the graph.
berlin_uc = gpd.read_file("data/geodata/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg")
berlin_uc = berlin_uc.loc[berlin_uc["UC_NM_MN"] == "Berlin"]
berlin_uc = berlin_uc[["UC_NM_MN", "geometry"]].to_crs(25833)

In [8]:
# Define function to read file and map waypoints onto the network
def read_and_map_wpts(file, verbose=False):
    """Map the waypoints of one CSV file onto the Berlin street network.

    The file is read with the module-level ``colnames`` and reduced to
    ``keepcols``, turned into point geometries from ``lon``/``lat``
    (EPSG:4326), reprojected to EPSG:25833 (UTM 33N) and restricted to the
    rows that join with ``berlin_uc``. For the remaining rows the nearest
    edge of the module-level graph ``G`` and the distance to it are looked up
    and stored in the columns ``edge_1``, ``edge_2`` (first and second element
    of each returned edge) and ``edge_dist``. The result, without the geometry
    column, is written next to the input as ``<file minus its last four
    characters>-with-edges.csv``.

    Parameters
    ----------
    file : str
        Path of the waypoint CSV to process.
    verbose : bool, default False
        If True, print a progress message before each stage and collect a
        ``time.time()`` stamp per stage.

    Returns
    -------
    list of float or None
        With ``verbose=True``, the seven time stamps taken before reading,
        converting, reprojecting, joining, edge lookup and saving, and after
        saving. Otherwise ``None``.
    """
    stamps = []

    def checkpoint(message):
        # Announce a stage and record when it started (verbose mode only).
        if verbose:
            print(message)
            stamps.append(time.time())

    checkpoint("Reading " + file)
    wpts = pd.read_csv(file, names=colnames)[keepcols]

    checkpoint("Converting " + file)
    point_geoms = gpd.points_from_xy(wpts.lon, wpts.lat)
    wpts = gpd.GeoDataFrame(wpts, geometry=point_geoms)

    checkpoint("Setting crs for " + file)
    # Input coordinates are lon/lat (EPSG:4326); work in EPSG:25833 from here on.
    wpts = wpts.set_crs(4326)
    wpts = wpts.to_crs(25833)

    checkpoint("Joining " + file + " with Berlin.")
    # Left join, then drop rows without a match: only waypoints that fall
    # inside the Berlin shape survive. The same name is rebound on purpose so
    # the intermediate join result is not kept alive.
    wpts = sjoin(wpts, berlin_uc, how="left").dropna(subset=["UC_NM_MN"])
    # Projected coordinates as plain columns for the nearest-edge lookup.
    wpts["x"] = wpts.geometry.x
    wpts["y"] = wpts.geometry.y

    checkpoint("Getting nearest edges of " + file)
    nearest, distances = ox.distance.nearest_edges(
        G, wpts.x, wpts.y, return_dist=True, interpolate=50
    )

    # Store the first two elements of each edge and the distance to that edge.
    wpts["edge_1"] = [edge[0] for edge in nearest]
    wpts["edge_2"] = [edge[1] for edge in nearest]
    wpts["edge_dist"] = list(distances)

    outfile = file[:-4] + "-with-edges.csv"
    checkpoint("Saving csv for " + file)
    pd.DataFrame(wpts.drop(columns='geometry')).to_csv(outfile)

    if not verbose:
        return None

    stamps.append(time.time())
    print("File " + outfile + " written!")
    return stamps  # Return times if verbose = True

In [None]:
# Execute above function with 5 cores
# The context manager guarantees the worker processes are shut down even if
# one of the files raises; previously pool.close() was skipped on error and
# the pool was never joined. pool.map blocks until all files are processed, so
# no work is outstanding when the pool is torn down on exit.
# df_list holds one return value per file (None each, since verbose is left
# at its default of False).
with mp.Pool(processes=5) as pool:
    df_list = pool.map(read_and_map_wpts, files)