In [1]:
import pandas as pd
import numpy as np

import networkx as nx
import osmnx as ox

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Polygon
import matplotlib

from joypy import joyplot

from shapely import geometry

from geopandas.tools import sjoin
from geopandas import GeoDataFrame
import geopandas as gpd

from shapely import geometry
from shapely.geometry import Point
from shapely.geometry import Polygon

from shapely.geometry import LineString, mapping
import geopandas as gpd
from ipyleaflet import *

import math

from math import radians, cos, sin, asin, sqrt

#from Functions import *
import Functions
print('loaded libraries')


loaded libraries
loaded libraries


## Functions

In [2]:
def processRealRoutesIntoSteps(G,real, nextLim, value, title):
    """
    segment route into segments based on few samples (approx 200 - 300 meters)
    every sample created more noisy routes
    input: Graph G, limits and value (how much distance), title of the route (to save it)
    output: recreated route .csv
    
    """
    
    real = real.dropna(subset = 'gpsLongitude')

    avgPollution = []
    dictionary = {'avgPollution':[], 'cumulativeDistance':[], 'sumPollution':[],
                 'gpsLongitude':[], 'gpsLatitude':[], 'node':[]}
    pollution = []
    sumPollution = 0

    init = real.iloc[0] # get starting point
    node = ox.nearest_nodes(G, init['gpsLongitude'], init['gpsLatitude']) 
    
    dictionary['avgPollution'].append(np.mean(pollution))
    dictionary['cumulativeDistance'].append(init['cumulativeDistance'])
    dictionary['sumPollution'].append(0)
    dictionary['gpsLongitude'].append(init['gpsLongitude'])
    dictionary['gpsLatitude'].append(init['gpsLatitude'])
    dictionary['node'].append(node)

    for index, row in real.iterrows():

        if row['cumulativeDistance'] < nextLim: # go through segments
            
            pollution.append(row['pm2_5'])
            
        else:
            
            #find new node according to that new segment starting point 
            node = ox.nearest_nodes(G, row['gpsLongitude'], row['gpsLatitude']) 
            
            sumPollution = sumPollution + np.mean(pollution)
            # update dictioanary 
            dictionary['avgPollution'].append(np.mean(pollution))
            dictionary['cumulativeDistance'].append(nextLim)
            dictionary['sumPollution'].append(np.mean(sumPollution))
            dictionary['gpsLongitude'].append(row['gpsLongitude'])
            dictionary['gpsLatitude'].append(row['gpsLatitude'])
            dictionary['node'].append(node)
            pollution = [] # start again
            pollution.append(row['pm2_5'])
            nextLim = nextLim + value

    finalValue = len(real.index) - 1
    end = real.iloc[finalValue] # consider last segments
    
    sumPollution = sumPollution + np.mean(pollution)
    node = ox.nearest_nodes(G, end['gpsLongitude'], end['gpsLatitude'])
    # update dictioanary 
    dictionary['avgPollution'].append(np.mean(pollution))
    dictionary['cumulativeDistance'].append(end['cumulativeDistance'])
    dictionary['sumPollution'].append(np.mean(sumPollution))
    dictionary['gpsLongitude'].append(end['gpsLongitude'])
    dictionary['gpsLatitude'].append(end['gpsLatitude'])
    dictionary['node'].append(node)
   
    realValues = pd.DataFrame(dictionary)
    #display(realValues.tail(10))
    
    realValues.to_csv('./LondonJourneysProcessed/recreatedRoutes/'+ title + '_recreatedRoute.csv', index = False)
    print('File', title,'successfully saved at "./LondonJourneysProcessed/recreatedRoutes/"')
    return realValues

def recreateRouteByDijkstra(G, realValues):
    """
    recreate routes with Dijkstra every few samples
    every sample created more noisy routes
    input: Graph G, limits and value (how much distance), title of the route (to save it)
    output: recreated route nodes traversed
    """
    
    finalPath = []
    
    for i in range(len(realValues.index) - 1):
        
        row = realValues.iloc[i]
        nextRow = realValues.iloc[i + 1]
        route = nx.shortest_path(G, row['node'], nextRow['node'], 'length')
        
        for i in route:
            
            finalPath.append(int(i))
    
    list_str = repr(finalPath)
    prev = 0 #finalPath[0]
    finalPathF = []
    
    for i in finalPath:
        
        # avoid adding last node from previous segment (start and ending nodes for continous segments are the same)
        if i != prev: 
            finalPathF.append(i)
            
        prev = i
       
    file = './LondonJourneysProcessed/recreatedRoutes/'+ title + '_recreatedRoute.txt' 
    file = open(file,"a")
    file.write(list_str)
    file.close()
    return finalPath

def plotRecreatedRoute(finalPath, title, G):
    """ 
    plot route in ipyleafleat as html file (so interactive)
    input: path to save file, title, and graph G
    output: none (but creates the file in the assigned folder)
    """
    
    location_point = (51.498732, -0.191946)

    nodes = ox.graph_to_gdfs(G, edges=False) # load graph nodes

    path_points = nodes.loc[finalPath] # obtain path points

    m = Map(center=location_point, zoom=13, basemap=basemaps.CartoDB.Positron)
    
    # get values from new path and turn them into a geodataframe

    path = gpd.GeoDataFrame([LineString(path_points.geometry.values)], columns=['geometry'])
    
    #plot
    path_layer = GeoData(geo_dataframe=path, style={'color':'gray', 'weight':4, 'dashArray': '8'})
    
    m.add_layer(path_layer)

    m.layout.width = '800px'  # Adjust the desired width
    m.layout.height = '800px'
    m.save('./LondonJourneysProcessed/recreatedRoutes/'+ title + '_recreatedRoute.html')

## Load Graph

In [3]:
G = ox.load_graphml('./data/ProcessedGraphs/8000/squared-grid.graphml')
print(G)

MultiDiGraph with 21208 nodes and 61844 edges


## Load appropriate Route file

In [4]:
# Load appropriate Route file

title = './LondonJourneysProcessed/2023-08-03AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_2.csv'
title = './LondonJourneysProcessed/2023-07-20AirSpeckLogsOutdoorWalkingF44182EC762B__route_3.csv'


title = './LondonJourneysProcessed/2023-07-28AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_1.csv'
title = './LondonJourneysProcessed/2023-07-28AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_2.csv'
title = './LondonJourneysProcessed/2023-07-28AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_3.csv'

#2023-08-01AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_1
title = './LondonJourneysProcessed/2023-08-01AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_2.csv'

title = "./LondonJourneysProcessed/2023-08-02AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_1.csv"

#title = "./LondonJourneysProcessed/2023-08-03AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_1.csv"

real = pd.read_csv(title)
title = title.split('/')[-1].replace('.csv','')


## Obtain Recreated Route from Steps

You can play with how much segmentation you want to recreate the route (every 200 or 500 or so meters)
it is not going to be perfect, but it will approximate. Longer distance segmentation
will tend to find a different shorter route, but very small distances will also get noisy as there is variation in the path followed by the sensor and the script will find the closest node to the sensor value.

In [7]:
print(title)

realValues = processRealRoutesIntoSteps(G,real, 200,200, title) 

display(realValues.head(10))    
finalPath = recreateRouteByDijkstra(G, realValues)
print('Feed this into the Process_reCreatedRoutes_Against_RealRoutes_FINAL.ipynb')
print('Final path:' ,finalPath)


2023-08-03AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


File 2023-08-03AirSpeckLogsOutdoorWalkingF28ABF2B57EA__route_1 successfully saved at "./LondonJourneysProcessed/recreatedRoutes/"


Unnamed: 0,avgPollution,cumulativeDistance,sumPollution,gpsLongitude,gpsLatitude,node
0,,0.0,0.0,-0.214819,51.525181,9300
1,6.632901,200.0,6.632901,-0.21212,51.523949,10263
2,4.786569,400.0,11.41947,-0.211914,51.522793,10262
3,4.521964,600.0,15.941434,-0.209685,51.521191,2372
4,4.817057,800.0,20.758491,-0.20798,51.51989,15885
5,5.578342,1000.0,26.336833,-0.206854,51.518341,9784
6,5.314089,1200.0,31.650922,-0.204838,51.517334,9786
7,5.443933,1400.0,37.094855,-0.20334,51.516663,10478
8,5.109875,1600.0,42.20473,-0.202631,51.515491,8241
9,5.347176,1800.0,47.551906,-0.200811,51.514381,5481


Feed this into the Process_reCreatedRoutes_Against_RealRoutes_FINAL.ipynb
Final path: [9300, 9298, 9301, 10263, 10263, 10262, 10262, 10259, 10261, 10256, 10255, 10254, 2369, 2372, 2372, 2373, 10471, 15889, 15886, 15885, 15885, 15886, 15889, 10471, 10268, 10470, 10472, 5877, 10474, 5875, 5872, 5874, 9784, 9784, 9776, 9773, 9775, 9786, 9786, 9787, 9788, 10478, 10478, 8242, 8241, 8241, 8244, 6243, 5481, 5481, 5479, 1676, 1672, 1672, 1671, 1671, 5510, 5500, 5499, 5499, 5502, 5518, 5516, 5516, 5517, 10513, 10513, 10511, 269, 268, 6325, 6324, 6324, 266, 265, 10297, 10297, 9216, 3583, 3597, 3597, 3596, 3599, 3599, 3868, 3868, 3601, 3865, 3865, 9318, 11354, 11354, 9318, 9208, 12362, 12364, 12364, 14046, 3607, 3605]


## Plot Recreated Route

In [6]:
plotRecreatedRoute(finalPath, title, G)