## Purpose
This notebook is for testing the pbf conversion tool to implement different speeds on the OSM ways.

In [15]:
import xml.etree.ElementTree as ET
import pandas as pd
import gc
from timeit import default_timer as timer
import numpy as np

In [16]:
road_path = '/Users/askalila/Documents/TfC Offline Work/numo/otp/Clutter/Final networks/original/planet_mx.osm'


In [17]:
# Setting up Element Tree to read in the osm file
tree = ET.parse(road_path)
root = tree.getroot()

In [18]:
def CopyNodes (originalway, nodesequence, realspeed, units, newwayID, verbose = 'no'):
    """
    Purpose: this function creates a new way with the nodes that have speed and adds attribute to the old and new ways to mark them as synthetic
    
    input:
        originalway: original way element object
        nodesequence: list of node IDs to be copied as strings in list, in the correct direction
        realspeed: mean speed as string or 'no' to skip changing maxspeed tag
        units: string of 'mph' or'kph'
        newwayID: string with a new way ID number
        
    no outputs
    """

    newway = ET.Element('way', attrib = dict(originalway.items())) #creates new way with same attributes

    # add nodes as nd children
    
    for i,anode in enumerate(nodesequence):    
        NodeRef = ET.Element('nd', attrib = {'ref':anode})
        newway.append(NodeRef)
        

    # adds attributes and tags to new way
    newway.set('id', newwayID) #replace orig way id with new id
    newway.set('synthetic','yes')
    newway.extend(originalway.findall("./tag")) #copies over all the exisitng tags

    if realspeed != 'no': 
    
        # add the maxspeed tag from real speed or replace current maxspeed with real speed
        if newway.find('tag/[@k="maxspeed"]') is None:
            speedtag = ET.Element('tag', attrib = {'k':'maxspeed','v':realspeed})
            newway.append(speedtag)   # adds a tag element under way
        else:
            newway.find('tag/[@k="maxspeed"]').attrib['v'] = realspeed

    
    global root
    root.append(newway) # adds new way to the xml
    if verbose == 'yes': print('adding new way with ID',newwayID)
    
    
    

In [19]:
def CheckNodesForSpeeds (wayID, originalway, units, verbose = 'no'):
    """
    This function will check the Speeds Dataframe (global variable) for speeds between node pairs in the way.
    
    Global variables that need to be present:
        root
        speedsDFpeak
    inputs: 
        wayID: original way element's ID as string
        nodelist: the way's list of node reference ids as strings
        units: 'mph' or 'kph' string
        newwayID_seed: starting number for new way's to iterate over by 1
    
    function Calls another function to do:
        - creates new way with nodes and speed and copies all metadata and non-speed tags
        - removes the copied nodes from the original way
        - adds attribute to newway "synthetic = yes"
        
        - adds attribute to origin way "Checked_for_speed = yes"
        
    no outputs
    """
    global newwayID_seed

    #create nodelist of node id strings and similar list of floats
    nodelist = []
    nodelistfloat = []
    for node in originalway.iter('nd'):
        
        ref = node.attrib['ref']
        nodelist.append(ref)
        nodelistfloat.append(int(ref))

    # get reverse of nodelist
    reverseNodelistfloat = list(reversed(nodelistfloat))
    reverseNodelist = list(reversed(nodelist))

    # get only speeds with origin AND Destination nodes in the original way
    relevantSpeeds = speedsDFpeak.loc[(speedsDFpeak.osm_start_node_id.isin(nodelistfloat)) & (speedsDFpeak.osm_end_node_id.isin(nodelistfloat))]
#     try: 
#         relevantSpeedsShort = relevantSpeeds[['osm_start_node_id','osm_end_node_id','speed_mph_mean']]
#     else: # not sure why this is giving an error
        
#         relevantSpeedsShort = relevantSpeeds[['osm_start_node_id','osm_end_node_id','speed_kph_mean']]
    
    relevantSpeedsShort = relevantSpeeds[['osm_start_node_id','osm_end_node_id','speed_kph_mean']]
    nodepairdict = dict()
    
    

    # create new ways from the relevant speeds
    
    for nodes in relevantSpeedsShort.iterrows():
        pair = list(nodes[1])
        startnode, endnode, realspeed_float = pair[0],pair[1], pair[2]
        realspeed = str(realspeed_float)

        # check if its the direction of node sequence or reverse
        if nodelistfloat.index(endnode) > nodelistfloat.index(startnode): #correct sequence     
            startIndex, endIndex = nodelistfloat.index(startnode), nodelistfloat.index(endnode)
            CopyNodes(originalway, nodelist[startIndex:endIndex+1],realspeed,units,str(newwayID_seed))
            newwayID_seed += 1
            nodepairdict[startIndex] = endIndex

        elif nodelistfloat.index(endnode) < nodelistfloat.index(startnode): # reverse
            # check if the reverse nodepair belongs to another way (from DF in Uber, from xml tree in mapbox), 
            # if yes then skip this block
            try: # check if the speed DF has a different way ID for the reverse node direction (n/a to mapbox speed)
                samewayID = wayID == str(list(speedsDFpeak.loc[(speedsDFpeak.osm_start_node_id == startnode) & (speedsDFpeak.osm_end_node_id == endnode)]['osm_way_id'])[0])
            except: # for mapbox, check if the root has only way with the start and end node pair
                samewayID = len(root.findall("./way/nd/[@ref ='"+ str(round(startnode)) +"'].." and "./way/nd/[@ref ='"+ str(round(endnode)) +"']..")) == 1
                
            
            if samewayID:
                startIndex, endIndex = nodelistfloat.index(startnode), nodelistfloat.index(endnode)
                revstartIndex, revendIndex = reverseNodelistfloat.index(startnode), reverseNodelistfloat.index(endnode)
                CopyNodes(originalway, reverseNodelist[revstartIndex:revendIndex+1],realspeed,units,str(newwayID_seed))
                newwayID_seed += 1

        else: raise Exception('start node or end node not in way nodelist')


    ## remove nodes from original way

    startPos = 0
    endPos = len(nodelist)-1

    data_items = nodepairdict.items()
    data_list = list(data_items)
    nodepairdf = pd.DataFrame(data_list,columns=['start','end'])

    # first, delete nodes from segments at the start of the way

    while len(nodepairdf[nodepairdf.start == startPos]) > 0: #loop over this until it all start segments are removed from original way

        start = startPos
        end = nodepairdf[nodepairdf.start == startPos].values[0][1]

        for i in range(start, end): # delete nodes in orig way from startPos to end-1
            nodeID = nodelist[i]
            try: originalway.remove(originalway.find("./nd/[@ref='"+nodeID+"']"))
            except: pass
            if verbose == 'yes': print('removed node index',i)

        nodepairdf = nodepairdf.drop(nodepairdf[nodepairdf.start == startPos].index[0])  # delete this row from the df
        startPos = end


    # second, delete nodes from segments at the end of the way
    while len(nodepairdf[nodepairdf.end == endPos]) > 0:  
        end = endPos
        start = nodepairdf[nodepairdf.end == endPos].values[0][0]

        for i in range(start+1, end+1): # delete nodes in orig way from startPos +1 to endPos
            nodeID = nodelist[i]
            try: originalway.remove(originalway.find("./nd/[@ref='"+nodeID+"']"))
            except: pass
            if verbose == 'yes': print('removed node index',i)

        nodepairdf = nodepairdf.drop(nodepairdf[nodepairdf.end == endPos].index[0]) # delete this row from the df
        endPos = start



    copypairs =[]
    sortedNodepairdf = nodepairdf.sort_values(by=['start']).values
    
    # third, the remaining rows should only be nodes of middle segments. delete the middle nodes

    for i, pair in enumerate(sortedNodepairdf): 

        if pair[1] < pair[0]: continue # ignore reverse pairs
        if len(copypairs) == 0: 
            copypairs.append((startPos,pair[0]))
            lastEnd = pair[1]
        if i == len(sortedNodepairdf)-1: copypairs.append((pair[1],endPos))
        if pair[0] > lastEnd: copypairs.append((lastEnd,pair[0]))
        if i == len(sortedNodepairdf)-1 and pair[1] < endPos: copypairs.append((pair[1],endPos))

        for i in range(pair[0]+1, pair[1]): # delete nodes in orig way from startPos +1 to endPos -1
            nodeID = nodelist[i]
            try: originalway.remove(originalway.find("./nd/[@ref='"+nodeID+"']"))
            except: pass
            if verbose == 'yes': print('removed node index',i)

        lastEnd = pair[1]

    if verbose == 'yes': print('copy pairs:',copypairs)
    # create copynodes with no realspeed for all pairs in copypairs 
    for pair in copypairs:
        nodesequence = []
        for i in range(pair[0],pair[1]+1): nodesequence.append(nodelist[i])
        CopyNodes(originalway, nodesequence,'no',units, str(newwayID_seed))
        newwayID_seed += 1


In [20]:
number_ofways = len(root.findall('./way'))
print('Total number of ways in the osm:', number_ofways)

Total number of ways in the osm: 397523


Steps to follow:
- iterate through all ways
- pass to function to do: 
    - in speeds dataframe, query for nodes in the way as start nodes and end nodes. returns rows with relevant speeds in either direction
    - for each row in the speeds dataframe, copy the start node, end node and all nodes in between in the correct order to a new way. copy over all the attributes and tags of the way to the 'synthetic' ways. add an attribute that is is synthetic
    - if speeds returned is for nodes in oppostie direction, only copy the nodes to a new way if the way ID is the same, otherwise skip this node pair. Assuming the other way that this nodepari belongs to will be iterated over, they will be incorprated then.
    - remove the nodes that were copied to new ways from the original way. This preserves the nodes needed in the original way if only a subset of its nodes were cut out with real speeds. new ways with real speed in the opposite direction are not considered for the removal of nodes from the original way

- in osm, remove ways with one or fewer nd's

In [21]:
# filename = '/Users/askalila/Documents/TfC Offline Work/numo/Uber Speed/movement-speeds-quarterly-by-hod-san-francisco-2020-Q1.csv'

######### input path to average speed csv here
filename8am = '/Users/askalila/Documents/GitHub/32_wri_numo/4_Tool_Development/2_transform/avg_speeds/mexico_output.csv'
cityunits = 'kph' # string 'mph' or 'kph'

## for mapbox
colnames=['osm_start_node_id','osm_end_node_id', 'speed_kph_mean'] 
speedsDFpeak = pd.read_csv(filename8am, names=colnames, header=0)

## for uber

# speedsDFpeak = pd.read_csv(filename8am)



In [22]:
## Run the functions on all the ways in the osm xml tree

newwayID_seed = 1000  # This is a potential bug if wayID exists in the osm we are routing over.

start = timer()

for i,originalway in enumerate(root.findall("./way")):
    wayID = originalway.attrib['id']
    CheckNodesForSpeeds(wayID, originalway, cityunits, verbose='yes')
    
    
    if i in [3970,39700,198500,397000]: 
        bookmark = timer()
        progress = int(i/(number_ofways-1)*100)
        print(f'{progress}% completed in {np.round((bookmark - start)/60)} minutes...')

        
    # delete original ways with ref nodes 1 or fewer
    if len(originalway.findall("./nd")) < 2: 
        root.remove(originalway)
        print('removed way', wayID)
    if i == 400: break
        
end = timer()
print(f'100% completed in {np.round((end- start)/60)} minutes!')

copy pairs: []
copy pairs: []
copy pairs: []
copy pairs: []
removed node index 0
removed node index 1
removed node index 2
removed node index 3
removed node index 4
removed node index 5
removed node index 6
removed node index 7
removed node index 8
removed node index 9
removed node index 10
removed node index 11
removed node index 12
removed node index 13
removed node index 14
removed node index 15
removed node index 16
removed node index 17
removed node index 18
removed node index 19
removed node index 20
removed node index 21
removed node index 22
removed node index 23
removed node index 24
removed node index 25
removed node index 26
removed node index 27
removed node index 28
removed node index 29
removed node index 30
copy pairs: []
removed way 4754727
copy pairs: []
copy pairs: []
removed node index 0
removed node index 1
copy pairs: []
removed way 4758717
copy pairs: []
removed node index 0
removed node index 1
removed node index 2
removed node index 3
removed node index 4
copy p

removed node index 0
removed node index 1
removed node index 2
removed node index 3
removed node index 4
removed node index 5
removed node index 6
removed node index 7
removed node index 8
removed node index 9
removed node index 10
removed node index 11
removed node index 12
removed node index 13
removed node index 14
removed node index 15
copy pairs: []
removed node index 0
removed node index 1
removed node index 2
removed node index 3
copy pairs: []
removed way 24266671
removed node index 0
removed node index 1
removed node index 2
removed node index 3
removed node index 4
removed node index 5
removed node index 6
removed node index 7
removed node index 8
removed node index 9
removed node index 10
removed node index 30
copy pairs: [(11, 12), (28, 29), (28, 29)]
removed node index 0
removed node index 1
removed node index 2
removed node index 3
removed node index 9
removed node index 8
copy pairs: [(4, 5), (6, 7), (6, 7)]
copy pairs: []
copy pairs: []
copy pairs: []
copy pairs: []
cop

In [465]:
# output it to .osm file
tree.write('mx_real2.osm', encoding="utf-8",xml_declaration=True)

## Scratch pad

In [9]:
speedsDFpeak

Unnamed: 0,osm_start_node_id,osm_end_node_id,speed_kph_mean
0,1103693762,7352047998,18.000000
1,1108410345,3593177879,42.615385
2,1113852234,8374986775,49.000000
3,1132752997,6143099266,15.076923
4,1136377894,1136912188,28.307692
...,...,...,...
672228,8964396274,4303303984,97.000000
672229,9025775611,9025775612,11.000000
672230,9057689905,2077699373,30.000000
672231,9286217050,9286217049,93.000000


In [11]:
# for mapbox, check the root for the node pair's ways, if only one way exists, record the reverse speed, if more than one exists, skip it

# ET.dump(originalway)

wayID = "365975531"
root.findall("./way/[@id='" + wayID + "']")


[<Element 'way' at 0x7f7a00c37680>]

In [18]:
nodeID1 = "3699712317" 
nodeID2 = "3699714610"

for way in root.findall("./way/nd/[@ref ='"+ nodeID1 +"'].." and "./way/nd/[@ref ='"+ nodeID2 +"'].."):
    ET.dump(way)

# root.findall("//*[name()='way'][child::*[@ref='3699712317' and @ref='3699714610']]")

<way id="365975531">
    <nd ref="3699712317" />
    <nd ref="3699714610" />
    <nd ref="3699716901" />
    <nd ref="6416609666" />
    <nd ref="6184985498" />
    <nd ref="3699714609" />
    <nd ref="6416609667" />
    <nd ref="3699714607" />
    <nd ref="6416609668" />
    <nd ref="6184985499" />
    <nd ref="3699715639" />
    <nd ref="3699714605" />
    <nd ref="6416609665" />
    <nd ref="3699714603" />
    <nd ref="6184985634" />
    <nd ref="3699716833" />
    <nd ref="3699714601" />
    <nd ref="3699716829" />
    <nd ref="3699714600" />
    <nd ref="6184985616" />
    <nd ref="3699716776" />
    <tag k="highway" v="residential" />
    <tag k="name" v="Cerrada Tercera Aztecas" />
    <tag k="surface" v="asphalt" />
  </way>
  


In [24]:
# <way id="97870397">
nodeID1 = '3699715223'
nodeID2 = "7921115548"

# waylist = root.findall("./way/nd/[@ref ='"+ nodeID1 +"'].." and "./way/nd/[@ref ='"+ nodeID2 +"']..")

len(root.findall("./way/nd/[@ref ='"+ nodeID1 +"'].." and "./way/nd/[@ref ='"+ nodeID2 +"']..")) == 1


True

In [33]:
# speedsDFpeak.loc[(speedsDFpeak.osm_start_node_id == startnode) & (speedsDFpeak.osm_end_node_id == endnode)]['osm_way_id'])[0])
# relevantSpeeds = speedsDFpeak.loc[(speedsDFpeak.osm_start_node_id.isin(nodelistfloat)) & (speedsDFpeak.osm_end_node_id.isin(nodelistfloat))]

# filenamesf = '/Users/askalila/Documents/TfC Offline Work/numo/Uber Speed/movement-speeds-quarterly-by-hod-san-francisco-2020-Q1_8am.csv'
# speedsDFsf = pd.read_csv(filenamesf)
relevantSpeeds = speedsDFpeak.iloc[0:15]
# relevantSpeeds
# relevantSpeedsShort = relevantSpeeds[['osm_start_node_id','osm_end_node_id','speed_kph_mean']]


Unnamed: 0,osm_start_node_id,osm_end_node_id,speed_kph_mean
0,1103693762,7352047998,18.0
1,1108410345,3593177879,42.615385
2,1113852234,8374986775,49.0
3,1132752997,6143099266,15.076923
4,1136377894,1136912188,28.307692
5,1143436564,273691781,18.615385
6,1145521367,7194114148,30.0
7,1150145065,1150144999,18.0
8,1150145142,8023778294,21.0
9,1150379966,305268859,28.384615


In [34]:
for nodes in relevantSpeeds.iterrows():
    pair = list(nodes[1])
    startnode, endnode, realspeed_float = pair[0],pair[1], pair[2]
    realspeed = str(realspeed_float)

In [90]:
speedsDFpeak.iloc[2]

osm_start_node_id    1.113852e+09
osm_end_node_id      8.374987e+09
speed_kph_mean       4.900000e+01
Name: 2, dtype: float64

In [116]:
speedsDFpeak = pd.read_csv(filename8am, names=colnames, header=0)

# speedsDFpeak = pd.read_csv(filename8am, dtype={'0': np.int64, '1':int, 'speed':np.float64})


In [126]:
wayID = '654822858'
originalway = root.find("./way/[@id='" + wayID + "']")

nodelist = []
nodelistfloat = []
for node in originalway.iter('nd'):

    ref = node.attrib['ref']
    nodelist.append(ref)
    nodelistfloat.append(int(ref))

# get reverse of nodelist
reverseNodelistfloat = list(reversed(nodelistfloat))
reverseNodelist = list(reversed(nodelist))

# get only speeds with origin AND Destination nodes in the original way
relevantSpeeds = speedsDFpeak.loc[(speedsDFpeak.osm_start_node_id.isin(nodelistfloat)) & (speedsDFpeak.osm_end_node_id.isin(nodelistfloat))]

relevantSpeedsShort = relevantSpeeds[['osm_start_node_id','osm_end_node_id','speed_kph_mean']]

nodepairdict = dict()



# create new ways from the relevant speeds

for nodes in relevantSpeedsShort.iterrows():
    pair = list(nodes[1])
    startnode, endnode, realspeed_float = pair[0],pair[1], pair[2]
    realspeed = str(realspeed_float)

    # check if its the direction of node sequence or reverse
    if nodelistfloat.index(endnode) > nodelistfloat.index(startnode): #correct sequence     
        startIndex, endIndex = nodelistfloat.index(startnode), nodelistfloat.index(endnode)
#         CopyNodes(originalway, nodelist[startIndex:endIndex+1],realspeed,units,str(newwayID_seed))
#         newwayID_seed += 1
        nodepairdict[startIndex] = endIndex
        
        

In [77]:
root.find("./way/[@id='" + wayID + "']")

<Element 'way' at 0x7f7a00c37680>

In [137]:
startnode, endnode, str(round(startnode))

(6340618164.0, 6135961734.0, '6340618164')

In [140]:
len(root.findall("./way/nd/[@ref ='"+ str(round(startnode)) +"'].." and "./way/nd/[@ref ='"+ str(round(endnode)) +"']..")) == 1


True

In [123]:
speedsDFpeak.loc[(speedsDFpeak.osm_start_node_id.isin(nodelistfloat)) & (speedsDFpeak.osm_end_node_id.isin(nodelistfloat))]


Unnamed: 0,osm_start_node_id,osm_end_node_id,speed_kph_mean
614,6135961734,6340618164,3.0
30785,8197878242,6135961734,3.0
137954,3311110418,6340618164,11.0
489710,6135961734,8197878242,11.0
593214,6340618164,3311110418,3.0
594524,6340618164,6135961734,11.0


In [139]:
for way in ways:
    ET.dump(way)

<way id="654822858">
    <nd ref="3311110418" />
    <nd ref="6340618164" />
    <nd ref="6135961734" />
    <nd ref="8197878242" />
    <tag k="highway" v="residential" />
    <tag k="name" v="Avenida Décima Cerrada Las Torres" />
  </way>
  


In [136]:
speedsDFpeak[speedsDFpeak.osm_start_node_id == startnode]

Unnamed: 0,osm_start_node_id,osm_end_node_id,speed_kph_mean
593214,6340618164,3311110418,3.0
594524,6340618164,6135961734,11.0


In [119]:
speedsDFpeak.iloc[614]

osm_start_node_id    6.135962e+09
osm_end_node_id      6.340618e+09
speed_kph_mean       3.000000e+00
Name: 614, dtype: float64

In [117]:
speedsDFpeak

Unnamed: 0,osm_start_node_id,osm_end_node_id,speed_kph_mean
0,1103693762,7352047998,18.000000
1,1108410345,3593177879,42.615385
2,1113852234,8374986775,49.000000
3,1132752997,6143099266,15.076923
4,1136377894,1136912188,28.307692
...,...,...,...
672228,8964396274,4303303984,97.000000
672229,9025775611,9025775612,11.000000
672230,9057689905,2077699373,30.000000
672231,9286217050,9286217049,93.000000


In [4]:
python --version

NameError: name 'python' is not defined