Permalink
Browse files

Complete find_route script - lookup of stations passed on the command…

… line, KML output, lots of documentation
  • Loading branch information...
1 parent c610988 commit f7a0b5c298bb68c728b82890d775dfaed61be768 @gasman gasman committed Nov 2, 2012
Showing with 199 additions and 47 deletions.
  1. +195 −45 find_route.py
  2. +4 −2 graph_reduction.txt
View
@@ -1,105 +1,255 @@
-from sys import argv
+# find_route.py: Traverse the 'paths' table using Dijkstra's Algorithm to find the shortest
+# distance between two points on or near the railway network.
+
+# A 'path' is a non-branching section of track joining two nodes, identified as node1_id and node2_id.
+# To minimise the number of permutations involved in database access, we stipulate that
+# node1_id < node2_id.
+# For each path, we know its actual geographical route (expressed in the database as a PostGIS linestring)
+# and its length in metres.
+
+from sys import argv, exit
import psycopg2
+import re
+
+if len(argv) == 4:
+ (dbname, origin_station, destination_station) = argv[1:4]
+else:
+ print "Usage: find_route.py <dbname> <origin-station> <destination-station>"
+ exit()
-conn = psycopg2.connect("dbname=%s user=postgres" % argv[1])
+conn = psycopg2.connect("dbname=%s user=postgres" % dbname)
cur = conn.cursor()
-origin = (-1.2703813, 51.7534277) # Oxford
-destination = (-0.1764437, 51.5160254) # Paddington
+# Find a geom object corresponding to the passed station string, which may be a (long, lat) pair
+# or a station name. Return None if not found
+def find_station_geom(station):
+ # try to interpret origin as a (long, lat) pair
+ match = re.match(r'([\+\-\d\.]+)\s*\,\s*([\+\-\d\.]+)$', station)
+ if match:
+ origin = match.groups()
+ # convert origin point to a geometry object
+ cur.execute("""
+ SELECT ST_SetSRID(ST_Point(%s, %s), 4326)
+ """, (float(origin[0]), float(origin[1])))
+ (geom, ) = cur.fetchone()
+ return geom
+ else:
+ # look up as a station name
+ cur.execute("""
+ SELECT geom
+ FROM stations
+ WHERE name = %s
+ """, (station,))
+ result = cur.fetchone()
+ if result:
+ (geom, ) = result
+ return geom
+
+origin_geom = find_station_geom(origin_station)
+if not origin_geom:
+ print "Could not find station %s" % origin_station
+ exit()
+
+destination_geom = find_station_geom(destination_station)
+if not destination_geom:
+ print "Could not find station %s" % destination_station
+ exit()
+
+
+# Any paths which pass within this number of metres of the origin/destination point
+# are considered to be valid start/end positions for the route
+STATION_RADIUS = 80
+
+
+# Set up data structures used during the algorithm's run
+
+# Mapping from node ID to the shortest distance so far found between the origin point and that node
distance_to_node = {}
-predecessor_for_node = {}
+
+# Mapping from node ID to a (path_id, last_node_id, start_frac, end_frac) tuple, where:
+# path_id = ID of the path immediately leading up to that node on the shortest route found so far
+# last_node_id = ID of the node at the far end of that path
+# start_frac = how far along the linestring (expressed as a fraction from 0..1) last_node is
+# end_frac = how far along the linestring (expressed as a fraction from 0..1) the present node is.
+# NB start_frac and end_frac will always be 0 or 1 (indicating the full extent of the path)
+# except for paths at the origin and destination, which start/end at the specific point
+# closest to the original requested origin/destination point
+path_to_node = {}
+
+# Contains IDs of all nodes for which we know we've established the shortest possible route
visited_nodes = set()
+
+# Contains IDs of all nodes which we've encountered by following paths, but have not yet
+# exhausted the possibilities for finding shorter routes
seen_nodes = set()
-# log a new minimum distance to a specified node (and the ID of the previous node leading up to it)
-# iff it represents an improvement over the previous logged distance
-def log_distance(node_id, last_node_id, distance):
+# Helper function:
+# log a new minimum distance to a specified node (and the IDs of the previous path and node leading
+# up to it) iff it represents an improvement over the previous logged distance
+def log_distance(node_id, last_node_id, distance, path_id, start_frac, end_frac):
if (node_id not in distance_to_node) or distance_to_node[node_id] > distance:
distance_to_node[node_id] = distance
- predecessor_for_node[node_id] = last_node_id
+ path_to_node[node_id] = (path_id, last_node_id, start_frac, end_frac)
seen_nodes.add(node_id)
-# convert origin point to a geometry object
-cur.execute("""SELECT ST_SetSRID(ST_Point(%s, %s), 4326)""", (origin[0], origin[1]))
-(geom, ) = cur.fetchone()
-print "Finding paths within 80 metres of the origin station..."
+print "Finding paths within %d metres of the origin station..." % STATION_RADIUS
+# Find the details of paths which pass within STATION_RADIUS metres of the origin point.
+# ST_Line_Locate_Point gives us a value between 0 and 1 indicating how far along the linestring
+# the closest point to the origin point is.
cur.execute("""
SELECT id, length, node1_id, node2_id, ST_Line_Locate_Point(linestring, %s) AS distance_along
FROM paths
- WHERE ST_Distance_Sphere(linestring, %s) < 80
-""", (geom, geom))
+ WHERE ST_Distance_Sphere(linestring, %s) < %s
+""", (origin_geom, origin_geom, STATION_RADIUS))
-# log the distances to the end points of these closest paths, calculated from length and distance_along
+# The nodes on either end of these paths form the initial set of 'seen' nodes.
+# By scaling our distance_along value (a fraction from 0 to 1) by the total length of the path,
+# we can determine the absolute distance from the origin point (or rather, the nearest point on the
+# path to the origin point) to these end nodes.
for (path_id, length, node1_id, node2_id, distance_along) in cur:
- log_distance(node1_id, None, length * distance_along)
- log_distance(node2_id, None, length * (1 - distance_along))
-
+ log_distance(node1_id, None, length * distance_along, path_id, distance_along, 0)
+ log_distance(node2_id, None, length * (1 - distance_along), path_id, distance_along, 1)
-# convert destination point to a geometry object
-cur.execute("""SELECT ST_SetSRID(ST_Point(%s, %s), 4326)""", (destination[0], destination[1]))
-(geom, ) = cur.fetchone()
-print "Finding paths within 80 metres of the destination station..."
+# Now repeat the process at the destination end. When we perform database queries to find
+# onward paths to follow from a particular node, we'll also check the node on our list of candidate
+# destination nodes.
+print "Finding paths within %d metres of the destination station..." % STATION_RADIUS
cur.execute("""
SELECT id, length, node1_id, node2_id, ST_Line_Locate_Point(linestring, %s) AS distance_along
FROM paths
- WHERE ST_Distance_Sphere(linestring, %s) < 80
-""", (geom, geom))
+ WHERE ST_Distance_Sphere(linestring, %s) < %s
+""", (destination_geom, destination_geom, STATION_RADIUS))
+
+
+# The destination_nodes dict will contain the node IDs of the nodes at either end of the paths
+# which pass within STATION_RADIUS metres of the destination point.
+# These are mapped to a tuple of (distance, path_id, start_frac, end_frac), where:
+# distance = absolute distance from that node to the destination point
+# (or rather, the nearest point on the path to the destination point)
+# start_frac = how far along the linestring (as a fraction from 0..1) the end node is
+# (i.e. 0 if the end node is node1, 1 if it's node2)
+# end_frac = how far along the linestring (as a fraction from 0..1) the destination point is
+destination_nodes = {}
-destination_nodes = {} # stores distances between the destination point and the nodes at the end of the adjoining paths
-# log the distances to the end points of these closest paths, calculated from length and distance_along
for (path_id, length, node1_id, node2_id, distance_along) in cur:
node1_distance = length * distance_along
+ # store a new distance/path against this node ID if we haven't encountered it already,
+ # or if the new distance is shorter than the previously encountered one
if (node1_id not in destination_nodes) or (destination_nodes[node1_id] > node1_distance):
- destination_nodes[node1_id] = node1_distance
+ destination_nodes[node1_id] = (node1_distance, path_id, 0, distance_along)
node2_distance = length * (1 - distance_along)
if (node2_id not in destination_nodes) or (destination_nodes[node2_id] > node2_distance):
- destination_nodes[node2_id] = node2_distance
+ destination_nodes[node2_id] = (node2_distance, path_id, 1, distance_along)
+
while True:
- # find the node in seen_nodes with the smallest distance
- current_distance = None
- current_node = None
+ # Look for the node in seen_nodes with the smallest distance from the origin.
+ # Since this is the smallest, we know that we cannot possibly improve on it by following a
+ # route through any of the other nodes in seen_nodes - and thus we can declare it a minimal
+ # route (and move it to the visited_nodes set).
+
+ current_distance = None # the shortest distance we've encountered so far in our scan through seen_nodes
+ current_node = None # ID of the node which has this shortest distance
for node_id in seen_nodes:
+ # update current_distance and current_node if it's an improvement over the shortest distance found so far
if current_distance is None or distance_to_node[node_id] < current_distance:
current_node = node_id
current_distance = distance_to_node[node_id]
if current_node is None or current_node == -1:
- break # no more nodes, or encountered the destination node (special-cased with node ID -1)
+ # A current_node of None means that seen_nodes was empty - i.e. we have exhausted the
+ # set of possible nodes to check.
+ # -1 is a special case node ID representing our destination point, and this indicates that
+ # we have found a minimal route to that destination and thus arrived at our answer.
+ break # in either case, we can stop iterating
print "following paths from node %s (%s metres from origin)" % (current_node, current_distance)
+ # Move current_node from seen_nodes to visited_nodes, as we can be sure it's a minimal route
seen_nodes.remove(current_node)
visited_nodes.add(current_node)
- # find all neighbouring nodes
+ # Query the database for all paths leading on from this node, and the IDs of the nodes at the far end
cur.execute("""
- SELECT (CASE WHEN node1_id = %s THEN node2_id ELSE node1_id END) AS node_id, length
+ SELECT
+ id,
+ (CASE WHEN node1_id = %s THEN node2_id ELSE node1_id END) AS node_id,
+ length,
+ (CASE WHEN node1_id = %s THEN 0 ELSE 1 END) AS start_frac
FROM paths
WHERE node1_id = %s OR node2_id = %s
- """, (current_node, current_node, current_node))
+ """, (current_node, current_node, current_node, current_node))
- for (next_node, length) in cur:
+ for (path_id, next_node, length, start_frac) in cur:
if next_node in visited_nodes:
+ # ignore paths that take us back to a node that we already have a minimal route for
continue
- log_distance(next_node, current_node, current_distance + length)
+ # compute the distance from the origin to this new node as
+ # current_distance (the distance from origin to current_node) plus the new path length,
+ # and log this in our data structures if it represents an improvement over any previous route
+ # to that node
+ log_distance(next_node, current_node, current_distance + length, path_id, start_frac, 1 - start_frac)
- # also, see if current_node is present in destination_nodes, which means that there's a direct
- # link from it to the destination point (designated with the special ID number -1)
+ # also, see if current_node has an entry in destination_nodes; if it does, we can treat that
+ # as equivalent to a path taking us to the final destination point (indicated with the dummy
+ # node ID -1).
if current_node in destination_nodes:
- log_distance(-1, current_node, current_distance + destination_nodes[current_node])
+ (distance, path_id, start_frac, end_frac) = destination_nodes[current_node]
+ log_distance(-1, current_node, current_distance + distance, path_id, start_frac, end_frac)
+# We have now left the loop; a current_node of -1 indicates that we have successfully reached
+# the destination point (and thus found a minimal route to it).
if current_node == -1:
- print "path back from destination:"
- while current_node:
- print current_node
- current_node = predecessor_for_node.get(current_node)
+ # Each node's entry in path_to_node points to the path and previous node leading up to it,
+ # so we can follow the chain back until we reach a node in our initial origin set, which
+ # has 'None' as its predecessor.
+
+ route_geom = None # assemble the geometry for the full route as a 'multilinestring' geometry type in this variable
+ while current_node is not None:
+ (path_id, next_node, start_frac, end_frac) = path_to_node[current_node]
+ print "%d to %d via path %d" % (current_node, (next_node or 0), path_id)
+ if start_frac == end_frac:
+ # avoid adding single points to the accumulated geometry, as this turns it into
+ # a geometrycollection rather than a multilinestring
+ pass
+ if start_frac <= end_frac:
+ # retrieve the substring of the linestring between start_frac and end_frac
+ cur.execute("""
+ SELECT ST_Collect(
+ ST_Line_Substring(linestring, %s, %s),
+ %s
+ )
+ FROM paths
+ WHERE id = %s
+ """, (start_frac, end_frac, route_geom, path_id))
+ else:
+ # start_frac > end_frac, indicating that we follow the path in reverse;
+ # ST_Line_Substring can't handle reversed substrings itself, so we need to switch the
+ # endpoints around, then reverse the result
+ cur.execute("""
+ SELECT ST_Collect(
+ ST_Reverse(ST_Line_Substring(linestring, %s, %s)),
+ %s
+ )
+ FROM paths
+ WHERE id = %s
+ """, (end_frac, start_frac, route_geom, path_id))
+
+ (route_geom, ) = cur.fetchone()
+ current_node = next_node
+
+ # condense route_geom into a single linestring and output it as KML
+ cur.execute("""
+ SELECT ST_AsKML(ST_LineMerge(ST_CollectionExtract(%s, 2)))
+ """, (route_geom,))
+ (kml, ) = cur.fetchone()
+ print kml
else:
print "Exhausted search without finding destination point"
View
@@ -103,6 +103,8 @@ python build_paths.py traingraph
python add_linestrings.py traingraph
-10) Output as KML
+10) The database is now ready to run path-finding queries using the find_route script:
-python graph_to_kml.py traingraph > railways.kml
+python find_route.py traingraph Oxford "London Paddington"
+
+- which will output a KML LineString element indicating the shortest path between the two stations.

0 comments on commit f7a0b5c

Please sign in to comment.