/
find_route.py
257 lines (215 loc) · 11.1 KB
/
find_route.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# find_route.py: Traverse the 'paths' table using Dijkstra's Algorithm to find the shortest
# distance between two points on or near the railway network.
# A 'path' is a non-branching section of track joining two nodes, identified as node1_id and node2_id.
# To minimise the number of permutations involved in database access, we stipulate that
# node1_id < node2_id.
# For each path, we know its actual geographical route (expressed in the database as a PostGIS linestring)
# and its length in metres.
from sys import argv, exit
import psycopg2
import re
if len(argv) == 4:
(dbname, origin_station, destination_station) = argv[1:4]
else:
print "Usage: find_route.py <dbname> <origin-station> <destination-station>"
exit()
conn = psycopg2.connect("dbname=%s user=postgres" % dbname)
cur = conn.cursor()
# Find a geom object corresponding to the passed station string, which may be a (long, lat) pair
# or a station name. Return None if not found
def find_station_geom(station):
# try to interpret origin as a (long, lat) pair
match = re.match(r'([\+\-\d\.]+)\s*\,\s*([\+\-\d\.]+)$', station)
if match:
origin = match.groups()
# convert origin point to a geometry object
cur.execute("""
SELECT ST_SetSRID(ST_Point(%s, %s), 4326)
""", (float(origin[0]), float(origin[1])))
(geom, ) = cur.fetchone()
return geom
else:
# look up as a station name
cur.execute("""
SELECT geom
FROM stations
WHERE name = %s
""", (station,))
result = cur.fetchone()
if result:
(geom, ) = result
return geom
origin_geom = find_station_geom(origin_station)
if not origin_geom:
print "Could not find station %s" % origin_station
exit()
destination_geom = find_station_geom(destination_station)
if not destination_geom:
print "Could not find station %s" % destination_station
exit()
# Any paths which pass within this number of metres of the origin/destination point
# are considered to be valid start/end positions for the route
STATION_RADIUS = 80
# Set up data structures used during the algorithm's run
# Mapping from node ID to the shortest distance so far found between the origin point and that node
distance_to_node = {}
# Mapping from node ID to a (path_id, last_node_id, start_frac, end_frac) tuple, where:
# path_id = ID of the path immediately leading up to that node on the shortest route found so far
# last_node_id = ID of the node at the far end of that path
# start_frac = how far along the linestring (expressed as a fraction from 0..1) last_node is
# end_frac = how far along the linestring (expressed as a fraction from 0..1) the present node is.
# NB start_frac and end_frac will always be 0 or 1 (indicating the full extent of the path)
# except for paths at the origin and destination, which start/end at the specific point
# closest to the original requested origin/destination point
path_to_node = {}
# Contains IDs of all nodes for which we know we've established the shortest possible route
visited_nodes = set()
# Contains IDs of all nodes which we've encountered by following paths, but have not yet
# exhausted the possibilities for finding shorter routes
seen_nodes = set()
# Helper function:
# log a new minimum distance to a specified node (and the IDs of the previous path and node leading
# up to it) iff it represents an improvement over the previous logged distance
def log_distance(node_id, last_node_id, distance, path_id, start_frac, end_frac):
if (node_id not in distance_to_node) or distance_to_node[node_id] > distance:
distance_to_node[node_id] = distance
path_to_node[node_id] = (path_id, last_node_id, start_frac, end_frac)
seen_nodes.add(node_id)
print "Finding paths within %d metres of the origin station..." % STATION_RADIUS
# Find the details of paths which pass within STATION_RADIUS metres of the origin point.
# ST_Line_Locate_Point gives us a value between 0 and 1 indicating how far along the linestring
# the closest point to the origin point is.
cur.execute("""
SELECT id, length, node1_id, node2_id, ST_Line_Locate_Point(linestring, %s) AS distance_along
FROM paths
WHERE ST_Distance_Sphere(linestring, %s) < %s
""", (origin_geom, origin_geom, STATION_RADIUS))
# The nodes on either end of these paths form the initial set of 'seen' nodes.
# By scaling our distance_along value (a fraction from 0 to 1) by the total length of the path,
# we can determine the absolute distance from the origin point (or rather, the nearest point on the
# path to the origin point) to these end nodes.
for (path_id, length, node1_id, node2_id, distance_along) in cur:
log_distance(node1_id, None, length * distance_along, path_id, distance_along, 0)
log_distance(node2_id, None, length * (1 - distance_along), path_id, distance_along, 1)
# Now repeat the process at the destination end. When we perform database queries to find
# onward paths to follow from a particular node, we'll also check the node on our list of candidate
# destination nodes.
print "Finding paths within %d metres of the destination station..." % STATION_RADIUS
cur.execute("""
SELECT id, length, node1_id, node2_id, ST_Line_Locate_Point(linestring, %s) AS distance_along
FROM paths
WHERE ST_Distance_Sphere(linestring, %s) < %s
""", (destination_geom, destination_geom, STATION_RADIUS))
# The destination_nodes dict will contain the node IDs of the nodes at either end of the paths
# which pass within STATION_RADIUS metres of the destination point.
# These are mapped to a tuple of (distance, path_id, start_frac, end_frac), where:
# distance = absolute distance from that node to the destination point
# (or rather, the nearest point on the path to the destination point)
# start_frac = how far along the linestring (as a fraction from 0..1) the end node is
# (i.e. 0 if the end node is node1, 1 if it's node2)
# end_frac = how far along the linestring (as a fraction from 0..1) the destination point is
destination_nodes = {}
for (path_id, length, node1_id, node2_id, distance_along) in cur:
node1_distance = length * distance_along
# store a new distance/path against this node ID if we haven't encountered it already,
# or if the new distance is shorter than the previously encountered one
if (node1_id not in destination_nodes) or (destination_nodes[node1_id] > node1_distance):
destination_nodes[node1_id] = (node1_distance, path_id, 0, distance_along)
node2_distance = length * (1 - distance_along)
if (node2_id not in destination_nodes) or (destination_nodes[node2_id] > node2_distance):
destination_nodes[node2_id] = (node2_distance, path_id, 1, distance_along)
while True:
# Look for the node in seen_nodes with the smallest distance from the origin.
# Since this is the smallest, we know that we cannot possibly improve on it by following a
# route through any of the other nodes in seen_nodes - and thus we can declare it a minimal
# route (and move it to the visited_nodes set).
current_distance = None # the shortest distance we've encountered so far in our scan through seen_nodes
current_node = None # ID of the node which has this shortest distance
for node_id in seen_nodes:
# update current_distance and current_node if it's an improvement over the shortest distance found so far
if current_distance is None or distance_to_node[node_id] < current_distance:
current_node = node_id
current_distance = distance_to_node[node_id]
if current_node is None or current_node == -1:
# A current_node of None means that seen_nodes was empty - i.e. we have exhausted the
# set of possible nodes to check.
# -1 is a special case node ID representing our destination point, and this indicates that
# we have found a minimal route to that destination and thus arrived at our answer.
break # in either case, we can stop iterating
print "following paths from node %s (%s metres from origin)" % (current_node, current_distance)
# Move current_node from seen_nodes to visited_nodes, as we can be sure it's a minimal route
seen_nodes.remove(current_node)
visited_nodes.add(current_node)
# Query the database for all paths leading on from this node, and the IDs of the nodes at the far end
cur.execute("""
SELECT
id,
(CASE WHEN node1_id = %s THEN node2_id ELSE node1_id END) AS node_id,
length,
(CASE WHEN node1_id = %s THEN 0 ELSE 1 END) AS start_frac
FROM paths
WHERE node1_id = %s OR node2_id = %s
""", (current_node, current_node, current_node, current_node))
for (path_id, next_node, length, start_frac) in cur:
if next_node in visited_nodes:
# ignore paths that take us back to a node that we already have a minimal route for
continue
# compute the distance from the origin to this new node as
# current_distance (the distance from origin to current_node) plus the new path length,
# and log this in our data structures if it represents an improvement over any previous route
# to that node
log_distance(next_node, current_node, current_distance + length, path_id, start_frac, 1 - start_frac)
# also, see if current_node has an entry in destination_nodes; if it does, we can treat that
# as equivalent to a path taking us to the final destination point (indicated with the dummy
# node ID -1).
if current_node in destination_nodes:
(distance, path_id, start_frac, end_frac) = destination_nodes[current_node]
log_distance(-1, current_node, current_distance + distance, path_id, start_frac, end_frac)
# We have now left the loop; a current_node of -1 indicates that we have successfully reached
# the destination point (and thus found a minimal route to it).
if current_node == -1:
# Each node's entry in path_to_node points to the path and previous node leading up to it,
# so we can follow the chain back until we reach a node in our initial origin set, which
# has 'None' as its predecessor.
route_geom = None # assemble the geometry for the full route as a 'multilinestring' geometry type in this variable
while current_node is not None:
(path_id, next_node, start_frac, end_frac) = path_to_node[current_node]
print "%d to %d via path %d" % (current_node, (next_node or 0), path_id)
if start_frac == end_frac:
# avoid adding single points to the accumulated geometry, as this turns it into
# a geometrycollection rather than a multilinestring
pass
if start_frac <= end_frac:
# retrieve the substring of the linestring between start_frac and end_frac
cur.execute("""
SELECT ST_Collect(
ST_Line_Substring(linestring, %s, %s),
%s
)
FROM paths
WHERE id = %s
""", (start_frac, end_frac, route_geom, path_id))
else:
# start_frac > end_frac, indicating that we follow the path in reverse;
# ST_Line_Substring can't handle reversed substrings itself, so we need to switch the
# endpoints around, then reverse the result
cur.execute("""
SELECT ST_Collect(
ST_Reverse(ST_Line_Substring(linestring, %s, %s)),
%s
)
FROM paths
WHERE id = %s
""", (end_frac, start_frac, route_geom, path_id))
(route_geom, ) = cur.fetchone()
current_node = next_node
# condense route_geom into a single linestring and output it as KML
cur.execute("""
SELECT ST_AsKML(ST_LineMerge(ST_CollectionExtract(%s, 2)))
""", (route_geom,))
(kml, ) = cur.fetchone()
print kml
else:
print "Exhausted search without finding destination point"
cur.close()
conn.close()