Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 231 lines (195 sloc) 12.083 kB
2708db4 @qwghlm Initial commit of Python code
authored
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#pylint: disable=W0142,R0201
"""

When's My Bus?
(c) 2011-12 Chris Applegate (chris AT qwghlm DOT co DOT uk)
Released under the MIT License

A Twitter bot that takes requests for a bus, and replies with the real-time data from TfL on Twitter

Inherits many methods and data structures from WhensMyTransport, including: loading the databases, config, connecting to Twitter,
reading @ replies, replying to them, checking new followers, following them back

This module just does work specific to buses: Parsing & interpreting a bus-specific message, and looking it up against the database of
buses and routes, checking the TfL bus API and formatting an appropriate reply to be sent back
"""
03feaac @qwghlm Add unit testing compatibility
authored
18 # Standard libraries of Python 2.6
01207a3 @qwghlm Return logging to native Python
authored
19 import logging
2708db4 @qwghlm Initial commit of Python code
authored
20 import re
6d82f98 @qwghlm Cleanup imports, move some module-specific functions from utils into …
authored
21 from time import localtime
29574e4 @qwghlm Whitespace fixes
authored
22 from pprint import pprint # For debugging
2708db4 @qwghlm Initial commit of Python code
authored
23
c582c69 @qwghlm Merge geotools and geocoding modules
authored
24 # From other modules in this package
d3ee47c @qwghlm Split WMT*/WMB/WMT into three files
authored
25 from whensmytransport import WhensMyTransport
3c19760 @qwghlm Move out location queries to separate WMTLocations object
authored
26 from lib.geo import heading_to_direction
a3f40d5 @qwghlm Reorganise & refactor library files into separate lib/ directory
authored
27 from lib.exceptions import WhensMyTransportException
5d9f626 @qwghlm Abstract data models for BusStop, Train etc. to separate module
authored
28 from lib.models import BusStop
3e8edfb @qwghlm Merge in from bleeding-edge
authored
29
29574e4 @qwghlm Whitespace fixes
authored
30
2e8d574 @qwghlm Some generification and refactoring
authored
class WhensMyBus(WhensMyTransport):
    """
    Main class devoted to checking for bus-related Tweets and replying to them.

    The attributes and methods used below but not defined here (self.geodata, self.geocoder, self.browser,
    tokenize_message(), check_tweets()) are expected to come from the WhensMyTransport parent class.
    """
    def __init__(self, testing=None, silent=False):
        """
        Constructor for the WhensMyBus class

        Passes testing and silent straight through to the WhensMyTransport constructor, under the instance name 'whensmybus'
        """
        WhensMyTransport.__init__(self, 'whensmybus', testing, silent)

    def parse_message(self, message):
        """
        Parse a Tweet - tokenize it, and then pull out any bus numbers in it

        Returns a tuple of (route_numbers, origin, destination), where route_numbers is a list of upper-cased route numbers.
        Returns (None, None, None) if no token in the Tweet looks like a route number
        """
        route_regex = "[A-Z]{0,2}[0-9]{1,3}"
        (route_string, origin, destination) = self.tokenize_message(message, route_regex)

        # re.match() only anchors at the start of a token, so a token just has to *begin* with something route-like;
        # whatever matched is what gets used as the route number
        route_numbers = []
        for token in route_string.split(' '):
            token_match = re.match(route_regex, token, re.I)
            if token_match:
                route_numbers.append(token_match.group(0).upper())

        if not route_numbers:
            logging.debug("@ reply didn't contain a valid-looking bus number, skipping")
            return (None, None, None)

        return (route_numbers, origin, destination)

    def process_individual_request(self, route_number, origin, destination, position=None):
        """
        Take an individual route number, with either origin or position, and optional destination, and work out
        the stops and thus the appropriate times for the user, and return an appropriate reply to that user

        Returns the reply as a string: the route number followed by the departure details for each stop, separated by "; "

        Raises WhensMyTransportException if the route does not exist, if no stop could be found for the request, or if
        there is no arrival data for the stops found. Exceptions raised while looking up the origin or fetching the
        departure data are passed on to the caller; only those raised while looking up the destination are swallowed
        """
        # Not all valid-looking bus numbers are real bus numbers (e.g. 214, RV11) so we check database to make sure
        if not self.geodata.check_existence_of('Route', route_number):
            raise WhensMyTransportException('nonexistent_bus', route_number)

        # Dig out relevant stop for this route from the geotag, if provided
        if position:
            relevant_stops = self.get_stops_by_geolocation(route_number, position)
        # Else there will be an origin (either a number or a placename), so try parsing it properly
        else:
            relevant_stops = self.get_stops_by_stop_name(route_number, origin)

        # See if we can narrow down the runs offered by destination
        if relevant_stops and destination:
            try:
                possible_destinations = self.get_stops_by_stop_name(route_number, destination)
                if possible_destinations:
                    # Filter by possible destinations. For each Run, see if there is a stop matching the destination on the same
                    # run; if that stop has a sequence number greater than this stop then it's a valid route, so include this run
                    # (dict() of a generator rather than a dict comprehension, to stay compatible with Python 2.6)
                    relevant_stops = dict((run, stop) for (run, stop) in relevant_stops.items()
                                          if run in possible_destinations
                                          and possible_destinations[run].sequence > stop.sequence)
                    logging.debug("Successfully found a match for destination %s, filtering down to runs: %s", destination, relevant_stops.keys())

            # We may not be able to find a destination, in which case - don't worry about this bit, and stick to unfiltered
            except WhensMyTransportException:
                logging.debug("Could not find a destination matching %s this route, skipping and not filtering results", destination)

        # No stops at all (or none left after filtering by destination): tell the user what we could not find
        if not relevant_stops:
            # origin may be missing altogether when the request came with a geotag, so check it before handing it to re.match(),
            # which would otherwise raise a TypeError rather than the exception the caller expects
            if origin and re.match('^[0-9]{5}$', origin):
                raise WhensMyTransportException('stop_id_not_found', route_number, origin)
            raise WhensMyTransportException('stop_name_not_found', route_number, origin)

        # The above has found stops on this route, so get data for each
        departure_data = self.get_departure_data(relevant_stops, route_number)
        if not departure_data:
            raise WhensMyTransportException('no_bus_arrival_data', route_number)
        return "%s %s" % (route_number, "; ".join(departure_data))

    def get_stops_by_geolocation(self, route_number, position):
        """
        Take a route number and a tuple specifying latitude & longitude, and works out closest bus stops in each direction

        Returns a dictionary:
            Keys are numbers of the Run (usually 1 or 2, sometimes 3 or 4).
            Values are BusStop objects
        Runs for which no closest stop was found are left out of the dictionary
        """
        # A route typically has two "runs" (e.g. one eastbound, one west) but some have more than that, so work out how many we have to check
        max_runs = self.geodata.get_max_value('Run', {'Route': route_number})

        relevant_stops = {}
        for run in range(1, max_runs + 1):
            closest_stop = self.geodata.find_closest(position, {'Route': route_number, 'Run': run}, BusStop)
            if closest_stop:
                relevant_stops[run] = closest_stop

        stop_numbers = [stop.number for stop in relevant_stops.values()]
        logging.debug("Have found stop numbers: %s", ', '.join(stop_numbers))
        return relevant_stops

    def get_stops_by_stop_number(self, route_number, stop_number):
        """
        Take a route_number and a stop with ID stop_number, returns a dictionary with a single value. Key is the Run this stop sits on,
        value is the corresponding BusStop object

        Returns an empty dictionary if the stop exists but is not on this route.
        Raises WhensMyTransportException if the stop ID is not in the database at all
        """
        # Pull the stop ID out of the routes database and see if it exists
        if not self.geodata.check_existence_of('Bus_Stop_Code', stop_number):
            raise WhensMyTransportException('bad_stop_id', stop_number)

        # Try and get a match on it
        logging.debug("Attempting to get an exact match on stop SMS ID %s", stop_number)
        stop = self.geodata.find_exact_match({'Bus_Stop_Code': stop_number, 'Route': route_number}, BusStop)
        if not stop:
            return {}
        return {stop.run: stop}

    def get_stops_by_stop_name(self, route_number, origin):
        """
        Take a route number and name of the origin, and work out closest bus stops in each direction

        Returns a dictionary. Keys are numbers of the Run (usually 1 or 2, sometimes 3 and 4). Values are BusStop objects

        If origin is a five-digit stop ID rather than a name, this hands over to get_stops_by_stop_number() instead
        """
        # First check to see if the name is actually an ID number - if so, then use the more precise numeric method above
        if re.match('^[0-9]{5}$', origin):
            return self.get_stops_by_stop_number(route_number, origin)

        # First off, try to get a match against bus stop names in database
        # Users may not give exact details, so we try to match fuzzily
        logging.debug("Attempting to get a match on placename %s", origin)
        relevant_stops = {}

        # A route typically has two "runs" (e.g. one eastbound, one west) but some have more than that, so work out how many we have to check
        max_runs = self.geodata.get_max_value('Run', {'Route': route_number})
        for run in range(1, max_runs + 1):
            best_match = self.geodata.find_fuzzy_match({'Route': route_number, 'Run': run}, origin, BusStop)
            if best_match:
                logging.info("Found stop name %s for Run %s via fuzzy matching", best_match.name, best_match.run)
                relevant_stops[run] = best_match

        # If we can't find a location for either Run 1 or 2, use the geocoder to find a location on that Run matching our name
        # The geocoder's answer depends only on origin, so it is looked up at most once and shared between both runs
        points = None
        for run in (1, 2):
            if run in relevant_stops or not self.geocoder:
                continue

            logging.debug("No match found for run %s, attempting to get geocode placename %s", run, origin)
            if points is None:
                geocode_url = self.geocoder.get_geocode_url(origin)
                geodata = self.browser.fetch_json(geocode_url)
                # list() so that the result can safely be walked once per run, whatever kind of iterable comes back
                points = list(self.geocoder.parse_geodata(geodata) or [])
            if not points:
                logging.debug("Could not find any matching location for %s", origin)
                continue

            # For each of the places found, get the nearest stop that serves this run
            possible_stops = [self.get_stops_by_geolocation(route_number, point).get(run, None) for point in points]
            possible_stops = [stop for stop in possible_stops if stop]
            if possible_stops:
                # Picks whichever stop sorts first under BusStop's own ordering, which is defined in lib.models
                # NOTE(review): presumably that ordering is by distance from the point - confirm
                relevant_stops[run] = sorted(possible_stops)[0]
                logging.debug("Have found stop named: %s", relevant_stops[run].name)
            else:
                logging.debug("Found a location, but could not find a nearby stop for %s", origin)

        return relevant_stops

    def get_departure_data(self, relevant_stops, route_number):
        """
        Fetch the JSON data from the TfL website, for a dictionary of relevant_stops (keys are Runs, values are BusStop
        objects) and a particular route_number, and returns the time(s) of buses on that route serving
        that stop(s)

        Returns a list of strings, one per stop and in order of Run. If there are more than two stops, those with no
        bus shown are left out, so the list may be empty

        Raises WhensMyTransportException if TfL's data reports a system error instead of any arrivals
        """
        # One (has_bus, text) pair per stop. Keeping the flag separate means we never have to search the text later on
        board_entries = []

        # Go through the runs in numerical order so that the reply always reads the same way round
        for run in sorted(relevant_stops):
            stop = relevant_stops[run]
            stop_name = stop.get_clean_name()
            tfl_url = "http://countdown.tfl.gov.uk/stopBoard/%s" % stop.number
            bus_data = self.browser.fetch_json(tfl_url)
            arrivals = bus_data.get('arrivals', [])

            # Handle TfL's JSON-encoded error message
            if not arrivals and bus_data.get('stopBoardMessage', '') == "noPredictionsDueToSystemError":
                raise WhensMyTransportException('tfl_server_down')

            # Do the user a favour - check for both number and possible Night Bus version of the bus
            wanted_routes = (route_number, 'N' + route_number)
            relevant_arrivals = [a for a in arrivals
                                 if a['routeName'] in wanted_routes and a['isRealTime'] and not a['isCancelled']]

            if not relevant_arrivals:
                logging.debug("Run %s, stop %s produced no buses", run, stop_name)
                board_entries.append((False, "%s: None shown going %s" % (stop_name, heading_to_direction(stop.heading))))
                continue

            # Only the first matching arrival in the data is reported
            arrival = relevant_arrivals[0]
            scheduled_time = arrival['scheduledTime'].replace(':', '')
            # Short hack to get BST working
            # tm_isdst is 1 in summer time, 0 otherwise and -1 if unknown, so compare explicitly rather than rely on truthiness
            if localtime().tm_isdst > 0:
                hour = (int(scheduled_time[0:2]) + 1) % 24
                scheduled_time = '%02d%s' % (hour, scheduled_time[2:4])

            logging.debug("Run %s, stop %s produced bus to %s %s", run, stop_name, arrival['destination'], scheduled_time)
            board_entries.append((True, "%s to %s %s" % (stop_name, arrival['destination'], scheduled_time)))

        # If the number of runs is 3 or 4, get rid of any "None shown"
        if len(board_entries) > 2:
            logging.debug("Number of runs is %s, removing any non-existent entries", len(board_entries))
            board_entries = [entry for entry in board_entries if entry[0]]

        return [text for (_, text) in board_entries]
225
364c4fb @qwghlm Documentation
authored
# Only when run as a script (rather than imported): check our Tweets and Followers, and reply/follow as appropriate
if __name__ == "__main__":
    # No arguments are needed here, as all config is done in the file config.cfg
    WMB = WhensMyBus()
    WMB.check_tweets()
Something went wrong with that request. Please try again.