-
Notifications
You must be signed in to change notification settings - Fork 1
/
whensmytransport.py
373 lines (314 loc) · 16.5 KB
/
whensmytransport.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#pylint: disable=W0142,R0201
"""
When's My Transport?
A Twitter bot that takes requests for a bus or Tube, and replies on Twitter with the real-time data from TfL
This is the parent class used by all three bots, handling functionality common to them all, such as (but not limited to)
loading the databases, config, connecting to Twitter, reading @ replies, replying to them, checking new followers, following them back
as well as models and classes for useful constructs such as Trains and Stations
(c) 2011-12 Chris Applegate (chris AT qwghlm DOT co DOT uk)
Released under the MIT License
Things to do:
WhensMyTube/DLR:
- Destination handling
- Direction handling
General:
- Equivalent for National Rail (alas, tram & boat have no public APIs)
- Better Natural Language parsing
- Knowledge of network layouts for Tube & bus
- Checking of TfL APIs for weekend & long-term closures
"""
# Standard libraries of Python 2.6
import ConfigParser
import logging
import os
import re
import sys
from pprint import pprint # For debugging
# From library modules in this package
from lib.browser import WMTBrowser
from lib.exceptions import WhensMyTransportException
from lib.geo import convertWGS84toOSEastingNorthing, gridrefNumToLet, YahooGeocoder
from lib.locations import WMTLocations
from lib.logger import setup_logging
from lib.models import RailStation
from lib.twitterclient import WMTTwitterClient, is_direct_message
# Some constants we use
# Version of the bot code, stored as a float; it is not referenced in the part of the file visible here
VERSION_NUMBER = 0.60
# Absolute path of the directory containing this module; config.cfg is looked up relative to it
HOME_DIR = os.path.dirname(os.path.abspath(__file__))
class WhensMyTransport:
    """
    Parent class for all WhensMy* bots, with common functions shared by all

    Child classes are expected to override parse_message() and process_individual_request(); the versions
    here are placeholders. This is deliberately left as an old-style class declaration, as subclasses call
    WhensMyTransport.__init__() directly
    """
    def __init__(self, instance_name, testing=None, silent_mode=False):
        """
        Read config and set up logging, settings database, geocoding and Twitter OAuth

        instance_name is the name of the config section to read, e.g. 'whensmybus' or 'whensmytube'
        testing, when not None, overrides the test_mode option from the config file
        silent_mode is passed straight through to setup_logging()

        Exits the process with status 1 if config.cfg cannot be opened or has no usable section for this instance
        """
        # Instance name is something like 'whensmybus', 'whensmytube'
        self.instance_name = instance_name

        config_file = 'config.cfg'
        config_path = HOME_DIR + '/' + config_file
        try:
            # Try opening the file first just to see if it exists (an IOError is caught below). The handle
            # is closed again straight away so that it is not leaked
            open(config_path).close()
            config = ConfigParser.SafeConfigParser({'test_mode': False,
                                                    'debug_level': 'INFO',
                                                    'yahoo_app_id': None})
            config.read(config_path)
            # Probe one option so that a missing section for this instance is detected here
            config.get(self.instance_name, 'debug_level')
        except (ConfigParser.Error, IOError):
            print("""Fatal error: can't find a valid config file with options for %s.""" % self.instance_name)
            print("""Please make sure there is a %s file in this directory""" % config_file)
            sys.exit(1)

        # Name of the admin so we know who to alert if there is an issue
        self.admin_name = config.get(self.instance_name, 'admin_name')

        # Setup debugging
        debug_level = config.get(self.instance_name, 'debug_level')
        setup_logging(self.instance_name, silent_mode, debug_level)

        # Setup database of stops/stations and their locations
        self.geodata = WMTLocations(self.instance_name)

        # Setup browser for JSON & XML
        self.browser = WMTBrowser()

        # Setup geocoder for looking up place names. If no app id is configured, self.geocoder is left as
        # the (falsy) app id value itself rather than a YahooGeocoder instance
        yahoo_app_id = config.get(self.instance_name, 'yahoo_app_id')
        self.geocoder = yahoo_app_id and YahooGeocoder(yahoo_app_id)

        # Setup Twitter client
        self.username = config.get(self.instance_name, 'username')
        consumer_key = config.get(self.instance_name, 'consumer_key')
        consumer_secret = config.get(self.instance_name, 'consumer_secret')
        access_token = config.get(self.instance_name, 'key')
        access_token_secret = config.get(self.instance_name, 'secret')
        if testing is None:
            # NOTE(review): config.get() hands back the option as text, so any non-empty value in the
            # config file (even 'False') looks truthy here and switches test mode on - confirm intended
            testing = config.get(self.instance_name, 'test_mode')
        if testing:
            logging.info("In TEST MODE - No Tweets will be made!")
        self.twitter_client = WMTTwitterClient(self.instance_name, consumer_key, consumer_secret,
                                               access_token, access_token_secret, testing)

        # This can be overridden by child classes
        self.allow_blank_tweets = False

    def check_tweets(self):
        """
        Check incoming Tweets, and reply to them

        Fetches Tweets from the Twitter client, skips invalid ones, sends every reply produced for the rest
        and finally asks the client to check followers
        """
        tweets = self.twitter_client.fetch_tweets()
        for tweet in tweets:
            # If the Tweet is not valid (e.g. not directly addressed, from ourselves) then skip it
            if not self.validate_tweet(tweet):
                continue

            # Try processing the Tweet. This may fail with a WhensMyTransportException for a number of reasons, in which
            # case we catch the exception and process an apology accordingly
            try:
                replies = self.process_tweet(tweet)
            except WhensMyTransportException as exc:
                replies = (self.process_wmt_exception(exc),)
            # Other Python Exceptions may occur too - we handle these by DMing the admin with an alert
            except Exception as exc:  #pylint: disable=W0703
                # Keep the full traceback in our own log; the admin alert only carries the exception's class name
                logging.exception("Unexpected %s while processing Tweet %s", exc.__class__.__name__, tweet.id)
                self.alert_admin_about_exception(tweet, exc.__class__.__name__)
                replies = (self.process_wmt_exception(WhensMyTransportException('unknown_error')),)

            # If the reply is blank, probably didn't contain a bus number or Tube line, so check to see if there was a thank-you
            if not replies:
                replies = self.check_politeness(tweet)

            # Send a reply back, if we have one
            for reply in replies:
                # DMs and @ replies have different structures and different handlers
                if is_direct_message(tweet):
                    self.twitter_client.send_reply_back(reply, tweet.sender.screen_name, True, tweet.id)
                else:
                    self.twitter_client.send_reply_back(reply, tweet.user.screen_name, False, tweet.id)

        self.twitter_client.check_followers()

    def validate_tweet(self, tweet):
        """
        Check to see if a Tweet is valid (i.e. we want to reply to it). Tweets from ourselves, and mentions that
        are not directly addressed to us, are ignored

        Returns True if the Tweet should be processed, False if not
        """
        message = tweet.text

        # Bit of logging, plus we always return True for DMs
        if is_direct_message(tweet):
            logging.info("Have a DM from %s: %s", tweet.sender.screen_name, message)
            return True

        username = tweet.user.screen_name
        logging.info("Have an @ reply from %s: %s", username, message)

        # Compare names case-insensitively throughout, the same way the @ reply check below does
        own_name = self.username.lower()

        # Don't start talking to yourself
        if username.lower() == own_name:
            logging.debug("Not talking to myself, that way madness lies")
            return False

        # Ignore mentions that are not direct replies
        if not message.lower().startswith('@%s' % own_name):
            logging.debug("Not a proper @ reply, skipping")
            return False

        return True

    def get_tweet_geolocation(self, tweet, user_request):
        """
        Ensure any geolocation on a Tweet is valid, and return the co-ordinates as a (latitude, longitude) tuple

        user_request is only used to fill in the error message when no usable geolocation is found

        Raises a WhensMyTransportException if the position is outside the UK or outside London, if the Tweet
        only carries a Place, or if it carries no geoinformation at all
        """
        if hasattr(tweet, 'geo') and tweet.geo and 'coordinates' in tweet.geo:
            logging.debug("Detect geolocation on Tweet")
            position = tweet.geo['coordinates']
            easting, northing = convertWGS84toOSEastingNorthing(*position)
            gridref = gridrefNumToLet(easting, northing)
            # Grid reference provides us an easy way with checking to see if in the UK - it returns blank string if not in UK bounds
            if not gridref:
                raise WhensMyTransportException('not_in_uk')
            # Check minimums & maximum numeric grid references - corresponding to Chesham (W), Shenfield (E), Dorking (S) and Potters Bar (N)
            elif not (495000 <= easting <= 565000 and 145000 <= northing <= 205000):
                raise WhensMyTransportException('not_in_london')
            else:
                return position

        # Some people (especially Tweetdeck users) add a Place on the Tweet, but not an accurate enough lat & long
        elif hasattr(tweet, 'place') and tweet.place:
            raise WhensMyTransportException('placeinfo_only', user_request)

        # If there's no geoinformation at all then raise the appropriate exception. Objects with no 'geo'
        # attribute whatsoever get the DM-specific error message
        else:
            if hasattr(tweet, 'geo'):
                raise WhensMyTransportException('no_geotag', user_request)
            else:
                raise WhensMyTransportException('dms_not_taggable', user_request)

    def check_politeness(self, tweet):
        """
        In case someone's just being nice to us, send them a "No problem"

        Returns a one-element tuple holding the reply, or an empty tuple if the Tweet is not a thank-you
        """
        message = self.sanitize_message(tweet.text).lower()
        if message.startswith(('thanks', 'thank you')):
            return ("No problem :)",)
        return ()

    def process_tweet(self, tweet):
        """
        Process a single Tweet object and return a list of replies, one per route or line
        e.g.:
            '@whensmybus 341 from Clerkenwell' produces
            '341 Clerkenwell Road to Waterloo 1241; Rosebery Avenue to Angel Road 1247'

        Each reply might be more than 140 characters

        An empty list is returned for thank-you messages and for messages that contain no request
        """
        # Don't do anything if this is a thank-you
        if self.check_politeness(tweet):
            return []

        # Get route number, from and to from the message
        message = tweet.text
        (requested_routes, origin, destination) = self.parse_message(message)
        if requested_routes is None:
            return []

        # If no origin specified, let's see if we have co-ordinates on the Tweet
        if origin is None:
            position = self.get_tweet_geolocation(tweet, ' '.join(requested_routes))
        else:
            position = None

        replies = []
        for requested_route in requested_routes:
            # Exceptions produced for an individual request are particular to a route/stop combination - e.g. the bus
            # given does not stop at the stop given, so we just provide an error message for that circumstance, treat as
            # a non-fatal error, and process the next one. The one case where there is a fatal error (TfL's servers are
            # down), we raise this exception to be caught higher up by check_tweets()
            try:
                replies.append(self.process_individual_request(requested_route, origin, destination, position))
            except WhensMyTransportException as exc:
                if exc.msgid == 'tfl_server_down':
                    raise
                replies.append(self.process_wmt_exception(exc))
        return replies

    def process_wmt_exception(self, exc):
        """
        Turns a WhensMyTransportException into a message for the user
        """
        logging.debug("Exception encountered: %s", exc.value)
        return "Sorry! %s" % exc.value

    def alert_admin_about_exception(self, tweet, exception_name):
        """
        Alert the administrator about a non-WhensMyTransportException encountered when processing a Tweet

        The alert is sent as a direct message to the configured admin_name
        """
        if is_direct_message(tweet):
            # There is no permalink to give for a DM, so we quote the time it was created at instead
            tweet_time = tweet.created_at.strftime('%d-%m-%y %H:%M:%S')
            error_message = "Hey! A DM from @%s at %s GMT caused me to crash with a %s" % (tweet.sender.screen_name, tweet_time, exception_name)
        else:
            twitter_permalink = "https://twitter.com/#!/%s/status/%s" % (tweet.user.screen_name, tweet.id)
            error_message = "Hey! A tweet from @%s caused me to crash with a %s: %s" % (tweet.user.screen_name, exception_name, twitter_permalink)
        self.twitter_client.send_reply_back(error_message, self.admin_name, True)

    def tokenize_message(self, message, request_token_regex=None, request_token_optional=False):
        """
        Split a message into tokens
        Message is of format: "@username requested_lines_or_routes [from origin] [to destination]"
        Tuple returns is of format: (requested_lines_or_routes, origin, destination)
        If we cannot find any of these three elements, None is used as default

        request_token_regex, if given, describes what a single request token looks like; it is used to guess
        where a forgotten "from" belongs
        request_token_optional allows that guessed "from" to go at the very start of the message
        """
        message = self.sanitize_message(message)
        tokens = re.split(r'\s+', message)

        # Sometime people forget to put a 'from' in their message. So we try and put one in for them
        # Go through and find the index of the first token that does not match what a request token should be
        if "from" not in tokens and request_token_regex:
            request_token = re.compile("^%s,?$" % request_token_regex, re.I)
            first_non_request_token_index = next((i for (i, token) in enumerate(tokens) if not request_token.match(token)), None)
            if first_non_request_token_index is not None and tokens[first_non_request_token_index] != "to":
                if first_non_request_token_index > 0 or request_token_optional:
                    tokens.insert(first_non_request_token_index, "from")

        # Work out what boundaries "from" and "to" exist at. A missing keyword is treated as sitting just
        # past the end of the message, so the slices below come out empty for it
        if "from" in tokens:
            from_index = tokens.index("from")
        else:
            from_index = len(tokens)

        if "to" in tokens:
            to_index = tokens.index("to")
        elif "towards" in tokens:
            to_index = tokens.index("towards")
        else:
            to_index = len(tokens)

        # The origin and destination may come in either order
        if from_index < to_index:
            request = ' '.join(tokens[:from_index]) or None
            origin = ' '.join(tokens[from_index + 1:to_index]) or None
            destination = ' '.join(tokens[to_index + 1:]) or None
        else:
            request = ' '.join(tokens[:to_index]) or None
            origin = ' '.join(tokens[from_index + 1:]) or None
            destination = ' '.join(tokens[to_index + 1:from_index]) or None

        return (request, origin, destination)

    def sanitize_message(self, message):
        """
        Takes a message and scrubs out any @username or #hashtags

        Returns the message with whitespace trimmed from both ends
        Raises a WhensMyTransportException if nothing is left and blank Tweets are not allowed
        """
        # Remove hashtags and @username
        message = re.sub(r"\s#\w+\b", '', message)
        if message.lower().startswith('@%s' % self.username.lower()):
            # Drops the username plus the single character straight after it
            message = message[len('@%s ' % self.username):]
        # Trim both ends in every case, so trailing whitespace cannot turn into an empty token later on
        message = message.strip()

        # Exception if the Tweet contains nothing useful
        if not message and not self.allow_blank_tweets:
            raise WhensMyTransportException('blank_%s_tweet' % self.instance_name.replace('whensmy', ''))

        return message

    def parse_message(self, message):
        """
        Placeholder function. This must be overridden by a child class to do anything useful

        This version always returns (None, None, None)
        """
        #pylint: disable=W0613
        return (None, None, None)

    def process_individual_request(self, route_number, origin, destination, position):
        """
        Placeholder function. This must be overridden by a child class to do anything useful

        This version always returns an empty string
        """
        #pylint: disable=W0613
        return ""
class WhensMyRailTransport(WhensMyTransport):
    """
    Shared base for the WhensMyDLR and WhensMyTube bots, holding what the two have in common: finding a
    station in the database from either a position or a name. That approach suits networks with a limited
    number of stations whose names are well-known and universally agreed, which is normally railways and not buses.
    """
    def __init__(self, instance_name, testing=False, silent=False):
        """
        Constructor, called by child classes; hands everything on to the WhensMyTransport constructor
        """
        # NOTE(review): testing defaults to False here, not None, and the parent only consults the config
        # file's test_mode when it is given None - confirm that skipping the config lookup is intended
        WhensMyTransport.__init__(self, instance_name, testing=testing, silent_mode=silent)

    def get_station_by_geolocation(self, line_code, position):
        """
        Given a line code and a position, look up the closest station on that line via self.geodata
        """
        logging.debug("Attempting to get closest to position: %s", position)
        line_filter = {'Line': line_code}
        closest_station = self.geodata.find_closest(position, line_filter, RailStation)
        return closest_station

    def get_station_by_station_name(self, line_code, origin):
        """
        Given a line code and the name of an origin, look up a fuzzy match for that name via self.geodata
        """
        logging.debug("Attempting to get a fuzzy match on placename %s", origin)
        line_filter = {'Line': line_code}
        matching_station = self.geodata.find_fuzzy_match(line_filter, origin, RailStation)
        return matching_station
if __name__ == "__main__":
    # This module only holds the shared parent classes, so point anyone who runs it at the real entry points
    sys.stdout.write("Sorry, this file is not meant to be run directly. Please run either whensmybus.py or whensmytube.py" + "\n")