Skip to content

Commit

Permalink
Split WMTLocations into separate services for Bus and Tube
Browse files Browse the repository at this point in the history
  • Loading branch information
qwghlm committed Apr 19, 2012
1 parent e590417 commit a4da800
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 74 deletions.
10 changes: 5 additions & 5 deletions destinationcodes.py
Expand Up @@ -9,8 +9,8 @@
from datatools import get_tfl_prediction_summaries
from lib.database import WMTDatabase
from lib.dataparsers import filter_tube_train
from lib.locations import WMTLocations
from lib.models import TubeTrain, RailStation
from lib.locations import RailStationLocations
from lib.models import TubeTrain


def scrape_tfl_destination_codes():
Expand Down Expand Up @@ -38,7 +38,7 @@ def check_tfl_destination_codes():
Audit codes we have recorded and make sure that they are all fine
"""
# Check to see if destination is in our database
geodata = WMTLocations('whensmytube')
geodata = RailStationLocations()
database = WMTDatabase("whensmytube.destinationcodes.db")

rows = database.get_rows("SELECT destination_name, destination_code, line_code FROM destination_codes")
Expand All @@ -50,10 +50,10 @@ def check_tfl_destination_codes():
continue
train = TubeTrain(destination_name, "Northbound", "1200", "C", "001")
destination = train.get_destination_no_via()
if destination != "Unknown" and not geodata.find_fuzzy_match({}, destination, RailStation):
if destination != "Unknown" and not geodata.find_fuzzy_match({}, destination):
print "Destination %s (%s) on %s not found in locations database" % (destination_name, destination_code, line_code)
via = train.get_via()
if via and not geodata.find_fuzzy_match({}, via, RailStation):
if via and not geodata.find_fuzzy_match({}, via):
print "Via %s (from %s) on %s not found in locations database" % (via, destination_name, line_code)

if __name__ == "__main__":
Expand Down
60 changes: 34 additions & 26 deletions lib/locations.py
Expand Up @@ -12,7 +12,7 @@
# http://code.google.com/p/python-graph/
from pygraph.algorithms.minmax import shortest_path

from lib.models import RailStation
from lib.models import Location, BusStop, RailStation
from lib.stringutils import get_best_fuzzy_match
from lib.database import WMTDatabase
from lib.geo import convertWGS84toOSEastingNorthing
Expand All @@ -24,27 +24,14 @@
class WMTLocations():
"""
Service object used to find stops or stations (locations) - given a position, exact match or fuzzy match,
will return the best matching stop
will return the best matching stop. Intended to be subclassed rather than instantiated directly
"""
def __init__(self, instance_name, load_network=True):
def __init__(self, instance_name):
self.database = WMTDatabase('%s.geodata.db' % instance_name)
self.network = None
self.returned_object = Location

if instance_name == 'whensmybus':
filename = 'whensmybus'
elif instance_name == 'whensmytube' or instance_name == 'whensmydlr':
filename = 'whensmytrain'
else:
logging.error("No data files exist for instance name %s, aborting", instance_name)
raise RuntimeError("No data files exist for instance name %s, aborting" % instance_name)

self.database = WMTDatabase('%s.geodata.db' % filename)
network_file = DB_PATH + '/%s.network.gr' % filename
if load_network and os.path.exists(network_file):
logging.debug("Opening network node data %s", os.path.basename(network_file))
self.network = pickle.load(open(network_file))
else:
self.network = None

def find_closest(self, position, params, returned_object):
def find_closest(self, position, params):
"""
Find the closest location to the (lat, long) position specified, querying the database with dictionary params, of the format
{ Column Name : value }. Returns an object of class returned_object, or None if none found nearby
Expand All @@ -68,14 +55,14 @@ def find_closest(self, position, params, returned_object):
""" % (easting, easting, northing, northing, where_statement)
row = self.database.get_row(query, where_values)
if row:
obj = returned_object(Distance=sqrt(row['dist_squared']), **row)
obj = self.returned_object(Distance=sqrt(row['dist_squared']), **row)
logging.debug("Have found nearest location %s", obj)
return obj
else:
logging.debug("No location found near %s, sorry", position)
return None

def find_fuzzy_match(self, stop_or_station_name, params, returned_object):
def find_fuzzy_match(self, stop_or_station_name, params):
"""
Find the best fuzzy match to the query_string, querying the database with dictionary params, of the format
{ Column Name : value, }. Returns an object of class returned_object, or None if no fuzzy match found
Expand All @@ -85,32 +72,53 @@ def find_fuzzy_match(self, stop_or_station_name, params, returned_object):
# Try to get an exact match first against station names in database
exact_params = params.copy()
exact_params.update({'name': stop_or_station_name})
exact_match = self.find_exact_match(exact_params, returned_object)
exact_match = self.find_exact_match(exact_params)
if exact_match:
return exact_match

# Users may not give exact details, so we try to match fuzzily
(where_statement, where_values) = self.database.make_where_statement('locations', params)
rows = self.database.get_rows("SELECT * FROM locations WHERE %s" % where_statement, where_values)
possible_matches = [returned_object(**row) for row in rows]
possible_matches = [self.returned_object(**row) for row in rows]
best_match = get_best_fuzzy_match(stop_or_station_name, possible_matches)
if best_match:
return best_match
else:
return None

def find_exact_match(self, params, returned_object):
def find_exact_match(self, params):
"""
Find the exact match for an item matching params. Returns an object of class returned_object, or None if no
exact match found
"""
(where_statement, where_values) = self.database.make_where_statement('locations', params)
row = self.database.get_row("SELECT * FROM locations WHERE %s LIMIT 1" % where_statement, where_values)
if row:
return returned_object(**row)
return self.returned_object(**row)
else:
return None


class BusStopLocations(WMTLocations):
    """
    Location service for bus stops: given a position, an exact match or a fuzzy match, hands back the
    best matching BusStop
    """
    def __init__(self):
        # The base initialiser opens the 'whensmybus' geodata database and sets returned_object to the
        # generic Location, so the BusStop override must be applied after it has run
        WMTLocations.__init__(self, instance_name='whensmybus')
        self.returned_object = BusStop


class RailStationLocations(WMTLocations):
"""
Service object used to find rail stations - given a position, exact match or fuzzy match, will return the best matching RailStation
"""
def __init__(self):
    """
    Open the 'whensmytrain' geodata database, load the pickled network data stored under DB_PATH
    and make RailStation the class of object returned by the find_* methods
    """
    WMTLocations.__init__(self, 'whensmytrain')
    network_file = DB_PATH + '/whensmytrain.network.gr'
    logging.debug("Opening network node data %s", os.path.basename(network_file))
    # Load inside a context manager so the file handle is closed as soon as the data has been
    # unpickled, instead of being left open until the garbage collector reclaims it
    with open(network_file) as network_data:
        self.network = pickle.load(network_data)
    # Set after the base initialiser, which defaults returned_object to Location
    self.returned_object = RailStation

def get_lines_serving(self, origin, destination=None):
"""
Return a list of line codes that the RailStation origin is served by. If RailStation destination is specified, then
Expand Down
62 changes: 31 additions & 31 deletions testing.py
Expand Up @@ -656,9 +656,9 @@ def test_location(self):
"""
Unit tests for WMTLocation object and the bus database
"""
self.assertEqual(self.bot.geodata.find_closest((51.5124, -0.0397), {'run': '1', 'route': '15'}, BusStop).number, "53410")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Limehouse Sta", {'run': '1', 'route': '15'}, BusStop).number, "53410")
self.assertEqual(self.bot.geodata.find_exact_match({'run': '1', 'route': '15', 'name': 'LIMEHOUSE TOWN HALL'}, BusStop).number, "48264")
self.assertEqual(self.bot.geodata.find_closest((51.5124, -0.0397), {'run': '1', 'route': '15'}).number, "53410")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Limehouse Sta", {'run': '1', 'route': '15'}).number, "53410")
self.assertEqual(self.bot.geodata.find_exact_match({'run': '1', 'route': '15', 'name': 'LIMEHOUSE TOWN HALL'}).number, "48264")
self.assertTrue(self.bot.geodata.database.check_existence_of('locations', 'bus_stop_code', '47001'))
self.assertFalse(self.bot.geodata.database.check_existence_of('locations', 'bus_stop_code', '47000'))
self.assertEqual(self.bot.geodata.database.get_max_value('locations', 'run', {}), 4)
Expand Down Expand Up @@ -851,15 +851,15 @@ def test_location(self):
Unit tests for WMTLocation object and the Tube database
"""
# Test station-finding works
self.assertEqual(self.bot.geodata.find_closest((51.529444, -0.126944), {}, RailStation).code, "KXX")
self.assertEqual(self.bot.geodata.find_closest((51.529444, -0.126944), {'line': 'M'}, RailStation).code, "KXX")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Kings Cross", {}, RailStation).code, "KXX")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Kings Cross", {'line': 'M'}, RailStation).code, "KXX")
self.assertEqual(self.bot.geodata.find_closest((51.529444, -0.126944), {}).code, "KXX")
self.assertEqual(self.bot.geodata.find_closest((51.529444, -0.126944), {'line': 'M'}).code, "KXX")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Kings Cross", {}).code, "KXX")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Kings Cross", {'line': 'M'}).code, "KXX")

# Test route-tracing works as expected
stockwell = self.bot.geodata.find_fuzzy_match("Stockwell", {}, RailStation)
bank = self.bot.geodata.find_fuzzy_match("Bank", {}, RailStation)
euston = self.bot.geodata.find_fuzzy_match("Euston", {}, RailStation)
stockwell = self.bot.geodata.find_fuzzy_match("Stockwell", {})
bank = self.bot.geodata.find_fuzzy_match("Bank", {})
euston = self.bot.geodata.find_fuzzy_match("Euston", {})
self.assertEqual(sorted(self.bot.geodata.get_lines_serving(stockwell)), ['N', 'V'])
self.assertEqual(sorted(self.bot.geodata.get_lines_serving(bank)), ['C', 'N', 'W'])
self.assertEqual(self.bot.geodata.length_of_route(stockwell, euston), 18)
Expand All @@ -870,13 +870,13 @@ def test_location(self):


# Test route-testing works as expected
west_ruislip = self.bot.geodata.find_fuzzy_match("West Ruislip", {}, RailStation)
hainault = self.bot.geodata.find_fuzzy_match("Hainault", {}, RailStation)
roding_valley = self.bot.geodata.find_fuzzy_match("Roding Valley", {}, RailStation)
wanstead = self.bot.geodata.find_fuzzy_match("Wanstead", {}, RailStation)
snaresbrook = self.bot.geodata.find_fuzzy_match("Snaresbrook", {}, RailStation)
heathrow123 = self.bot.geodata.find_fuzzy_match("Heathrow Terminals 1, 2, 3", {}, RailStation)
heathrow4 = self.bot.geodata.find_fuzzy_match("Heathrow Terminal 4", {}, RailStation)
west_ruislip = self.bot.geodata.find_fuzzy_match("West Ruislip", {})
hainault = self.bot.geodata.find_fuzzy_match("Hainault", {})
roding_valley = self.bot.geodata.find_fuzzy_match("Roding Valley", {})
wanstead = self.bot.geodata.find_fuzzy_match("Wanstead", {})
snaresbrook = self.bot.geodata.find_fuzzy_match("Snaresbrook", {})
heathrow123 = self.bot.geodata.find_fuzzy_match("Heathrow Terminals 1, 2, 3", {})
heathrow4 = self.bot.geodata.find_fuzzy_match("Heathrow Terminal 4", {})
self.assertTrue(self.bot.geodata.direct_route_exists(west_ruislip, west_ruislip, "C"))
self.assertTrue(self.bot.geodata.direct_route_exists(west_ruislip, hainault, "C"))
self.assertTrue(self.bot.geodata.direct_route_exists(west_ruislip, roding_valley, "C", via=hainault))
Expand All @@ -888,8 +888,8 @@ def test_location(self):


# Test direction-finding works as expected
morden = self.bot.geodata.find_fuzzy_match("Morden", {}, RailStation)
high_barnet = self.bot.geodata.find_fuzzy_match("High Barnet", {}, RailStation)
morden = self.bot.geodata.find_fuzzy_match("Morden", {})
high_barnet = self.bot.geodata.find_fuzzy_match("High Barnet", {})
self.assertTrue(self.bot.geodata.is_correct_direction("Eastbound", west_ruislip, hainault, 'C'))
self.assertTrue(self.bot.geodata.is_correct_direction("Westbound", hainault, west_ruislip, 'C'))
self.assertTrue(self.bot.geodata.is_correct_direction("Northbound", morden, high_barnet, 'N'))
Expand All @@ -898,21 +898,21 @@ def test_location(self):
self.assertFalse(self.bot.geodata.is_correct_direction("Southbound", morden, high_barnet, 'N'))

# DLR Location tests
self.assertEqual(self.bot.geodata.find_closest((51.5124, -0.0397), {}, RailStation).code, "lim")
self.assertEqual(self.bot.geodata.find_closest((51.5124, -0.0397), {'line': 'DLR'}, RailStation).code, "lim")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Limehouse", {}, RailStation).code, "lim")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Limehouse", {'line': 'DLR'}, RailStation).code, "lim")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Stratford Int", {}, RailStation).code, "sti")
self.assertEqual(self.bot.geodata.find_fuzzy_match("W'wich Arsenal", {}, RailStation).code, "woa")

stratford = self.bot.geodata.find_fuzzy_match("Stratford", {}, RailStation)
beckton = self.bot.geodata.find_fuzzy_match("Beckton", {}, RailStation)
poplar = self.bot.geodata.find_fuzzy_match("Poplar", {}, RailStation)
self.assertEqual(self.bot.geodata.find_closest((51.5124, -0.0397), {}).code, "lim")
self.assertEqual(self.bot.geodata.find_closest((51.5124, -0.0397), {'line': 'DLR'}).code, "lim")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Limehouse", {}).code, "lim")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Limehouse", {'line': 'DLR'}).code, "lim")
self.assertEqual(self.bot.geodata.find_fuzzy_match("Stratford Int", {}).code, "sti")
self.assertEqual(self.bot.geodata.find_fuzzy_match("W'wich Arsenal", {}).code, "woa")

stratford = self.bot.geodata.find_fuzzy_match("Stratford", {})
beckton = self.bot.geodata.find_fuzzy_match("Beckton", {})
poplar = self.bot.geodata.find_fuzzy_match("Poplar", {})
self.assertIn(('West Ham', '', 'DLR'), self.bot.geodata.describe_route(stratford, beckton))
self.assertIn(('Blackwall', '', 'DLR'), self.bot.geodata.describe_route(stratford, beckton, "DLR", poplar))

limehouse = self.bot.geodata.find_fuzzy_match("Limehouse", {}, RailStation)
all_saints = self.bot.geodata.find_fuzzy_match("All Saints", {}, RailStation)
limehouse = self.bot.geodata.find_fuzzy_match("Limehouse", {})
all_saints = self.bot.geodata.find_fuzzy_match("All Saints", {})
self.assertTrue(self.bot.geodata.direct_route_exists(limehouse, beckton, "DLR"))
self.assertFalse(self.bot.geodata.direct_route_exists(limehouse, all_saints, "DLR"))
self.assertTrue(self.bot.geodata.is_correct_direction("Eastbound", limehouse, beckton, "DLR"))
Expand Down
10 changes: 6 additions & 4 deletions whensmybus.py
Expand Up @@ -24,7 +24,8 @@
from lib.dataparsers import parse_bus_data
from lib.geo import heading_to_direction
from lib.exceptions import WhensMyTransportException
from lib.models import BusStop, NullDeparture, DepartureCollection
from lib.locations import BusStopLocations
from lib.models import NullDeparture, DepartureCollection
from lib.textparser import WMTBusParser


Expand All @@ -39,6 +40,7 @@ def __init__(self, testing=False):
"""
WhensMyTransport.__init__(self, 'whensmybus', testing)
self.parser = WMTBusParser()
self.geodata = BusStopLocations()

def process_individual_request(self, route_number, origin, destination, direction, position=None):
"""
Expand Down Expand Up @@ -102,7 +104,7 @@ def get_stops_by_geolocation(self, route_number, position):
logging.debug("Have found total of %s runs", max_runs)
relevant_stops = {}
for run in range(1, max_runs + 1):
stop = self.geodata.find_closest(position, {'route': route_number, 'run': run}, BusStop)
stop = self.geodata.find_closest(position, {'route': route_number, 'run': run})
if stop:
relevant_stops[run] = stop
logging.debug("Have found stop numbers: %s", ', '.join([stop.number for stop in relevant_stops.values()]))
Expand All @@ -119,7 +121,7 @@ def get_stops_by_stop_number(self, route_number, stop_number):

# Try and get a match on it
logging.debug("Attempting to get an exact match on stop SMS ID %s", stop_number)
stop = self.geodata.find_exact_match({'bus_stop_code': stop_number, 'route': route_number}, BusStop)
stop = self.geodata.find_exact_match({'bus_stop_code': stop_number, 'route': route_number})
if stop:
logging.debug("Have found stop number: %s", stop.number)
return {stop.run: stop}
Expand All @@ -146,7 +148,7 @@ def get_stops_by_stop_name(self, route_number, stop_name):
# A route typically has two "runs" (e.g. one eastbound, one west) but some have more than that, so work out how many we have to check
max_runs = self.geodata.database.get_max_value('locations', 'run', {'route': route_number})
for run in range(1, max_runs + 1):
best_match = self.geodata.find_fuzzy_match(stop_name, {'route': route_number, 'run': run}, BusStop)
best_match = self.geodata.find_fuzzy_match(stop_name, {'route': route_number, 'run': run})
if best_match:
logging.info("Found stop name %s for Run %s by fuzzy matching", best_match.name, best_match.run)
relevant_stops[run] = best_match
Expand Down
8 changes: 5 additions & 3 deletions whensmytrain.py
Expand Up @@ -22,7 +22,8 @@
from whensmytransport import WhensMyTransport
from lib.dataparsers import parse_dlr_data, parse_tube_data
from lib.exceptions import WhensMyTransportException
from lib.models import RailStation, NullDeparture
from lib.locations import RailStationLocations
from lib.models import NullDeparture
from lib.stringutils import get_best_fuzzy_match
from lib.textparser import WMTTrainParser

Expand Down Expand Up @@ -60,6 +61,7 @@ def __init__(self, instance_name, testing=False):
else:
self.default_requested_route = 'Tube'
self.parser = WMTTrainParser()
self.geodata = RailStationLocations()

# Create lookup dict for line names
self.line_lookup = dict([(name, name) for (_code, name) in LINE_NAMES.keys()])
Expand Down Expand Up @@ -152,7 +154,7 @@ def get_station_by_geolocation(self, position, line_code=None):
params = {}
if line_code:
params['line'] = line_code
return self.geodata.find_closest(position, params, RailStation)
return self.geodata.find_closest(position, params)

def get_station_by_station_name(self, station_name, line_code=None):
"""
Expand All @@ -162,7 +164,7 @@ def get_station_by_station_name(self, station_name, line_code=None):
params = {}
if line_code:
params['line'] = line_code
return self.geodata.find_fuzzy_match(station_name, params, RailStation)
return self.geodata.find_fuzzy_match(station_name, params)

def get_canonical_station_name(self, station_name, line_code):
"""
Expand Down

0 comments on commit a4da800

Please sign in to comment.