In [36]:
import json
import urllib
from pprint import pprint
from xml.sax.saxutils import escape

import goslate
import osmapi
from imposm.parser import OSMParser
from IPython.display import HTML
from pygeocoder import Geocoder, GeocoderError

In [86]:
class OSMContext:
    """Registry of the OSM nodes, ways and relations seen so far.

    Objects are kept in three dicts keyed by OSM id (``nodes``, ``ways`` and
    ``relations``).  Nodes that are not registered yet can be fetched through
    ``self.api`` (an ``osmapi.OsmApi`` client) and are cached once retrieved.
    """

    def __init__(self):
        self.nodes = {}
        self.ways = {}
        self.relations = {}
        self.api = osmapi.OsmApi()

    def add_nodes(self, data):
        """Register ``(osmid, tags, members)`` tuples as ``Node`` objects.

        Returns the list of created nodes, in input order.
        """
        nodes = [Node(self, osmid, tags, members) for osmid, tags, members in data]
        for node in nodes:
            self.nodes[node.osmid] = node
        return nodes

    def add_ways(self, data):
        """Register ``(osmid, tags, members)`` tuples as ``Way`` objects.

        Returns the list of created ways, in input order.
        """
        ways = [Way(self, osmid, tags, members) for osmid, tags, members in data]
        for way in ways:
            self.ways[way.osmid] = way
        return ways

    def add_relations(self, data):
        """Register ``(osmid, tags, members)`` tuples as ``Relation`` objects.

        Returns the list of created relations, in input order.
        """
        relations = [Relation(self, osmid, tags, members) for osmid, tags, members in data]
        for relation in relations:
            self.relations[relation.osmid] = relation
        return relations

    def get_way(self, osmid):
        """Return the registered way with this id, or ``None`` if unknown."""
        return self.ways.get(osmid)

    def get_relation(self, osmid):
        """Return the registered relation with this id, or ``None`` if unknown."""
        return self.relations.get(osmid)

    def _node_from_api(self, osmid, node_data):
        # Build a Node from an API node dict; members are stored as [lon, lat].
        return Node(self, osmid, node_data['tag'], [node_data['lon'], node_data['lat']])

    def resolve_nodes(self, node_ids, batchsize=200):
        """Fetch and cache every node in ``node_ids`` that is not registered yet.

        Ids are de-duplicated (keeping first-seen order) and requested from
        the API in batches of ``batchsize``.  Errors raised by the API client
        are not caught here.
        """
        seen = set()
        missing = []
        for osmid in node_ids:
            if osmid not in self.nodes and osmid not in seen:
                seen.add(osmid)
                missing.append(osmid)

        for start in range(0, len(missing), batchsize):
            batch = missing[start:start + batchsize]
            print("Requesting %d nodes from OSM.." % len(batch))
            # NodesGet returns a dict keyed by node id, so walk its items;
            # iterating the dict directly would only yield the ids.
            for osmid, node_data in self.api.NodesGet(batch).items():
                self.nodes[osmid] = self._node_from_api(osmid, node_data)

    def get_node(self, osmid):
        """Return the node with this id, looking it up online on a cache miss.

        Returns ``None`` when the API client returns ``None`` for the id.
        """
        if osmid not in self.nodes:
            # look up the node online
            print("Looking up node %d.." % osmid)
            node_data = self.api.NodeGet(osmid)
            if node_data is None:
                # we can't do anything in this situation
                return None
            self.nodes[osmid] = self._node_from_api(osmid, node_data)
        return self.nodes[osmid]

class OSMObject:
    """Common base for OSM primitives: an id, its tags and its members.

    ``context`` is the registry the object belongs to; it is what subclasses
    use to look up the objects their members refer to.
    """

    def __init__(self, context, osmid, tags, members):
        self.context, self.osmid = context, osmid
        self.tags, self.members = tags, members
        
class Position:
    """A latitude/longitude pair."""

    def __init__(self, lat, lng):
        self.lat, self.lng = lat, lng

    def __str__(self):
        # Rendered as "(lat: <lat>, lng: <lng>)".
        coordinates = (self.lat, self.lng)
        return "(lat: %s, lng: %s)" % coordinates
        
class Node(OSMObject):
    """An OSM node; ``members`` holds its coordinates as (lon, lat)."""

    @property
    def position(self):
        """The node's coordinates as a ``Position`` (members are lon-first)."""
        latitude = self.members[1]
        longitude = self.members[0]
        return Position(latitude, longitude)

    def __str__(self):
        where = str(self.position)
        return "Node%s" % where

    def __repr__(self):
        # Same text as str() so nodes read well inside printed lists.
        return str(self)
        
class Relation(OSMObject):
    """An OSM relation.

    ``members`` is a sequence of 3-tuples whose first two items are the
    member's OSM id and its type name (compared case-insensitively against
    "node" and "way"); the third item is ignored here.
    """

    @property
    def url(self):
        """Link to this relation on openstreetmap.org."""
        return "http://www.openstreetmap.org/relation/%s" % self.osmid

    def _member_ids(self, wanted_type):
        # Ids of the members whose type matches wanted_type (lower-case name).
        return [osmid for osmid, osmtype, _ in self.members if osmtype.lower() == wanted_type]

    def all_node_ids(self):
        """Ids of the members that are nodes, in member order."""
        return self._member_ids("node")

    def get_ways(self):
        """Member ways that are registered in the context, in member order."""
        ways = [self.context.get_way(osmid) for osmid in self._member_ids("way")]
        return [way for way in ways if way is not None]

    def get_nodes(self):
        """Member nodes that the context could resolve, in member order."""
        nodes = [self.context.get_node(osmid) for osmid in self.all_node_ids()]
        return [node for node in nodes if node is not None]

    @property
    def start_point(self):
        """Best-effort first point of the relation, or ``None``.

        Ways are tried in member order and the first one that yields a start
        point wins; a way without any resolvable node no longer ends the
        search.  If no way yields a point, the first resolvable member node
        is returned.
        """
        for way in self.get_ways():
            point = way.start_point
            if point is not None:
                return point
        for osmid in self.all_node_ids():
            node = self.context.get_node(osmid)
            if node is not None:
                return node
        return None

    @property
    def end_point(self):
        """Best-effort last point of the relation, or ``None``.

        Mirror image of ``start_point``: ways are tried from last to first,
        then member nodes from last to first.
        """
        for way in reversed(self.get_ways()):
            point = way.end_point
            if point is not None:
                return point
        for osmid in reversed(self.all_node_ids()):
            node = self.context.get_node(osmid)
            if node is not None:
                return node
        return None

    @property
    def points(self):
        """Resolvable member nodes followed by the points of every member way."""
        all_points = self.get_nodes()
        for way in self.get_ways():
            all_points.extend(way.points)
        return [point for point in all_points if point is not None]

class Way(OSMObject):
    """An OSM way; ``members`` is the sequence of its node ids."""

    @property
    def url(self):
        """Link to this way on openstreetmap.org."""
        return "http://www.openstreetmap.org/way/%s" % self.osmid

    def all_node_ids(self):
        """Node ids of the way (the ``members`` object itself)."""
        return self.members

    def get_nodes(self):
        """Nodes of the way that the context could resolve, in way order."""
        looked_up = [self.context.get_node(node_id) for node_id in self.all_node_ids()]
        return filter(lambda candidate: candidate is not None, looked_up)

    def _first_resolved(self, node_ids):
        # First node from node_ids that the context can resolve; lookups stop
        # at the first hit.
        for node_id in node_ids:
            candidate = self.context.get_node(node_id)
            if candidate is not None:
                return candidate
        return None

    @property
    def start_point(self):
        """First resolvable node of the way, or ``None``."""
        return self._first_resolved(self.all_node_ids())

    @property
    def end_point(self):
        """Last resolvable node of the way, or ``None``."""
        return self._first_resolved(reversed(self.all_node_ids()))

    @property
    def points(self):
        """Alias for ``get_nodes()``."""
        return self.get_nodes()

class FerryRoute:
    """A ferry route backed by an OSM object that exposes ``tags``."""

    def __init__(self, osmobj):
        self.osmobj = osmobj

    @property
    def url(self):
        """Website of the route taken from its tags, or ``None``.

        The 'website' tag is checked before 'url', so it wins when both are
        present.  Otherwise the first tag value containing "http" (in any
        letter case) is returned.
        """
        tags = self.osmobj.tags
        for preferred in ('website', 'url'):
            if preferred in tags:
                return tags[preferred]
        for candidate in tags.values():
            if "http" in candidate.lower():
                return candidate
        return None

    @property
    def name(self):
        """Display name of the route taken from its tags, or ``None``.

        Lookup order: 'name', 'name:en', 'name:int', then any tag whose key
        contains 'name', and finally 'ref'.
        """
        tags = self.osmobj.tags
        for preferred in ('name', 'name:en', 'name:int'):
            if preferred in tags:
                return tags[preferred]
        for tag_key in tags.keys():
            if 'name' in tag_key:
                return tags[tag_key]
        return tags.get('ref')
    
class Operator:
    """A ferry operator, identified only by its name."""

    def __init__(self, name):
        # The name is stored as given.
        self.name = name
        
# Shared notebook state filled in by the parser callbacks:
# every ferry route found, the operators seen (keyed by name), and the
# registry of parsed OSM objects.
ferries = []
operator_map = {}
context = OSMContext()

def relations(objects):
    """Parser callback: register parsed relations and record them as ferries."""
    if not objects:
        return
    registered = context.add_relations(objects)
    make_ferry(registered)

def ways(objects):
    """Parser callback: register parsed ways and record them as ferries."""
    if not objects:
        return
    registered = context.add_ways(objects)
    make_ferry(registered)
        
def nodes(objects):
    """Parser callback: register parsed nodes (nodes are never ferries)."""
    if not objects:
        return
    context.add_nodes(objects)

def make_ferry(objects):
    """Record every object as a ``FerryRoute`` and register its operator.

    The operator name comes from the 'operator' tag; objects without that tag
    are filed under the placeholder name 'no operator'.  Each distinct name
    gets exactly one ``Operator`` entry in ``operator_map``.
    """
    for osmobj in objects:
        if 'operator' in osmobj.tags:
            operator_name = osmobj.tags['operator']
        else:
            operator_name = 'no operator'
        if operator_name not in operator_map:
            operator_map[operator_name] = Operator(operator_name)
        ferries.append(FerryRoute(osmobj))

In [87]:
def ferries_html(title, ferries):
    """Render ferry routes as an ``<h2>`` heading followed by a bullet list.

    Each item links the route name (or "(unnamed ferry)") to the route's OSM
    page.  Named routes that have a website get a separate "(website)" link
    placed after the OSM link rather than inside it, because anchors must not
    be nested.

    ``title`` is inserted verbatim.  Route names and URLs are HTML-escaped
    because they are free text taken from the routes.

    Returns the markup as a single string.
    """
    # Extra entities so escaped values cannot break out of the quoted href attributes.
    quote_entities = {"'": "&#39;", '"': "&quot;"}

    parts = ["<h2>%s</h2>" % title, "<ul>"]
    for route in ferries:
        osm_href = escape(route.osmobj.url, quote_entities)
        name = route.name
        if name is None:
            parts.append("<li><a href='%s'>(unnamed ferry)</a></li>" % osm_href)
            continue
        item = "<li><a href='%s'><b>%s</b></a>" % (osm_href, escape(name))
        website = route.url
        if website is not None:
            item += " (<a href='%s'>website</a>)" % escape(website, quote_entities)
        parts.append(item + "</li>")
    parts.append("</ul>")
    return "".join(parts)

In [88]:
def get_country(positions):
    """Return the country code of the first position the geocoder can place.

    Each position (an object with ``lat`` and ``lng``) is sent to the local
    reverse-geocoding service in turn.  The ``countryCode`` of the first
    place in the first response that has a non-empty one is returned.
    Positions whose response has no places, or whose first place has no
    country code, are skipped.  Returns ``None`` if no position yields a code.

    Request and parsing errors are reported with the failing query and then
    re-raised.
    """
    # NOTE(review): the service key is hard-coded in the query template;
    # consider moving it to configuration before sharing this notebook.
    url = "http://localhost:15555/api/1.2/json/ReverseGeocode?key=oaSfxu2D&lat=%s&lng=%s"
    for pos in positions:
        query = url % (pos.lat, pos.lng)
        try:
            response = urllib.urlopen(query)
            try:
                payload = response.read()
            finally:
                # Release the connection even when reading fails.
                response.close()
            places = json.loads(payload)['places']
            # An empty result list means "nothing here": move on to the next
            # position instead of indexing into it.
            country_code = places[0].get('countryCode') if places else None
        except Exception:
            print("error executing query: %s" % query)
            raise
        if country_code:
            return country_code
    return None

In [89]:
# Parse the ferry extract; the callbacks fill `context`, `operator_map` and
# `ferries` as objects are read.
parser = OSMParser(
    concurrency=4,
    nodes_callback=nodes,
    ways_callback=ways,
    relations_callback=relations,
)
parser.parse(r"/home/mgi/raw/ferries.pbf")

#operators = [value for _, value in sorted(operator_map.iteritems(), key=lambda (k, v): k)]

In [90]:
# Collect the node ids referenced by every ferry object, then fetch the ones
# that are not registered yet in one pass.
all_node_ids = []
for ferry in ferries:
    for osmid in ferry.osmobj.all_node_ids():
        all_node_ids.append(osmid)
context.resolve_nodes(all_node_ids)

ApiError: Request failed: 404 - Not Found - 

Requesting 200 nodes from OSM..


In [50]:
# WARNING! this is slow the first time, as osmfilter is filtering out many of the nodes (I have to look them up manually)
# see: https://help.openstreetmap.org/questions/41897/osmfilter-misses-dependent-nodes

ferry_by_country = {}
for i, ferry in enumerate(ferries):
    if i % 1000 == 0:
        print "Resolving ferry %d/%d" % (i + 1, len(ferries))
    points = filter(lambda x: x is not None, [ferry.osmobj.start_point, ferry.osmobj.end_point])
    
    if not points:
        print "No points associated with ferry: %s" % ferry.osmobj.url
    else:
        country = get_country(x.position for x in points)
        if country is None:
            print "No country found:", ferry.name, "URL:", ferry.osmobj.url, "Points attempted:", points
        else:
            if country not in ferry_by_country:
                ferry_by_country[country] = []
            ferry_by_country[country].append(ferry)

Resolving ferry 1/12389
Looking up node 262518373..
No points associated with ferry: http://www.openstreetmap.org/relation/305509
No points associated with ferry: http://www.openstreetmap.org/relation/404473
No points associated with ferry: http://www.openstreetmap.org/relation/451606
No points associated with ferry: http://www.openstreetmap.org/relation/452743
No points associated with ferry: http://www.openstreetmap.org/relation/931812
No points associated with ferry: http://www.openstreetmap.org/relation/945839
No points associated with ferry: http://www.openstreetmap.org/relation/945843
No points associated with ferry: http://www.openstreetmap.org/relation/1131945
No points associated with ferry: http://www.openstreetmap.org/relation/1182984
No points associated with ferry: http://www.openstreetmap.org/relation/1182990
Looking up node 1651608939..
Looking up node 1651608935..
No points associated with ferry: http://www.openstreetmap.org/relation/1629592
Looking up node 290907612..


KeyboardInterrupt: 




In [39]:
# Country whose ferries are listed below.
code = 'JP'

gs = goslate.Goslate()
# Extra entities so escaped values cannot break out of the quoted href attribute.
quote_entities = {"'": "&#39;", '"': "&quot;"}
s = unicode("<h1>Ferries in %s</h1>" % code)
for ferry in ferry_by_country[code]:
    url = ferry.url
    if ferry.name is None:
        name = "<small><b>unknown</b></small>"
    else:
        # Names are free text (and pass through the translator), so escape
        # them before embedding them in the markup.
        name = escape(gs.translate(ferry.name, "en"))
    if url is None:
        s += name
    else:
        # The website comes from a tag value; escape it for use inside href='...'.
        s += "<a href='%s'>%s</a>" % (escape(url, quote_entities), name)
    s += " <small>(<a href='%s'>view on osm</a>)</small><br/>" % ferry.osmobj.url
HTML(s)

In [49]:
import pycountry
total = 0
for country, ferry_lines in ferry_by_country.iteritems():
    print "%s: %d ferry lines" % (pycountry.countries.get(alpha2=country).name, len(ferry_lines))
    total += len(ferry_lines)
print "TOTAL: %d" % total

Bangladesh: 3 ferry lines
Belgium: 16 ferry lines
Bulgaria: 1 ferry lines
Bermuda: 3 ferry lines
Brunei Darussalam: 1 ferry lines
Bolivia, Plurinational State of: 1 ferry lines
Japan: 23 ferry lines
Botswana: 1 ferry lines
Brazil: 34 ferry lines
Bahamas: 2 ferry lines
Russian Federation: 81 ferry lines
Rwanda: 2 ferry lines
Romania: 6 ferry lines
Guatemala: 4 ferry lines
Greece: 87 ferry lines
Guyana: 7 ferry lines
Guernsey: 3 ferry lines
Georgia: 5 ferry lines
Grenada: 1 ferry lines
United Kingdom: 162 ferry lines
Gambia: 3 ferry lines
Greenland: 1 ferry lines
Ghana: 3 ferry lines
Tunisia: 2 ferry lines
Croatia: 35 ferry lines
Hungary: 24 ferry lines
Costa Rica: 1 ferry lines
Portugal: 8 ferry lines
Paraguay: 1 ferry lines
Panama: 5 ferry lines
Philippines: 54 ferry lines
Poland: 40 ferry lines
Estonia: 3 ferry lines
Egypt: 4 ferry lines
South Africa: 2 ferry lines
Ecuador: 1 ferry lines
Albania: 1 ferry lines
Spain: 48 ferry lines
Montenegro: 1 ferry lines
Moldova, Republic of: 1 fer