Skip to content
This repository
Browse code

Switch from using the remote Overpass API server to osm3s_query locally

The KML generation scripts for MapIt Global used to make a huge number
of queries to http://www.overpass-api.de/api/interpreter - this is
slow and hits their service rather hard.  Setting up a local server
and running queries using the osm3s_query binary is a great deal
faster, and doesn't put a large load on an external service.
  • Loading branch information...
commit d45ed31b897ef66484016124c07f2a22d89f0ba2 1 parent 3518db7
Mark Longair authored November 07, 2012
68  bin/boundaries.py
@@ -5,6 +5,9 @@
5 5
 from lxml import etree
6 6
 from tempfile import mkdtemp, NamedTemporaryFile
7 7
 from StringIO import StringIO
  8
+from subprocess import Popen, PIPE
  9
+
  10
+osm3s_db_directory = "/home/overpass/db/"
8 11
 
9 12
 # Suggested by http://stackoverflow.com/q/600268/223092
10 13
 def mkdir_p(path):
@@ -47,6 +50,43 @@ def mkdir_p(path):
47 50
         else:
48 51
             raise
49 52
 
  53
def get_query_relation_and_dependents(element_type, element_id):
    """Build an Overpass XML query for one element plus everything below it.

    The returned <osm-script> selects the element with the given type and
    id, recurses "down" from it, and prints the resulting set in body mode
    ordered by id.

    element_type and element_id are formatted with %s (so ints are
    accepted) and then escaped as XML attribute values, so that a stray
    quote, ampersand or angle bracket cannot produce a malformed query.
    For ordinary values the output is unchanged: the attribute is still
    wrapped in double quotes.

    Returns the query as a string, ending in a newline.
    """
    # Local import: keeps this function self-contained without touching
    # the module's import block.
    from xml.sax.saxutils import quoteattr

    # quoteattr() returns the value *including* its surrounding quotes,
    # which is why the template has no quotes around these two %s.
    ref_attribute = quoteattr("%s" % (element_id,))
    type_attribute = quoteattr("%s" % (element_type,))
    return """<osm-script timeout="3600">
  <union into="_">
    <id-query into="_" ref=%s type=%s/>
    <recurse from="_" into="_" type="down"/>
  </union>
  <print from="_" limit="" mode="body" order="id"/>
</osm-script>
""" % (ref_attribute, type_attribute)
  62
+
  63
def get_query_boundaries(admin_level):
    """Build an Overpass XML query for administrative boundaries at a level.

    The returned <osm-script> is the union of all relations and all ways
    tagged boundary=administrative and admin_level=<admin_level>, printed
    in body mode ordered by id.

    admin_level is formatted with %s (so an int is accepted) and then
    escaped as an XML attribute value, so unexpected characters cannot
    produce a malformed query.  For ordinary values the output is
    unchanged: the attribute is still wrapped in double quotes.

    Returns the query as a string, with no trailing newline.
    """
    # Local import: keeps this function self-contained without touching
    # the module's import block.
    from xml.sax.saxutils import quoteattr

    # quoteattr() returns the value *including* its surrounding quotes,
    # which is why the template has no quotes around the two %s.
    level_attribute = quoteattr("%s" % (admin_level,))
    return """<osm-script timeout="3600">
  <union into="_">
    <query into="_" type="relation">
      <has-kv k="boundary" modv="" v="administrative"/>
      <has-kv k="admin_level" modv="" v=%s/>
    </query>
    <query into="_" type="way">
      <has-kv k="boundary" modv="" v="administrative"/>
      <has-kv k="admin_level" modv="" v=%s/>
    </query>
  </union>
  <print from="_" limit="" mode="body" order="id"/>
</osm-script>""" % (level_attribute, level_attribute)
  77
+
  78
def get_osm3s(query_xml, filename, db_directory=None):
    """Run an Overpass XML query with the local osm3s_query binary.

    If filename already exists it is treated as a cached result and
    nothing is run.  Otherwise query_xml is fed to osm3s_query on stdin
    and its stdout is saved to filename.

    The output is first written to filename + '.partial' and only renamed
    to filename once osm3s_query has exited successfully.  Writing
    directly to filename would leave a truncated file behind whenever the
    query failed or was interrupted, and the existence check above would
    then treat that broken file as a valid cache entry on every later run.

    Arguments:
      query_xml -- the query text; non-bytes text is encoded as UTF-8
                   before being written to the subprocess's stdin
      filename -- path of the cache file to create
      db_directory -- directory passed as --db-dir; defaults to the
                      module-level osm3s_db_directory

    Raises Exception if osm3s_query exits with a non-zero status; any
    error from starting the process propagates unchanged.  In both cases
    no file is left at filename.
    """
    if os.path.exists(filename):
        return
    if db_directory is None:
        db_directory = osm3s_db_directory
    # The stdin pipe carries bytes; encoding here keeps the call working
    # whether the caller passes a byte string or text.
    if not isinstance(query_xml, bytes):
        query_xml = query_xml.encode('utf-8')
    partial_filename = filename + '.partial'
    succeeded = False
    try:
        with open(partial_filename, 'wb') as file_output:
            p = Popen(["osm3s_query",
                       "--concise",
                       "--db-dir=" + db_directory],
                      stdin=PIPE,
                      stdout=file_output)
            p.communicate(query_xml)
        if p.returncode != 0:
            raise Exception("The osm3s_query failed")
        os.rename(partial_filename, filename)
        succeeded = True
    finally:
        # Never leave a half-written file lying around after a failure.
        if not succeeded and os.path.exists(partial_filename):
            os.remove(partial_filename)
  89
+
50 90
 def get_cache_filename(element_type, element_id, cache_directory=None):
51 91
     if cache_directory is None:
52 92
         script_directory = os.path.dirname(os.path.abspath(__file__))
@@ -1685,27 +1725,6 @@ def get_total_seconds(td):
1685 1725
     """A replacement for timedelta.total_seconds(), that's only in Python >= 2.7"""
1686 1726
     return td.microseconds * 1e-6 + td.seconds + td.days * (24.0 * 60 * 60)
1687 1727
 
1688  
-class RateLimitedPOST:
1689  
-
1690  
-    last_post = None
1691  
-    min_time_between = datetime.timedelta(seconds=0.5)
1692  
-
1693  
-    @staticmethod
1694  
-    def request(url, values, filename, verbose=False):
1695  
-        if RateLimitedPOST.last_post:
1696  
-            since_last = datetime.datetime.now() - RateLimitedPOST.last_post
1697  
-            if since_last < RateLimitedPOST.min_time_between:
1698  
-                difference = RateLimitedPOST.min_time_between - since_last
1699  
-                time.sleep(get_total_seconds(difference))
1700  
-        encoded_values = urllib.urlencode(values)
1701  
-        request = urllib2.Request(url, encoded_values)
1702  
-        if verbose:
1703  
-            print "making request to url:", url
1704  
-        response = urllib2.urlopen(request)
1705  
-        with open(filename, "w") as fp:
1706  
-            fp.write(response.read())
1707  
-        RateLimitedPOST.last_post = datetime.datetime.now()
1708  
-
1709 1728
 def fetch_cached(element_type, element_id, verbose=False, cache_directory=None):
1710 1729
     """Get an OSM element from the Overpass API, with caching on disk
1711 1730
 
@@ -1731,10 +1750,8 @@ def fetch_cached(element_type, element_id, verbose=False, cache_directory=None):
1731 1750
         raise Exception, "Unknown element type '%s'" % (element_type,)
1732 1751
     filename = get_cache_filename(element_type, element_id, cache_directory)
1733 1752
     if not os.path.exists(filename):
1734  
-        url = "http://www.overpass-api.de/api/interpreter"
1735  
-        data = '[timeout:3600];(%s(%s);>;);out;' % arguments
1736  
-        values = {'data': data}
1737  
-        RateLimitedPOST.request(url, values, filename, verbose)
  1753
+        all_dependents_query = get_query_relation_and_dependents(element_type, element_id)
  1754
+        get_osm3s(all_dependents_query, filename)
1738 1755
     return filename
1739 1756
 
1740 1757
 def parse_xml_minimal(filename, element_handler):
@@ -1825,7 +1842,6 @@ def fetch_osm_element(element_type, element_id, fetch_missing=True, verbose=Fals
1825 1842
     >>> tmp_cache2 = mkdtemp()
1826 1843
     >>> fetch_osm_element("relation", "58446", verbose=True, cache_directory=tmp_cache2)
1827 1844
     fetch_osm_element(relation, 58446)
1828  
-    making request to url: http://www.overpass-api.de/api/interpreter
1829 1845
     Relation(id="58446", members=70)
1830 1846
 
1831 1847
     FIXME: fetching a non-existing element really should produce an
22  bin/get-boundaries-by-admin-level.py
@@ -22,19 +22,6 @@
22 22
 dir = os.path.dirname(os.path.abspath(__file__))
23 23
 data_dir = os.path.join(dir, '..', 'data')
24 24
 
25  
-timeout = '[timeout:3600];'
26  
-
27  
-def overpass_post_request(data, filename):
28  
-    """Make an Overpass API call and write to filename (if it doesn't exist)"""
29  
-    if not os.path.exists(filename):
30  
-        url = 'http://www.overpass-api.de/api/interpreter'
31  
-        values = {'data': data}
32  
-        encoded_values = urllib.urlencode(values)
33  
-        request = urllib2.Request(url, encoded_values)
34  
-        response = urllib2.urlopen(request)
35  
-        with open(filename, "w") as fp:
36  
-            fp.write(response.read())
37  
-
38 25
 def replace_slashes(s):
39 26
     return re.sub(r'/', '_', s)
40 27
 
@@ -42,15 +29,10 @@ def replace_slashes(s):
42 29
 
43 30
     print "Fetching data at admin level", admin_level
44 31
 
45  
-    predicate = '["boundary"="administrative"]'
46  
-    predicate += '["admin_level"="%d"]' % (admin_level,)
47  
-    data = timeout + '(relation%s;way%s;);out body;' % (predicate, predicate)
48  
-
49  
-    print "data is:", data
50  
-
51 32
     file_basename = "admin-level-%02d-worldwide.xml" % (admin_level,)
52 33
     xml_filename = os.path.join(data_dir, "cache", file_basename)
53  
-    overpass_post_request(data, xml_filename)
  34
+    admin_level_query = get_query_boundaries(admin_level)
  35
+    get_osm3s(admin_level_query, xml_filename)
54 36
 
55 37
     level_directory = os.path.join(data_dir, "cache", "al%02d" % (admin_level,))
56 38
     mkdir_p(level_directory)

0 notes on commit d45ed31

Please sign in to comment.
Something went wrong with that request. Please try again.