Skip to content

Commit

Permalink
Enable import of GADM data using import_geojson
Browse files Browse the repository at this point in the history
* use `release` to remove AdminBoundaries
* add tests for `import_geojson`
  • Loading branch information
dodobas committed Sep 10, 2018
1 parent 90e9abb commit 6e9c2a5
Show file tree
Hide file tree
Showing 7 changed files with 511 additions and 47 deletions.
17 changes: 12 additions & 5 deletions .coveragerc
Expand Up @@ -18,16 +18,23 @@ exclude_lines =
if 0:
if __name__ == .__main__.:
omit =
*/migrations/*
*/gen/*
*/tests*,
*__init__*,
*settings*,
*management/commands*
*/tests*
*__init__*
*settings*
temba/perf_tests.py
temba/wsgi.py
temba/flows/management/commands/run_audit.py
temba/contacts/management/commands/audit_es_group.py
temba/msgs/management/commands/msg_console.py
temba/api/management/commands/webhook_stats.py
temba/utils/management/commands/perf_test.py
temba/utils/management/commands/migrate_flows.py
temba/utils/management/commands/test_db.py
temba/orgs/management/commands/update_analytics_consent.py
temba/orgs/management/commands/update_analytics_orgs.py
[html]
directory = coverage_html_report
17 changes: 12 additions & 5 deletions .coveragerc_failcheck
Expand Up @@ -19,15 +19,22 @@ exclude_lines =
if 0:
if __name__ == .__main__.:


omit =
*/migrations/*
*/gen/*
*/tests*,
*__init__*,
*settings*,
*management/commands*
*/tests*
*__init__*
*settings*
temba/perf_tests.py
temba/flows/management/commands/run_audit.py
temba/contacts/management/commands/audit_es_group.py
temba/msgs/management/commands/msg_console.py
temba/api/management/commands/webhook_stats.py
temba/utils/management/commands/perf_test.py
temba/utils/management/commands/migrate_flows.py
temba/utils/management/commands/test_db.py
temba/orgs/management/commands/update_analytics_consent.py
temba/orgs/management/commands/update_analytics_orgs.py

[html]
directory = coverage_html_report
2 changes: 1 addition & 1 deletion temba/locations/management/commands/download_geojson.py
Expand Up @@ -38,7 +38,7 @@ def handle(self, *args, **options):
repo = options["repo"]
oauth_token = options["oauth_token"]

if oauth_token:
if oauth_token: # pragma: needs cover
headers = {"Authorization": "token %s" % (oauth_token,)}
else:
headers = {}
Expand Down
117 changes: 83 additions & 34 deletions temba/locations/management/commands/import_geojson.py
Expand Up @@ -6,11 +6,12 @@

from django.contrib.gis.geos import MultiPolygon, Polygon
from django.core.management.base import BaseCommand
from django.db import connection, transaction

from temba.locations.models import AdminBoundary


class Command(BaseCommand): # pragma: no cover
class Command(BaseCommand):
help = "Import our geojson zip file format, updating all our OSM data accordingly."

def add_arguments(self, parser):
Expand All @@ -24,12 +25,12 @@ def import_file(self, filename, file):

# we keep track of all the osm ids we've seen because we remove all admin levels at this level
# which weren't seen. (they have been removed)
seen_osm_ids = []
seen_osm_ids = set()
osm_id = None

# parse our filename.. they are in the format:
# 192787admin2_simplified.json
match = regex.match(r"(\w\d+)admin(\d)(_simplified)?\.json$", filename, regex.V0)
match = regex.match(r"(\w+\d+)admin(\d)(_simplified)?\.json$", filename, regex.V0)
level = None
is_simplified = None
if match:
Expand All @@ -42,9 +43,9 @@ def import_file(self, filename, file):
if match:
level = int(match.group(1))
is_simplified = True if match.group(2) else False
elif not match:
print("Skipping '%s', doesn't match file pattern." % filename)
return
else:
self.stdout.write(self.style.WARNING(f"Skipping '{filename}', doesn't match file pattern."))
return None, set()

# for each of our features
for feature in admin_json["features"]:
Expand All @@ -71,7 +72,9 @@ def import_file(self, filename, file):
if parent_osm_id and parent_osm_id != "None":
parent = AdminBoundary.objects.filter(osm_id=parent_osm_id).first()
if not parent:
print("Skipping %s (%s) as parent %s not found." % (name, osm_id, parent_osm_id))
self.stdout.write(
self.style.SUCCESS(f"Skipping {name} ({osm_id}) as parent {parent_osm_id} not found.")
)
continue

# try to find existing admin level by osm_id
Expand All @@ -83,7 +86,7 @@ def import_file(self, filename, file):

# skip over items with no geometry
if not feature["geometry"] or not feature["geometry"]["coordinates"]:
continue
continue # pragma: can't cover

polygons = []
if feature["geometry"]["type"] == "Polygon":
Expand All @@ -109,7 +112,7 @@ def import_file(self, filename, file):
else:
kwargs["path"] = parent.path + AdminBoundary.PADDED_PATH_SEPARATOR + name

print(" ** updating %s (%s)" % (name, osm_id))
self.stdout.write(self.style.SUCCESS(f" ** updating {name} ({osm_id})"))
boundary = boundary.first()
boundary.update(**kwargs)

Expand All @@ -118,20 +121,32 @@ def import_file(self, filename, file):

# otherwise, this is new, so create it
else:
print(" ** adding %s (%s)" % (name, osm_id))
self.stdout.write(self.style.SUCCESS(f" ** adding {name} ({osm_id})"))
AdminBoundary.create(**kwargs)

# keep track of this osm_id
seen_osm_ids.append(osm_id)
seen_osm_ids.add(osm_id)

# now remove any unseen boundaries
if osm_id:
last_boundary = AdminBoundary.objects.filter(osm_id=osm_id).first()
if last_boundary:
print(" ** removing unseen boundaries (%s)" % (osm_id))
self.stdout.write(self.style.SUCCESS(f" ** removing unseen boundaries ({osm_id})"))
country = last_boundary.get_root()
country.get_descendants().filter(level=level).exclude(osm_id__in=seen_osm_ids).delete()
return country

unseen_boundaries = country.get_descendants().filter(level=level).exclude(osm_id__in=seen_osm_ids)
deleted_count = 0
for unseen_boundary in unseen_boundaries:
unseen_boundary.release()
deleted_count += 1
if deleted_count > 0:
self.stdout.write(f" ** Unseen boundaries removed: {deleted_count}")

return country, seen_osm_ids
else:
return None, set()
else:
return None, set()

def handle(self, *args, **options):
files = options["files"]
Expand All @@ -149,29 +164,63 @@ def handle(self, *args, **options):
if options["country"]:
prefix = "%sadmin" % options["country"]

# sort our filepaths, this will make sure we import 0 levels before 1
# before 2
# sort our filepaths, this will make sure we import 0 levels before 1 and before 2
filepaths.sort()

country = None
# for each file they have given us
for filepath in filepaths:
filename = os.path.basename(filepath)
# if it ends in json, then it is geojson, try to parse it
if filename.startswith(prefix) and filename.endswith("json"):
# read the file entirely
print("=== parsing %s" % filename)

# if we are reading from a zipfile, read it from there
if zipfile:
with zipfile.open(filepath) as json_file:
country = self.import_file(filename, json_file)

# otherwise, straight off the filesystem
else:
with open(filepath) as json_file:
country = self.import_file(filename, json_file)
updated_osm_ids = set()

with transaction.atomic():
# for each file they have given us
for filepath in filepaths:
filename = os.path.basename(filepath)
# if it ends in json, then it is geojson, try to parse it
if filename.startswith(prefix) and filename.endswith("json"):
# read the file entirely
self.stdout.write(self.style.SUCCESS(f"=== parsing {filename}"))

# if we are reading from a zipfile, read it from there
if zipfile:
with zipfile.open(filepath) as json_file:
country, seen_osm_ids = self.import_file(filename, json_file)

# otherwise, straight off the filesystem
else:
with open(filepath) as json_file:
country, seen_osm_ids = self.import_file(filename, json_file)

# add the osm_ids seen in this file to the updated_osm_ids collection
updated_osm_ids = updated_osm_ids.union(seen_osm_ids)

if country is None:
return

# remove all other unseen boundaries from the database for the country
with connection.cursor() as cursor:
cursor.execute(
"""
DELETE FROM locations_adminboundary WHERE id IN (
with recursive adminboundary_set(id, parent_id, name, depth, path, cycle, osm_id) AS (
SELECT ab.id, ab.parent_id, ab.name, 1, ARRAY[ab.id], false, ab.osm_id
from locations_adminboundary ab
WHERE id = %s
UNION ALL
SELECT ab.id, ab.parent_id, ab.name, abs.depth+1, abs.path || ab.id, ab.id = ANY(abs.path), ab.osm_id
from locations_adminboundary ab , adminboundary_set abs
WHERE not cycle AND ab.parent_id = abs.id
)
SELECT
abs.id
from adminboundary_set abs
WHERE NOT (abs.osm_id = ANY(%s)))
""",
(country.id, list(updated_osm_ids)),
)
self.stdout.write(self.style.SUCCESS(f"Other unseen boundaries removed: {cursor.rowcount}"))

if country:
print(" ** updating paths for all of %s" % country.name)
self.stdout.write(self.style.SUCCESS((f" ** updating paths for all of {country.name}")))
country.update_path()
4 changes: 3 additions & 1 deletion temba/locations/models.py
Expand Up @@ -124,7 +124,9 @@ def _update_child_paths(boundary):
_update_child_paths(self)

def release(self):
AdminBoundary.objects.filter(parent=self).update(parent=None)
for child_boundary in AdminBoundary.objects.filter(parent=self):
child_boundary.release()

self.delete()

@classmethod
Expand Down

0 comments on commit 6e9c2a5

Please sign in to comment.