diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..66d464d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Ignore output of scraper
+data.sqlite
diff --git a/README.textile b/README.textile
new file mode 100644
index 0000000..b447240
--- /dev/null
+++ b/README.textile
@@ -0,0 +1 @@
+Haringey Wards 2
\ No newline at end of file
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..1a4b1cb
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Save the child areas of a MapIt area, with their KML, to SQLite."""
+import json
+
+import requests
+import scraperwiki.sqlite as db
+
+PARENT_ID = 2511  # London Borough of Haringey
+
+# Seconds to wait for MapIt before giving up; requests applies no timeout
+# by default, so without this a stalled connection would hang the scraper.
+REQUEST_TIMEOUT = 60
+
+
+def iter_children_areas_kml(parent_id):
+    """Yield one record per child area of ``parent_id``.
+
+    Each record is a dict with the keys ``parent_area``, ``id``, ``name``
+    and ``kml`` (the raw body of the area's ``.kml`` resource).
+
+    Raises ``requests.HTTPError`` if MapIt answers with an error status,
+    so that an error page is never stored as if it were KML.
+    """
+    children = getjs('http://mapit.mysociety.org/area/%s/children' % parent_id)
+    for area_id, area in children.items():
+        response = requests.get(
+            'http://mapit.mysociety.org/area/%s.kml' % area_id,
+            timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        yield {'parent_area': int(area['parent_area']),
+               'id': int(area_id),
+               'name': area['name'],
+               'kml': response.content}
+
+
+def getjs(url, **opts):
+    """Fetch ``url`` and return its body decoded as JSON.
+
+    ``opts`` are passed through to ``requests.get``; a default timeout is
+    applied unless the caller supplies one. Raises ``requests.HTTPError``
+    on an error status.
+    """
+    opts.setdefault('timeout', REQUEST_TIMEOUT)
+    response = requests.get(url, **opts)
+    response.raise_for_status()
+    return json.loads(response.content)
+
+
+#
+# Main
+#
+data = list(iter_children_areas_kml(PARENT_ID))
+db.save(['id'], data, verbose=0)