Skip to content

Commit

Permalink
Add data import and update preparation
Browse files Browse the repository at this point in the history
  • Loading branch information
rmarianski committed Jul 28, 2016
1 parent 38f3ef6 commit b7d3145
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 44 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Expand Up @@ -18,3 +18,7 @@ data/*.shx
# python compiled bytecode and development files
*.pyc
vector_datasource.egg-info/

data/Makefile-import-data
data/Makefile-prepare-data
data/import-shapefiles.sh
32 changes: 32 additions & 0 deletions data/Makefile-import-data.jinja2
@@ -0,0 +1,32 @@
# Jinja2 template for the import-side Makefile: download the prepared
# shapefiles tarball, unpack it, and generate import-shapefiles.sh.
# Template variables referenced here: tgt_shapefile_shps, tgt_shapefile_zips,
# tgt_shapefile_wildcards, shapefiles (each with 'tgt_shp', 'tgt_zip', 'name'),
# bucket and datestamp.
all: shapefiles import

import: import-shapefiles.sh

# Generate the import script: truncate it, mark it executable, then append one
# shp2pgsql command line per shapefile. The final echo only prints the command
# the operator should run next; nothing is loaded into the database here.
import-shapefiles.sh: {{ tgt_shapefile_shps }}
@> import-shapefiles.sh
@chmod +x import-shapefiles.sh
{% for shapefile in shapefiles %}
@echo 'shp2pgsql -dID -s 3857 -W UTF-8 -g the_geom {{ shapefile['tgt_shp'] }} {{ shapefile['name'] }}' >> import-shapefiles.sh
{% endfor %}
@echo './import-shapefiles.sh | psql -d <database>'

shapefiles: shapefiles.tar.gz

# Fetch the tarball stored under the configured bucket and datestamp.
shapefiles.tar.gz:
wget 'http://s3.amazonaws.com/{{ bucket }}/{{ datestamp }}/shapefiles.tar.gz'

# Unpack the tarball to obtain the per-shapefile zips, then touch them
# (presumably so their mtimes are newer than the tarball -- TODO confirm).
{{ tgt_shapefile_zips }}: shapefiles.tar.gz
tar xzf shapefiles.tar.gz
touch {{ tgt_shapefile_zips }}

# One rule per shapefile: unzip its archive, fail the rule if the expected
# .shp is not present afterwards (test -e), then touch the .shp.
{% for shapefile in shapefiles %}
{{ shapefile['tgt_shp'] }}: {{ shapefile['tgt_zip'] }}
unzip -o {{ shapefile['tgt_zip'] }}
test -e {{ shapefile['tgt_shp'] }}
touch {{ shapefile['tgt_shp'] }}
{% endfor %}

# Remove the tarball, the extracted zips and everything matched by
# tgt_shapefile_wildcards. NOTE(review): import-shapefiles.sh is not removed.
clean:
rm -rf shapefiles.tar.gz {{ tgt_shapefile_zips }} {{ tgt_shapefile_wildcards }}

.PHONY: all shapefiles import clean
40 changes: 21 additions & 19 deletions data/Makefile-prepare-data.jinja2
@@ -1,38 +1,40 @@
all: download shapefiles

datestamp = $(shell date +%Y%m%d)

upload: shapefiles
aws s3 cp shapefiles.tar.gz s3://{{ bucket }}/$(datestamp)/shapefiles.tar.gz
if aws s3 ls s3://{{ bucket }}/{{ datestamp }}/shapefiles.tar.gz; then echo 'shapefiles.tar.gz already exists for {{ datestamp }}'; exit 1; else exit 0; fi
aws s3 cp shapefiles.tar.gz s3://{{ bucket }}/{{ datestamp }}/shapefiles.tar.gz

shapefiles: shapefiles.tar.gz

shapefiles.tar.gz: {{ proc_shapefile_zips }}
tar czf shapefiles.tar.gz {{ proc_shapefile_zips }}
shapefiles.tar.gz: {{ tgt_shapefile_zips }}
tar czf shapefiles.tar.gz {{ tgt_shapefile_zips }}

download: {{ shapefile_zips }}
download: {{ src_shapefile_zips }}

{% for shapefile in shapefiles %}
{{ shapefile.name_zip }}:
wget '{{ shapefile.url }}' -O {{ shapefile.name_zip }}
{{ shapefile.src_zip }}:
wget '{{ shapefile.url }}' -O {{ shapefile.src_zip }}
{% endfor %}

{% for shapefile in reproj_shapefiles %}
{{ shapefile.name_shp }}: {{ shapefile.name_zip }}
unzip -o {{ shapefile.name_zip }}
touch {{ shapefile.name_shp }}
{{ shapefile.src_shp }}: {{ shapefile.src_zip }}
unzip -o {{ shapefile.src_zip }}
touch {{ shapefile.src_shp }}

{{ shapefile.reproj_shp }}: {{ shapefile.name_shp }}
OGR_ENABLE_PARTIAL_REPROJECTION=1 ogr2ogr -wrapdateline -t_srs EPSG:3857 -lco encoding=utf8 {{ shapefile.reproj_shp }} {{ shapefile.name_shp }}
{{ shapefile.tgt_shp }}: {{ shapefile.src_shp }}
OGR_ENABLE_PARTIAL_REPROJECTION=1 ogr2ogr -wrapdateline -t_srs EPSG:3857 -lco encoding=utf8 {{ shapefile.tgt_shp }} {{ shapefile.src_shp }}
{% if shapefile.tile %}
python tile-shapefile.py {{ shapefile.reproj_shp }} tiled-{{ shapefile.reproj_shp }}
rm -f {{ shapefile.reproj_shp_wildcard }}
for i in tiled-{{ shapefile.reproj_shp_wildcard }}; do mv -f $$i `echo $$i | sed s/tiled-//`; done
python tile-shapefile.py {{ shapefile.tgt_shp }} tiled-{{ shapefile.tgt_shp }}
rm -f {{ shapefile.tgt_shp_wildcard }}
for i in tiled-{{ shapefile.tgt_shp_wildcard }}; do mv -f $$i `echo $$i | sed s/tiled-//`; done
{% endif %}

{{ shapefile.reproj_zip }}: {{ shapefile.reproj_shp }}
zip {{ shapefile.reproj_zip }} {{ shapefile.reproj_shp_wildcard }}
{{ shapefile.tgt_zip }}: {{ shapefile.tgt_shp }}
zip {{ shapefile.tgt_zip }} {{ shapefile.tgt_shp_wildcard }}

{% endfor %}

.PHONY: download upload shapefiles
clean:
rm -rf shapefiles.tar.gz {{ tgt_shapefile_zips }} {{ tgt_shapefile_wildcards }} {{ src_shapefile_zips }} {{ src_shapefile_wildcards }}

.PHONY: all download upload shapefiles
9 changes: 9 additions & 0 deletions data/assets.yaml
@@ -1,4 +1,5 @@
bucket: mapzen-tiles-assets
datestamp: 20160727

shapefiles:

Expand All @@ -11,10 +12,18 @@ shapefiles:
- name: water_polygons
url: http://data.openstreetmapdata.com/water-polygons-split-3857.zip
prj: 3857
shapefile-name: water-polygons-split-3857/water_polygons.shp
directory: water-polygons-split-3857

- name: land_polygons
url: http://data.openstreetmapdata.com/land-polygons-split-3857.zip
prj: 3857
directory: land-polygons-split-3857
shapefile-name: land-polygons-split-3857/land_polygons.shp

- name: buffered_land
url: http://s3.amazonaws.com/mapzen-tiles-assets/curated/buffered_land.zip
prj: 3857

- name: ne_110m_lakes
url: http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/physical/ne_110m_lakes.zip
Expand Down
99 changes: 74 additions & 25 deletions data/bootstrap.py
Expand Up @@ -7,65 +7,114 @@

dest_prj = asset_cfg.get('prj', 3857)
bucket = asset_cfg['bucket']
datestamp = asset_cfg['datestamp']

template_path = '.'
environment = Environment(loader=FileSystemLoader(template_path))
template = environment.get_template('Makefile-prepare-data.jinja2')
prepare_data_template = environment.get_template('Makefile-prepare-data.jinja2')

shapefile_zips = []
src_shapefile_zips = []
src_shapefile_shps = []
src_shapefile_wildcards = []
shapefiles = []
reproj_shapefile_dep_names = []
reproj_shapefile_tgt_names = []
reproj_shapefiles = []
sameproj_shapefile_zips = []
tgt_shapefile_zips = []
tgt_shapefile_shps = []
tgt_shapefile_wildcards = []
cfg_shapefiles = asset_cfg['shapefiles']

for cfg_shapefile in cfg_shapefiles:
shapefile = cfg_shapefile.copy()
name_zip = shapefile['url'].split('/')[-1]
shapefile['name_zip'] = name_zip
src_zip = shapefile['url'].split('/')[-1]

# in case the name of the shapefile is different than the zip
shapefile_name = shapefile.get('shapefile-name')
if shapefile_name is None:
shapefile['name_shp'] = name_zip.replace('.zip', '.shp')
src_shp = src_zip.replace('.zip', '.shp')
else:
src_shp = shapefile_name

directory = shapefile.get('directory')
if directory:
# these are used for removal in the clean target
src_wildcard = directory
tgt_shapefile_wildcards.append(directory)
else:
shapefile['name_shp'] = shapefile_name
src_wildcard = src_shp.replace('.shp', '*')

# but we should base the names of the generated shape files
# off the zip file so we're consistent
name_shp = name_zip.replace('.zip', '.shp')
shapefile['src_zip'] = src_zip
shapefile['src_shp'] = src_shp
shapefile['src_wildcard'] = src_wildcard
src_shapefile_zips.append(src_zip)
src_shapefile_shps.append(src_shp)
src_shapefile_wildcards.append(src_wildcard)

if shapefile['prj'] != 3857:
reproj_zip = name_zip.replace('.zip', '-merc.zip')
reproj_shp = name_shp.replace('.shp', '-merc.shp')
shapefile['reproj_zip'] = reproj_zip
shapefile['reproj_shp'] = reproj_shp
shapefile['reproj_shp_wildcard'] = reproj_shp.replace('.shp', '.*')
tgt_zip = src_zip.replace('.zip', '-merc.zip')
tgt_shp = tgt_zip.replace('.zip', '.shp')
shapefile['tgt_zip'] = tgt_zip
shapefile['tgt_shp'] = tgt_shp
tgt_shp_wildcard = tgt_shp.replace('.shp', '*')
shapefile['tgt_shp_wildcard'] = tgt_shp_wildcard

reproj_shapefiles.append(shapefile)
reproj_shapefile_dep_names.append(name_zip)
reproj_shapefile_tgt_names.append(reproj_zip)
reproj_shapefile_dep_names.append(src_zip)
reproj_shapefile_tgt_names.append(tgt_zip)
tgt_shapefile_zips.append(tgt_zip)
tgt_shapefile_shps.append(tgt_shp)
tgt_shapefile_wildcards.append(tgt_shp_wildcard)
else:
sameproj_shapefile_zips.append(name_zip)
sameproj_shapefile_zips.append(src_zip)
tgt_zip = src_zip
tgt_shp = src_shp
shapefile['tgt_zip'] = tgt_zip
shapefile['tgt_shp'] = tgt_shp
tgt_shp_wildcard = tgt_shp.replace('.shp', '*')
shapefile['tgt_shp_wildcard'] = tgt_shp_wildcard
tgt_shapefile_zips.append(tgt_zip)
tgt_shapefile_shps.append(tgt_shp)
tgt_shapefile_wildcards.append(tgt_shp_wildcard)

shapefiles.append(shapefile)
shapefile_zips.append(name_zip)

shapefile_zips_str = ' '.join(shapefile_zips)
src_shapefile_zips_str = ' '.join(src_shapefile_zips)
reproj_shapefile_dep_names_str = ' '.join(reproj_shapefile_dep_names)
reproj_shapefile_tgt_names_str = ' '.join(reproj_shapefile_tgt_names)
sameproj_shapefile_zips_str = ' '.join(sameproj_shapefile_zips)
proc_shapefile_zips_str = '%s %s' % (
sameproj_shapefile_zips_str, reproj_shapefile_tgt_names_str)
result = template.render(
shapefile_zips=shapefile_zips_str,
tgt_shapefile_zips_str = ' '.join(tgt_shapefile_zips)
tgt_shapefile_shps_str = ' '.join(tgt_shapefile_shps)
tgt_shapefile_wildcards_str = ' '.join(tgt_shapefile_wildcards)
src_shapefile_wildcards_str = ' '.join(src_shapefile_wildcards)
prepare_data_makefile = prepare_data_template.render(
src_shapefile_zips=src_shapefile_zips_str,
shapefiles=shapefiles,
reproj_shapefiles=reproj_shapefiles,
reproj_shapefile_dep_names=reproj_shapefile_dep_names_str,
reproj_shapefile_tgt_names=reproj_shapefile_tgt_names_str,
proc_shapefile_zips=proc_shapefile_zips_str,
tgt_shapefile_zips=tgt_shapefile_zips_str,
tgt_shapefile_shps=tgt_shapefile_shps_str,
tgt_shapefile_wildcards=tgt_shapefile_wildcards_str,
src_shapefile_wildcards=src_shapefile_wildcards_str,
bucket=bucket,
datestamp=datestamp,
)
with open('Makefile-prepare-data', 'w') as fh:
fh.write(result)
fh.write(prepare_data_makefile)
fh.write('\n')

import_data_template = environment.get_template('Makefile-import-data.jinja2')
import_data_makefile = import_data_template.render(
bucket=bucket,
datestamp=datestamp,
tgt_shapefile_zips=tgt_shapefile_zips_str,
tgt_shapefile_shps=tgt_shapefile_shps_str,
tgt_shapefile_wildcards=tgt_shapefile_wildcards_str,
src_shapefile_wildcards=src_shapefile_wildcards_str,
shapefiles=shapefiles,
)
with open('Makefile-import-data', 'w') as fh:
fh.write(import_data_makefile)
fh.write('\n')

0 comments on commit b7d3145

Please sign in to comment.