Skip to content

Commit

Permalink
Add dry_run parameter to to_file (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
caspervdw authored and pyprogrammerblog committed Dec 20, 2019
1 parent 277f541 commit a0abd81
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Changelog of dask-geomodeling

- Added a .to_file() method to all GeometryBlocks.

- Added dry_run parameter (for validation) to .to_file().

- Start using google docstring convention.

- Several minor doc fixes.
Expand Down
23 changes: 18 additions & 5 deletions dask_geomodeling/geometry/sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import sys
import shutil
from contextlib import contextmanager

import fiona
import geopandas
Expand Down Expand Up @@ -181,7 +182,14 @@ def merge_files(path, target, remove_source=False):
pass


def to_file(source, url, fields=None, tile_size=None, **request):
@contextmanager
def DryRunTempDir(*args, **kwargs):
    """Context manager that yields a placeholder directory path.

    Accepts (and ignores) any positional and keyword arguments so that it can
    be called the same way as ``tempfile.TemporaryDirectory``. No directory
    is created on entry and nothing is removed on exit.

    Yields:
      str: the fixed placeholder path ``"/tmp/dummy"``.
    """
    placeholder = "/tmp/dummy"
    yield placeholder


def to_file(
source, url, fields=None, tile_size=None, dry_run=False, **request
):
"""Utility function to export data from a GeometryBlock to a file on disk.
You need to specify the target file path as well as the extent geometry
Expand All @@ -196,6 +204,7 @@ def to_file(source, url, fields=None, tile_size=None, **request):
tile_size (int): Optionally use this for large exports to stay within
memory constraints. The export is split in tiles of given size (units
are determined by the projection). Finally the tiles are merged.
dry_run (bool): Do nothing, only validate the arguments.
geometry (shapely Geometry): Limit exported objects to objects whose
centroid intersects with this geometry.
projection (str): The projection as a WKT string or EPSG code.
Expand All @@ -217,15 +226,19 @@ def to_file(source, url, fields=None, tile_size=None, **request):
path = utils.safe_abspath(url)
extension = os.path.splitext(path)[1]

with tempfile.TemporaryDirectory(
dir=config.get("temporary_directory", None)
) as tmpdir:
sink = GeometryFileSink(source, tmpdir, extension=extension, fields=fields)
TmpDir = DryRunTempDir if dry_run else tempfile.TemporaryDirectory
with TmpDir(dir=config.get("temporary_directory", None)) as tmpdir:
sink = GeometryFileSink(
source, tmpdir, extension=extension, fields=fields
)

# wrap the sink in a GeometryTiler
if tile_size is not None:
sink = GeometryTiler(sink, tile_size, request["projection"])

if dry_run:
return

# export the dataset to the tmpdir (full dataset or multiple tiles)
sink.get_data(**request)

Expand Down
7 changes: 6 additions & 1 deletion dask_geomodeling/tests/test_geometry_sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,16 @@ def test_to_file_with_tiling_geojson(self):
# because we lose the index in the saving process, just check the len
assert len(actual) == 2

def test_to_file_dry_run(self):
    """Check that to_file with dry_run=True does not create the target file."""
    target = self.path + ".geojson"
    self.source.to_file(target, dry_run=True, **self.request)
    # Assert on the exact path that was handed to to_file. Checking the
    # extension-less self.path would not notice a file written to
    # self.path + ".geojson" by a faulty dry run.
    assert not os.path.exists(target)

def test_to_file_with_tiling_shapefile(self):
    """Export to a shapefile with tile_size=10 and check what is read back."""
    target = self.path + ".shp"
    self.source.to_file(target, tile_size=10, **self.request_tiled)
    exported = gpd.read_file(target)
    # the index is lost in the saving process, so only the length is compared
    assert len(exported) == 2

0 comments on commit a0abd81

Please sign in to comment.