Skip to content

Commit

Permalink
Merge pull request #16 from nens/casper-filepaths
Browse files Browse the repository at this point in the history
Adapt safe_file_url
  • Loading branch information
arjanverkerk committed Nov 13, 2019
2 parents 06c359e + e8cc683 commit b13a74f
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 52 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ Changelog of dask-geomodeling

- Run unittests on windows.

- Adapt safe_abspath and safe_file_url functions: they now automatically
interpret the geomodeling.root config instead of the 'start' kwarg.

- Added a geomodeling.strict-file-paths setting that defaults to False. This
changes the default behaviour of all blocks that handle file paths: by default,
the path is no longer required to be inside geomodeling.root.


2.0.4 (2019-11-01)
------------------
Expand Down
1 change: 1 addition & 0 deletions dask_geomodeling/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

defaults = {
"root": os.getcwd(),
"strict-file-paths": False,
"raster-limit": 12 * (1024 ** 2), # ca. 100 MB of float64
"geometry-limit": 10000,
}
Expand Down
6 changes: 3 additions & 3 deletions dask_geomodeling/geometry/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class GeometryFileSource(GeometryBlock):
"""

def __init__(self, url, layer=None, id_field="id"):
safe_url = utils.safe_file_url(url, config.get("geomodeling.root"))
safe_url = utils.safe_file_url(url)
super().__init__(safe_url, layer, id_field)

@property
Expand All @@ -58,7 +58,7 @@ def id_field(self):

@property
def path(self):
return utils.safe_abspath(self.url, config.get("geomodeling.root"))
return utils.safe_abspath(self.url)

@property
def columns(self):
Expand Down Expand Up @@ -86,7 +86,7 @@ def get_sources_and_requests(self, **request):

@staticmethod
def process(url, request):
path = utils.safe_abspath(url, config.get("geomodeling.root"))
path = utils.safe_abspath(url)

# convert the requested projection to a fiona CRS
crs = utils.get_crs(request["projection"])
Expand Down
6 changes: 3 additions & 3 deletions dask_geomodeling/raster/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ class RasterFileSource(RasterBlock):
"""

def __init__(self, url, time_first=0, time_delta=300000):
url = utils.safe_file_url(url, config.get("geomodeling.root"))
url = utils.safe_file_url(url)
if isinstance(time_first, datetime):
time_first = utils.dt_to_ms(time_first)
else:
Expand Down Expand Up @@ -332,7 +332,7 @@ def gdal_dataset(self):
try:
return self._gdal_dataset
except AttributeError:
path = utils.safe_abspath(self.url, config.get("geomodeling.root"))
path = utils.safe_abspath(self.url)
self._gdal_dataset = gdal.Open(path)
return self._gdal_dataset

Expand Down Expand Up @@ -466,7 +466,7 @@ def process(process_kwargs):

# open the dataset
url = process_kwargs["url"]
path = utils.safe_abspath(url, config.get("geomodeling.root"))
path = utils.safe_abspath(url)
dataset = gdal.Open(path)
first_band = process_kwargs["first_band"]
last_band = process_kwargs["last_band"]
Expand Down
96 changes: 65 additions & 31 deletions dask_geomodeling/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest
import sys

from dask import config
from osgeo import osr
from shapely import geometry
from shapely.geometry import box
Expand Down Expand Up @@ -86,38 +87,71 @@ def test_get_footprint(self):
)
self.assertTrue(np.equal(output, reference).all())

@pytest.mark.skipif(
sys.platform.startswith("win"),
reason="Path tests are not yet written for windows",
)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_safe_file_url(self):
# prepends file:// if necessary
self.assertEqual(utils.safe_file_url("/tmp"), "file:///tmp")
self.assertEqual(utils.safe_file_url("/tmp", "/"), "file:///tmp")

# absolute input
self.assertEqual(utils.safe_file_url("file:///tmp"), "file:///tmp")
self.assertEqual(utils.safe_file_url("file:///tmp", "/"), "file:///tmp")
self.assertEqual(utils.safe_file_url("file://tmp", "/"), "file:///tmp")

# relative input
self.assertEqual(
utils.safe_file_url("path", "/tmp/abs"), "file:///tmp/abs/path"
)
self.assertEqual(
utils.safe_file_url("../abs/path", "/tmp/abs"), "file:///tmp/abs/path"
)

# raise on relative path without start provided
self.assertRaises(IOError, utils.safe_file_url, "file://tmp")

# raise on unknown protocol
self.assertRaises(NotImplementedError, utils.safe_file_url, "unknown://tmp")

# raise on path outside start (tested more thorough in safe_relpath)
self.assertRaises(IOError, utils.safe_file_url, "file://../x", "/tmp")
self.assertRaises(IOError, utils.safe_file_url, "/etc/abs", "/tmp")
self.assertRaises(IOError, utils.safe_file_url, "../", "/tmp")
f = utils.safe_file_url
if not sys.platform.startswith("win"):
# prepends file:// if necessary
assert f("/tmp") == "file:///tmp"
assert f("/tmp", "/") == "file:///tmp"

# absolute input
assert f("file:///tmp") == "file:///tmp"
assert f("file:///tmp", "/") == "file:///tmp"
assert f("file://tmp", "/") == "file:///tmp"

# relative input
assert f("path", "/tmp/abs") == "file:///tmp/abs/path"
assert f("../abs/path", "/tmp/abs") == "file:///tmp/abs/path"

# raise on unknown protocol
with pytest.raises(NotImplementedError):
f("unknown://tmp")

# paths outside of 'start'
assert f("file://../x", "/tmp") == "file:///x"
assert f("/etc/abs", "/tmp") == "file:///etc/abs"
assert f("../", "/tmp") == "file:///"

# raise on path outside start when strict-file-paths=True
with config.set({'geomodeling.strict-file-paths': True}):
with pytest.raises(IOError):
f("file://../x", "/tmp")
with pytest.raises(IOError):
f("/etc/abs", "/tmp")
with pytest.raises(IOError):
f("../", "/tmp")
else:
# prepends file:// if necessary
assert f("C:\\tmp") == "file://C:\\tmp"
assert f("C:\\tmp", "C:\\") == "file://C:\\tmp"

# absolute input
assert f("file://C:\\tmp") == "file://C:\\tmp"
assert f("file://C:\\tmp", "C:\\") == "file://C:\\tmp"
assert f("file://tmp", "C:\\") == "file://C:\\tmp"

# relative input
assert f("path", "C:\\tmp\\abs") == "file://C:\\tmp\\abs\\path"
assert f("..\\abs\\path", "C:\\tmp\\abs") == "file://C:\\tmp\\abs\\path"

# raise on unknown protocol
with pytest.raises(NotImplementedError):
f("unknown://tmp")

# paths outside of 'start'
assert f("file://..\\x", "C:\\tmp") == "file://C:\\x"
assert f("D:\\tmp", "C:\\tmp") == "file://D:\\tmp"
assert f("..\\", "C:\\tmp") == "file://C:\\"

# raise on path outside start when strict-file-paths=True
with config.set({'geomodeling.strict-file-paths': True}):
with pytest.raises(IOError):
f("file://..\\x", "C:\\tmp")
with pytest.raises(IOError):
f("D:\\tmp", "C:\\tmp")
with pytest.raises(IOError):
f("..\\", "C:\\tmp")

def test_get_crs(self):
# from EPSG
Expand Down
43 changes: 28 additions & 15 deletions dask_geomodeling/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import pytz
import os
import warnings
from functools import lru_cache
from itertools import repeat

Expand All @@ -9,6 +10,7 @@
import numpy as np
import pandas as pd
from scipy import ndimage
from dask import config

from osgeo import gdal, ogr, osr, gdal_array
from shapely.geometry import box, Point
Expand Down Expand Up @@ -652,22 +654,24 @@ def rasterize_geoseries(geoseries, bbox, projection, height, width, values=None)


def safe_abspath(url, start=None):
    """Executes safe_file_url but only returns the path and not the protocol.

    The (deprecated) 'start' argument is forwarded to safe_file_url as-is, so
    that an explicitly supplied value is handled there rather than being
    silently dropped. When 'start' is None, safe_file_url falls back to the
    'geomodeling.root' entry of the dask config.
    """
    url = safe_file_url(url, start)
    # safe_file_url returns "<protocol>://<absolute path>"; keep only the path
    _, path = url.split("://")
    return path


def safe_file_url(url, start=None):
"""Formats an URL so that it meets the following safety conditions:
- the URL starts with file:// (raises NotImplementedError if not)
- the path is absolute
- the path is contained in `start` (raises IOError if not)
- the URL starts with file:// (else: raises NotImplementedError)
- the path is absolute (relative paths are taken relative to
geomodeling.root)
- if geomodeling.strict-file-paths is set: the path has to be contained
inside `start` (else: raises IOError)
If `start` is None, absolute paths are returned.
For backwards compatibility, geomodeling.root can be overridden using the
'start' argument.
"""
try:
protocol, path = url.split("://")
Expand All @@ -677,16 +681,25 @@ def safe_file_url(url, start=None):
else:
if protocol != "file":
raise NotImplementedError('Unknown protocol: "{}"'.format(protocol))
if start is None:
if not os.path.isabs(path):
if start is not None:
warnings.warn(
"Using the start argument in safe_file_url is deprecated. Use the "
"'geomodeling.root' in the dask config", DeprecationWarning
)
else:
start = config.get("geomodeling.root")

if not os.path.isabs(path):
if start is None:
raise IOError(
"Relative path '{}' provided but start was not " "given.".format(path)
"Relative path '{}' provided but start was not given.".format(path)
)
abspath = os.path.abspath(path)
else:
abspath = os.path.abspath(os.path.join(start, path))
if not abspath.startswith(start):
raise IOError("'{}' is not contained in '{}'".format(path, start))
else:
abspath = os.path.abspath(path)
strict = config.get("geomodeling.strict-file-paths")
if strict and not abspath.startswith(start):
raise IOError("'{}' is not contained in '{}'".format(path, start))
return "://".join([protocol, abspath])


Expand Down

0 comments on commit b13a74f

Please sign in to comment.