Commit
Merge pull request #91 from ssciwr/test-data-download
Download test data dynamically instead of shipping it with the library
dokempf authored Dec 14, 2021
2 parents 253bb01 + 5b8b1a3 commit 192f77e
Showing 9 changed files with 42 additions and 99 deletions.
3 changes: 0 additions & 3 deletions adaptivefiltering/data/500k_NZ20_Westport.laz

This file was deleted.

10 changes: 0 additions & 10 deletions adaptivefiltering/data/LICENSE.md

This file was deleted.

65 changes: 0 additions & 65 deletions adaptivefiltering/data/metadata_500k_NZ20_Westport.laz.txt

This file was deleted.

3 changes: 0 additions & 3 deletions adaptivefiltering/data/minimal.las

This file was deleted.

32 changes: 30 additions & 2 deletions adaptivefiltering/paths.py
@@ -1,17 +1,25 @@
import functools
import hashlib
import json
import os
import platform
import requests
import tarfile
import tempfile
import uuid
import xdg


# Storage for the temporary workspace directory
_tmp_dir = None

# Storage for the data directory that will be used to resolve relative paths
_data_dir = None

# The current data archive URL
TEST_DATA_ARCHIVE = "https://github.com/ssciwr/adaptivefiltering-test-data/releases/download/2021-12-14/data.tar.gz"
TEST_DATA_CHECKSUM = "b1af80c173ad475c14972a32bbf86cdbdb8a2197de48ca1e40c4a9859afcabcb"


def set_data_directory(directory):
"""Set a custom root directory to locate data files
@@ -49,6 +57,26 @@ def get_temporary_filename(extension=""):
return os.path.join(get_temporary_workspace(), f"{uuid.uuid4()}.{extension}")


def download_test_file(filename):
"""Ensure the existence of a dataset file by downloading it"""
full_file = os.path.join(get_temporary_workspace(), "data", filename)

if not os.path.exists(full_file):
archive = requests.get(TEST_DATA_ARCHIVE).content
checksum = hashlib.sha256(archive).hexdigest()
if checksum != TEST_DATA_CHECKSUM:
raise ValueError("Checksum for test data archive failed.")

archive_file = os.path.join(get_temporary_workspace(), "data.tar.gz")
with open(archive_file, "wb") as tar:
tar.write(archive)

with tarfile.open(archive_file, "r:gz") as tar:
tar.extractall(path=os.path.join(get_temporary_workspace(), "data"))

return full_file


def locate_file(filename):
"""Locate a file on the filesystem
@@ -92,8 +120,8 @@ def locate_file(filename):
for xdg_dir in xdg.xdg_data_dirs():
candidates.append(os.path.join(xdg_dir, filename))

-    # Use the package installation directory
-    candidates.append(os.path.join(os.path.split(__file__)[0], filename))
+    # Use the test data directory
+    candidates.append(download_test_file(filename))

# Iterate through the list to check for file existence
for candidate in candidates:
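Taken together, the new lookup chain means the package no longer has to ship data files: the first request for a test file downloads and verifies the archive into the temporary workspace, and later requests find the cached copy. A minimal usage sketch, assuming network access to the release archive:

from adaptivefiltering.paths import locate_file

# First call: no local copy exists yet, so the archive is downloaded,
# verified against TEST_DATA_CHECKSUM, and extracted into the workspace.
path = locate_file("500k_NZ20_Westport.laz")

# Second call: the extracted copy already exists, so nothing is downloaded.
assert path == locate_file("500k_NZ20_Westport.laz")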
4 changes: 2 additions & 2 deletions jupyter/datasets.ipynb
@@ -49,7 +49,7 @@
},
"outputs": [],
"source": [
"ds = adaptivefiltering.DataSet(filename=\"data/500k_NZ20_Westport.laz\")"
"ds = adaptivefiltering.DataSet(filename=\"500k_NZ20_Westport.laz\")"
]
},
{
@@ -89,7 +89,7 @@
"outputs": [],
"source": [
"ds_epsg = adaptivefiltering.DataSet(\n",
" filename=\"data/500k_NZ20_Westport.laz\", spatial_reference=\"EPSG:4326\"\n",
" filename=\"500k_NZ20_Westport.laz\", spatial_reference=\"EPSG:4326\"\n",
")"
]
},
2 changes: 1 addition & 1 deletion jupyter/filtering.ipynb
@@ -41,7 +41,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = adaptivefiltering.DataSet(filename=\"data/500k_NZ20_Westport.laz\")"
"dataset = adaptivefiltering.DataSet(filename=\"500k_NZ20_Westport.laz\")"
]
},
{
20 changes: 8 additions & 12 deletions tests/__init__.py
@@ -18,18 +18,14 @@ def mock_environment(**env):
os.environ.update(old_env)


-@pytest.fixture
-def dataset():
-    return adaptivefiltering.DataSet(filename="data/500k_NZ20_Westport.laz")
+def _dataset_fixture(filename, spatial_reference=None):
+    @pytest.fixture
+    def _fixture():
+        return adaptivefiltering.DataSet(filename, spatial_reference=spatial_reference)
+
+    return _fixture


-@pytest.fixture
-def dataset_thingstaette():
-    return adaptivefiltering.DataSet(filename="data/uls_thingstaette.las")
-
-
-@pytest.fixture
-def minimal_dataset():
-    return adaptivefiltering.DataSet(
-        filename="data/minimal.las", spatial_reference="EPSG:4362"
-    )
+# Fixtures for the provided datasets
+dataset = _dataset_fixture("500k_NZ20_Westport.laz")
+minimal_dataset = _dataset_fixture("minimal.las", spatial_reference="EPSG:4362")
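With the factory in place, a fixture for an additional dataset is a single line. A hedged sketch of how a test module might consume these fixtures, assuming they are imported there or re-exported through a conftest.py (test name and assertion are illustrative, not part of this commit):

from tests import dataset  # pytest fixture built by _dataset_fixture

def test_dataset_loads(dataset):
    # pytest injects the DataSet; the underlying .laz file is fetched
    # on demand via locate_file / download_test_file.
    assert dataset is not None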
2 changes: 1 addition & 1 deletion tests/test_paths.py
@@ -25,7 +25,7 @@ def test_paths(tmp_path):
assert abspath == locate_file("somefile.txt")

# Check that we always find the data provided by the package
-    assert os.path.exists(locate_file("data/500k_NZ20_Westport.laz"))
+    assert os.path.exists(locate_file("500k_NZ20_Westport.laz"))


def test_set_data_directory(tmp_path):
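The body of test_set_data_directory is collapsed above; based on the set_data_directory docstring ("Set a custom root directory to locate data files"), a speculative sketch of such a test might look like this (file name and assertion are assumptions, not the committed code):

import os
from adaptivefiltering.paths import locate_file, set_data_directory

def test_set_data_directory(tmp_path):
    # Place a file in a custom data directory ...
    with open(os.path.join(tmp_path, "somefile.txt"), "w") as f:
        f.write("dummy")

    # ... register that directory, and expect relative names to resolve into it.
    set_data_directory(str(tmp_path))
    assert locate_file("somefile.txt") == os.path.join(str(tmp_path), "somefile.txt")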
