Skip to content

Commit

Permalink
Merge pull request #942 from koordinates/import-raster-from-s3
Browse files (browse the repository at this point in the history)
Fix import-from-s3 for sidecar files.
  • Loading branch information
olsen232 committed Nov 16, 2023
2 parents 53539b1 + 0a15372 commit f9f6000
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 3 deletions.
4 changes: 3 additions & 1 deletion kart/raster/import_.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,9 @@ def existing_tile_matches_source(self, source_oid, existing_summary):

def sidecar_files(self, source):
source = str(source)
if self.DATASET_CLASS.remove_tile_extension(source) == source:
if source.startswith("s3://"):
# Can't directly check if this S3 object exists, but here's where it should be:
yield source + PAM_SUFFIX, PAM_SUFFIX
return

pam_path = source + PAM_SUFFIX
Expand Down
21 changes: 19 additions & 2 deletions kart/tile/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import uuid

import click
import botocore
import pygit2

from kart.cli_util import find_param
Expand Down Expand Up @@ -38,7 +39,7 @@
)
from kart.list_of_conflicts import ListOfConflicts
from kart.meta_items import MetaItemFileType
from kart.s3_util import expand_s3_glob, fetch_from_s3
from kart.s3_util import expand_s3_glob, fetch_from_s3, get_error_code
from kart.progress_util import progress_bar
from kart.output_util import (
format_json_for_output,
Expand Down Expand Up @@ -447,9 +448,23 @@ def extract_tile_metadata(self, source):
local_path = self.repo.lfs_tmp_path / str(uuid.uuid4())
fetch_from_s3(source, local_path)
tile_path = local_path
# Also fetch sidecar files, if present.
for sidecar_file, suffix in self.sidecar_files(source):
try:
fetch_from_s3(
sidecar_file,
local_path.with_name(local_path.name + suffix),
)
except botocore.exceptions.ClientError as e:
if get_error_code(e) == 404:
# Not having any particular type of sidecar for any particular tile is allowed.
continue
else:
raise e
else:
local_path = None
tile_path = source

metadata = self.extract_tile_metadata_from_filesystem_path(tile_path)
return local_path, metadata

Expand Down Expand Up @@ -711,7 +726,9 @@ def import_tiles_to_stream(self, stream, sources):
blob_path = f"{self.dataset_inner_path}/{rel_blob_path}"
write_blob_to_stream(stream, blob_path, pointer_data)

for sidecar_file, suffix in self.sidecar_files(source):
for sidecar_file, suffix in self.sidecar_files(
self.source_to_local_path.get(source) or source
):
pointer_dict = self.copy_file_to_local_lfs_cache(sidecar_file)
pointer_data = dict_to_pointer_file_bytes(pointer_dict)
write_blob_to_stream(stream, blob_path + suffix, pointer_data)
Expand Down
123 changes: 123 additions & 0 deletions tests/raster/test_imports.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import pytest

from kart.exceptions import INVALID_ARGUMENT, NO_CHANGES
Expand Down Expand Up @@ -397,3 +398,125 @@ def test_import_single_geotiff_with_rat(
"d8f514e654a81bdcd7428886a15e300c56b5a5ff92898315d16757562d2968ca",
36908,
)


# Integration test (marked slow): import a raster tile from S3 with
# --preserve-format and check that both the tile and its PAM sidecar
# (.aux.xml) end up committed and checked out.
@pytest.mark.slow
def test_raster_import_from_s3__no_convert(
tmp_path,
chdir,
cli_runner,
s3_test_data_raster,
check_lfs_hashes,
check_tile_is_reflinked,
):
# Create a fresh repo to import into.
repo_path = tmp_path / "raster-repo"
r = cli_runner.invoke(["init", repo_path])
assert r.exit_code == 0

repo = KartRepo(repo_path)
with chdir(repo_path):
# Import from the S3 test data source, passing --preserve-format.
r = cli_runner.invoke(
[
"raster-import",
s3_test_data_raster,
"--message=test_import_from_s3_no_convert",
"--dataset-path=erorisk_si",
"--preserve-format",
]
)
assert r.exit_code == 0, r.stderr

# Presumably 2 = one LFS object for the tile plus one for its sidecar;
# confirm against the check_lfs_hashes fixture.
check_lfs_hashes(repo, 2)

# The dataset's format.json should report GeoTIFF with no profile constraint.
r = cli_runner.invoke(["meta", "get", "erorisk_si", "format.json", "-ojson"])
assert r.exit_code == 0, r.stderr
assert json.loads(r.stdout) == {
"erorisk_si": {"format.json": {"fileType": "geotiff"}}
}

# Only the output from the fifth line onward is compared; the first four
# lines are skipped (presumably the commit header - confirm).
# The pam* fields show that the sidecar was recorded alongside the tile.
# NOTE(review): the tile format is geotiff/cog even though nothing was
# converted - presumably the source tile is already a COG; confirm.
r = cli_runner.invoke(["show", "HEAD", "erorisk_si:tile"])
assert r.exit_code == 0, r.stderr
assert r.stdout.splitlines()[4:] == [
" test_import_from_s3_no_convert",
"",
"+++ erorisk_si:tile:erorisk_silcdb4",
"+ name = erorisk_silcdb4.tif",
"+ crs84Extent = POLYGON((172.6754107 -43.7555641,172.6748326 -43.8622096,172.8170036 -43.8625257,172.8173289 -43.755879,172.6754107 -43.7555641,172.6754107 -43.7555641))",
"+ dimensions = 762x790",
"+ format = geotiff/cog",
"+ nativeExtent = POLYGON((1573869.73 5155224.347,1573869.73 5143379.674,1585294.591 5143379.674,1585294.591 5155224.347,1573869.73 5155224.347))",
"+ oid = sha256:c4bbea4d7cfd54f4cdbca887a1b358a81710e820a6aed97cdf3337fd3e14f5aa",
"+ size = 604652",
"+ pamName = erorisk_silcdb4.tif.aux.xml",
"+ pamOid = sha256:d8f514e654a81bdcd7428886a15e300c56b5a5ff92898315d16757562d2968ca",
"+ pamSize = 36908",
]

# Both the tile and its sidecar must exist in the working copy and pass
# the check_tile_is_reflinked fixture's check.
assert (repo_path / "erorisk_si" / "erorisk_silcdb4.tif").is_file()
check_tile_is_reflinked(repo_path / "erorisk_si" / "erorisk_silcdb4.tif", repo)
assert (repo_path / "erorisk_si" / "erorisk_silcdb4.tif.aux.xml").is_file()
check_tile_is_reflinked(
repo_path / "erorisk_si" / "erorisk_silcdb4.tif.aux.xml", repo
)


# Integration test (marked slow): import a raster tile from S3 with
# --convert-to-cog and check that both the tile and its PAM sidecar
# (.aux.xml) end up committed and checked out.
@pytest.mark.slow
def test_raster_import_from_s3__convert(
tmp_path,
chdir,
cli_runner,
s3_test_data_raster,
check_lfs_hashes,
check_tile_is_reflinked,
):
# Create a fresh repo to import into.
repo_path = tmp_path / "raster-repo"
r = cli_runner.invoke(["init", repo_path])
assert r.exit_code == 0

repo = KartRepo(repo_path)
with chdir(repo_path):
# Import from the S3 test data source, passing --convert-to-cog.
r = cli_runner.invoke(
[
"raster-import",
s3_test_data_raster,
"--message=test_import_from_s3_convert",
"--dataset-path=erorisk_si",
"--convert-to-cog",
]
)
assert r.exit_code == 0, r.stderr

# Presumably 2 = one LFS object for the tile plus one for its sidecar;
# confirm against the check_lfs_hashes fixture.
check_lfs_hashes(repo, 2)

# With --convert-to-cog, format.json should additionally carry the
# "cloud-optimized" profile.
r = cli_runner.invoke(["meta", "get", "erorisk_si", "format.json", "-ojson"])
assert r.exit_code == 0, r.stderr
assert json.loads(r.stdout) == {
"erorisk_si": {
"format.json": {"fileType": "geotiff", "profile": "cloud-optimized"}
}
}

# Only the output from the fifth line onward is compared; the first four
# lines are skipped (presumably the commit header - confirm).
# The pam* fields show that the sidecar was recorded alongside the tile.
# NOTE(review): the expected oid and size equal those in the no-convert
# test - presumably the source tile is already a COG, so conversion leaves
# it unchanged; confirm.
r = cli_runner.invoke(["show", "HEAD", "erorisk_si:tile"])
assert r.exit_code == 0, r.stderr
assert r.stdout.splitlines()[4:] == [
" test_import_from_s3_convert",
"",
"+++ erorisk_si:tile:erorisk_silcdb4",
"+ name = erorisk_silcdb4.tif",
"+ crs84Extent = POLYGON((172.6754107 -43.7555641,172.6748326 -43.8622096,172.8170036 -43.8625257,172.8173289 -43.755879,172.6754107 -43.7555641,172.6754107 -43.7555641))",
"+ dimensions = 762x790",
"+ format = geotiff/cog",
"+ nativeExtent = POLYGON((1573869.73 5155224.347,1573869.73 5143379.674,1585294.591 5143379.674,1585294.591 5155224.347,1573869.73 5155224.347))",
"+ oid = sha256:c4bbea4d7cfd54f4cdbca887a1b358a81710e820a6aed97cdf3337fd3e14f5aa",
"+ size = 604652",
"+ pamName = erorisk_silcdb4.tif.aux.xml",
"+ pamOid = sha256:d8f514e654a81bdcd7428886a15e300c56b5a5ff92898315d16757562d2968ca",
"+ pamSize = 36908",
]

# Both the tile and its sidecar must exist in the working copy and pass
# the check_tile_is_reflinked fixture's check.
assert (repo_path / "erorisk_si" / "erorisk_silcdb4.tif").is_file()
check_tile_is_reflinked(repo_path / "erorisk_si" / "erorisk_silcdb4.tif", repo)
assert (repo_path / "erorisk_si" / "erorisk_silcdb4.tif.aux.xml").is_file()
check_tile_is_reflinked(
repo_path / "erorisk_si" / "erorisk_silcdb4.tif.aux.xml", repo
)

0 comments on commit f9f6000

Please sign in to comment.