Skip to content

Commit

Permalink
Clean up importers - more shared code, fewer dicts
Browse files Browse the repository at this point in the history
  • Loading branch information
olsen232 committed Nov 30, 2023
1 parent 878c8eb commit 3ba7f8c
Show file tree
Hide file tree
Showing 30 changed files with 539 additions and 863 deletions.
1 change: 0 additions & 1 deletion kart.spec
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ pyi_analysis = Analysis(
# TODO: improve this somehow
*collect_submodules('kart'),
*collect_submodules('kart.annotations'),
*collect_submodules('kart.linked_storage'),
*collect_submodules('kart.lfs_commands'),
*collect_submodules('kart.point_cloud'),
*collect_submodules('kart.sqlalchemy'),
Expand Down
2 changes: 0 additions & 2 deletions kart/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@
"point_cloud.import_": {"point-cloud-import"},
"install": {"install"},
"add_dataset": {"add-dataset"},
"linked_storage.point_cloud_import": {"linked-point-cloud-import"},
"linked_storage.raster_import": {"linked-raster-import"},
}

# These commands aren't valid Python symbols, even when we change dash to underscore.
Expand Down
20 changes: 9 additions & 11 deletions kart/import_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
forward_context_to_command,
)
from kart.completion_shared import file_path_completer
from kart.import_sources import from_spec, suggest_specs
from kart.import_sources import from_spec, suggest_specs, ImportType


def list_import_formats(ctx):
Expand Down Expand Up @@ -67,8 +67,8 @@ def list_import_formats(ctx):
is_flag=True,
help=(
"Link the created dataset to the original source location, so that the original source location is treated as "
"the authoritative source for the given data and data is fetched from there if needed. Only supported for "
"tile-based datasets."
"the authoritative source for the given data and data is fetched from there if needed. Currently only "
"supported for tile-based datasets where the tiles are sourced from S3."
),
)
@click.argument(
Expand Down Expand Up @@ -145,13 +145,11 @@ def import_(ctx, args, **kwargs):
f"Try one of the following:\n{suggest_specs()}"
)

if kwargs.get("do_link"):
import_cmd = import_source_type.linked_import_cmd
if import_cmd is None:
raise click.UsageError(
"--link is not supported for vector or tabular imports"
)
else:
import_cmd = import_source_type.import_cmd
if kwargs.get("do_link") and import_source_type.import_type in (
ImportType.SQLALCHEMY_TABLE,
ImportType.OGR_TABLE,
):
raise click.UsageError("--link is not supported for vector or tabular imports")

import_cmd = import_source_type.import_cmd
forward_context_to_command(ctx, import_cmd)
15 changes: 0 additions & 15 deletions kart/import_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,6 @@ def import_cmd(self):

return raster_import

@property
def linked_import_cmd(self):
if self is self.POINT_CLOUD:
from kart.linked_storage.point_cloud_import import linked_point_cloud_import

return linked_point_cloud_import
elif self is self.RASTER:
from kart.linked_storage.raster_import import linked_raster_import

return linked_raster_import

@property
def import_source_class(self):
if self is self.SQLALCHEMY_TABLE:
Expand Down Expand Up @@ -91,10 +80,6 @@ def __init__(
def import_cmd(self):
return self.import_type.import_cmd

@property
def linked_import_cmd(self):
return self.import_type.linked_import_cmd

@property
def import_source_class(self):
return self.import_type.import_source_class
Expand Down
8 changes: 4 additions & 4 deletions kart/lfs_commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from kart.lfs_util import (
pointer_file_bytes_to_dict,
get_hash_from_pointer_file,
get_local_path_from_lfs_hash,
get_local_path_from_lfs_oid,
)
from kart.object_builder import ObjectBuilder
from kart.rev_list_objects import rev_list_tile_pointer_files
Expand Down Expand Up @@ -68,7 +68,7 @@ def ls_files(ctx, show_size, all, ref1, ref2):

@functools.lru_cache()
def is_present(lfs_hash):
return get_local_path_from_lfs_hash(repo, lfs_hash).is_file()
return get_local_path_from_lfs_oid(repo, lfs_hash).is_file()

for commit_id, path_match_result, pointer_blob in rev_list_tile_pointer_files(
repo, start_commits, stop_commits
Expand Down Expand Up @@ -324,7 +324,7 @@ def fetch_lfs_blobs_for_pointer_files(
url = pointer_dict.get("url")
lfs_oid = get_hash_from_pointer_file(pointer_dict)
pointer_file_oid = pointer_blob.hex
lfs_path = get_local_path_from_lfs_hash(repo, lfs_oid)
lfs_path = get_local_path_from_lfs_oid(repo, lfs_oid)
if lfs_path.is_file():
continue # Already fetched.

Expand Down Expand Up @@ -380,7 +380,7 @@ def _do_fetch_from_urls(repo, urls_and_lfs_oids, quiet=False):
)

urls_and_paths_and_oids = [
(url, get_local_path_from_lfs_hash(repo, lfs_oid), lfs_oid)
(url, get_local_path_from_lfs_oid(repo, lfs_oid), lfs_oid)
for (url, lfs_oid) in urls_and_lfs_oids
]
path_parents = {path.parent for url, path, lfs_oid in urls_and_paths_and_oids}
Expand Down
39 changes: 20 additions & 19 deletions kart/lfs_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def install_lfs_hooks(repo):
)


def get_hash_and_size_of_file(path):
def get_oid_and_size_of_file(path):
"""Given a path to a file, calculates and returns its SHA256 hash and length in bytes."""
if not isinstance(path, Path):
path = Path(path)
Expand All @@ -55,7 +55,7 @@ def get_hash_and_size_of_file(path):
return sha256.hexdigest(), size


def get_hash_and_size_of_file_while_copying(src_path, dest_path, allow_overwrite=False):
def get_oid_and_size_of_file_while_copying(src_path, dest_path, allow_overwrite=False):
"""
Given a path to a file, calculates and returns its SHA256 hash and length in bytes,
while copying it to the given destination.
Expand Down Expand Up @@ -283,9 +283,7 @@ def get_hash_from_pointer_file(pointer_file_bytes):
if isinstance(pointer_file_bytes, dict):
# Already decoded - just trim off the sha256:
oid = pointer_file_bytes["oid"]
if oid.startswith("sha256:"):
oid = oid[7:] # len("sha256:")
return oid
return unprefix_sha256(oid)

if isinstance(pointer_file_bytes, pygit2.Blob):
pointer_file_bytes = memoryview(pointer_file_bytes)
Expand All @@ -295,13 +293,10 @@ def get_hash_from_pointer_file(pointer_file_bytes):
return None


def get_local_path_from_lfs_hash(repo, lfs_hash):
"""Given a sha256 LFS hash, finds where the object would be stored in the local LFS cache."""
if lfs_hash.startswith("sha256:"):
lfs_hash = lfs_hash[7:] # len("sha256:")
return (
repo.gitdir_path / "lfs" / "objects" / lfs_hash[0:2] / lfs_hash[2:4] / lfs_hash
)
def get_local_path_from_lfs_oid(repo, oid):
    """
    Given a sha256 LFS OID (with or without the "sha256:" prefix), returns the
    path where that object would be stored in the local LFS cache.
    """
    if oid.startswith("sha256:"):
        oid = oid[len("sha256:") :]
    # Git LFS shards the object store by the first two pairs of hex digits.
    lfs_objects_dir = repo.gitdir_path / "lfs" / "objects"
    return lfs_objects_dir / oid[:2] / oid[2:4] / oid


def copy_file_to_local_lfs_cache(
Expand All @@ -327,15 +322,15 @@ def copy_file_to_local_lfs_cache(
else:
# We can find the hash while copying in this case.
# TODO - check if this is actually any faster.
oid_and_size = get_hash_and_size_of_file_while_copying(
oid_and_size = get_oid_and_size_of_file_while_copying(
source_path, tmp_object_path
)

if not oid_and_size:
oid_and_size = get_hash_and_size_of_file(tmp_object_path)
oid_and_size = get_oid_and_size_of_file(tmp_object_path)
oid, size = oid_and_size

actual_object_path = get_local_path_from_lfs_hash(repo, oid)
actual_object_path = get_local_path_from_lfs_oid(repo, oid)

# Move tmp_object_path to actual_object_path in a robust way -
# check to see if its already there:
Expand All @@ -347,11 +342,17 @@ def copy_file_to_local_lfs_cache(
actual_object_path.parents[0].mkdir(parents=True, exist_ok=True)
tmp_object_path.rename(actual_object_path)

if not oid.startswith("sha256:"):
oid = "sha256:" + oid

return {
"version": GIT_LFS_SPEC_V1,
"oid": oid,
"oid": prefix_sha256(oid),
"size": size,
}


def prefix_sha256(oid):
    """Returns the given OID with the "sha256:" prefix added, if not already present."""
    if oid.startswith("sha256:"):
        return oid
    return "sha256:" + oid


def unprefix_sha256(oid):
    """Returns the given OID with any leading "sha256:" prefix stripped."""
    prefix = "sha256:"
    if oid.startswith(prefix):
        return oid[len(prefix) :]
    return oid
Empty file removed kart/linked_storage/__init__.py
Empty file.
93 changes: 0 additions & 93 deletions kart/linked_storage/importer.py

This file was deleted.

0 comments on commit 3ba7f8c

Please sign in to comment.