Merge pull request #877 from koordinates/macosx-version-11
Fix for reflink - build reflink for macos11
olsen232 committed Jun 22, 2023
2 parents a859515 + aa3fcda commit 454e56f
Showing 20 changed files with 210 additions and 17 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/build.yml
@@ -151,6 +151,12 @@ jobs:
sudo apt-get update -q -y
sudo ACCEPT_EULA=Y apt-get install -q -y msodbcsql17
- name: "test: install fienode for reflink testing"
run: |
curl -L https://github.com/pwaller/fienode/releases/download/v1.0/fienode-linux-amd64 --output fienode
chmod +x fienode
mv fienode /usr/local/bin/
- name: "test: unit tests"
uses: lukka/run-cmake@v10
with:
@@ -862,6 +868,14 @@ jobs:
run: |
./build/kart --version
- name: "test: install clone_checker for reflink testing"
run: |
git clone https://github.com/dyorgio/apfs-clone-checker.git
cd apfs-clone-checker
gcc clone_checker.c -o clone_checker
chmod +x clone_checker
mv clone_checker /usr/local/bin/
- name: "test: unit tests"
uses: lukka/run-cmake@v10
with:
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,11 @@ Please note that compatibility for 0.x releases (software or repositories) isn't

_When adding new entries to the changelog, please include issue/PR numbers wherever possible._

## 0.14.0 (UNRELEASED)

- Fixes a bug where SIGINT (Ctrl+C) wasn't terminating Kart when using helper mode (affects macOS and Linux). [#869](https://github.com/koordinates/kart/pull/869)
- Fixes a bug where reflink wasn't working on macOS, leading to larger size on disk for point cloud datasets. [#876](https://github.com/koordinates/kart/issues/876)

## 0.13.0

### Major fix: Corruption of Postgres NUMERIC values
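For context on the reflink entry above: whether copy-on-write clones are possible depends on the filesystem, so callers probe for support first. A minimal sketch using the vendored `reflink` package (`supported_at` and `reflink` are the same calls that appear elsewhere in this commit; the paths are purely illustrative):

```python
from pathlib import Path

import reflink

workdir = Path("/path/to/kart-repo")  # illustrative path
tile = workdir / "auckland" / "auckland_0_0.copc.laz"

if reflink.supported_at(str(workdir)):
    # A reflinked copy shares data blocks with the source file, so a
    # checked-out tile adds almost nothing to the size on disk.
    reflink.reflink(str(tile), str(tile) + ".clone")
else:
    print("reflinks not supported on this filesystem; copies will be full-size")
```
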
8 changes: 7 additions & 1 deletion kart/point_cloud/tilename_util.py
@@ -41,7 +41,11 @@ def set_tile_extension(filename, ext=None, tile_format=None):


def get_tile_path_pattern(
tilename=None, *, parent_path=None, include_conflict_versions=False
tilename=None,
*,
parent_path=None,
include_conflict_versions=False,
ignore_tile_case=False,
):
"""
Given a tilename eg "mytile" and a parent_path eg "myfolder",
@@ -61,6 +65,8 @@ def get_tile_path_pattern(
tile_pattern = (
re.escape(tilename) if tilename is not None else TILE_BASENAME_PATTERN
)
if ignore_tile_case:
tile_pattern = rf"(?i:{tile_pattern})"
version_pattern = (
r"(?:\.ancestor|\.ours|\.theirs)?" if include_conflict_versions else ""
)
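The new `ignore_tile_case` flag wraps only the tile-name part of the pattern in a scoped inline flag, so the rest of the composed pattern (extension and conflict-version suffixes) stays case-sensitive. A standalone illustration of that regex behaviour (not the full output of `get_tile_path_pattern`):

```python
import re

tile_pattern = re.escape("auckland_0_0")
# (?i:...) applies IGNORECASE only inside the group, unlike re.IGNORECASE,
# which would make the whole compiled pattern case-insensitive.
pattern = re.compile(rf"(?i:{tile_pattern})\.copc\.laz")

assert pattern.fullmatch("AUCKLAND_0_0.copc.laz")      # tile name matches either case
assert not pattern.fullmatch("auckland_0_0.COPC.LAZ")  # extension is still case-sensitive
```
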
2 changes: 2 additions & 0 deletions kart/point_cloud/v1.py
@@ -54,11 +54,13 @@ def get_tile_path_pattern(
parent_path=None,
include_conflict_versions=False,
is_pam=None,
ignore_tile_case=False,
):
return get_tile_path_pattern(
tilename,
parent_path=parent_path,
include_conflict_versions=include_conflict_versions,
ignore_tile_case=ignore_tile_case,
)

@classmethod
9 changes: 8 additions & 1 deletion kart/raster/tilename_util.py
@@ -27,7 +27,12 @@ def set_tile_extension(filename, ext=None, tile_format=None):


def get_tile_path_pattern(
tilename=None, *, parent_path=None, include_conflict_versions=False, is_pam=None
tilename=None,
*,
parent_path=None,
include_conflict_versions=False,
is_pam=None,
ignore_tile_case=False,
):
"""
Given a tilename eg "mytile" and a parent_path eg "myfolder",
@@ -49,6 +54,8 @@ def get_tile_path_pattern(
tile_pattern = (
re.escape(tilename) if tilename is not None else TILE_BASENAME_PATTERN
)
if ignore_tile_case:
tile_pattern = rf"(?i:{tile_pattern})"
version_pattern = (
r"(?:\.ancestor|\.ours|\.theirs)?" if include_conflict_versions else ""
)
2 changes: 2 additions & 0 deletions kart/raster/v1.py
@@ -91,12 +91,14 @@ def get_tile_path_pattern(
parent_path=None,
include_conflict_versions=False,
is_pam=None,
ignore_tile_case=False,
):
return get_tile_path_pattern(
tilename,
parent_path=parent_path,
include_conflict_versions=include_conflict_versions,
is_pam=is_pam,
ignore_tile_case=ignore_tile_case,
)

def diff_tile(self, other, tile_filter=FeatureKeyFilter.MATCH_ALL, reverse=False):
2 changes: 2 additions & 0 deletions kart/reflink_util.py
@@ -11,6 +11,8 @@ def reflink(from_, to):
def try_reflink(from_, to):
"""Same as reflink_util.reflink, but falls back to shutil.copy if reflinking fails."""

assert not to.exists()

try:
return reflink(from_, to)
except (rl.ReflinkImpossibleError, NotImplementedError):
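For reference, a sketch of the function this hunk modifies, reconstructed from the visible context (the module's own `reflink()` wrapper is simplified here to a direct library call, and the exact fallback in Kart may differ):

```python
import shutil

import reflink as rl


def try_reflink(from_, to):
    # Clone from_ to to with copy-on-write; fall back to a plain copy if the
    # filesystem or platform can't do reflinks.
    assert not to.exists()  # guard added in this commit: destination must not already exist
    try:
        return rl.reflink(str(from_), str(to))
    except (rl.ReflinkImpossibleError, NotImplementedError):
        return shutil.copy(str(from_), str(to))
```
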
13 changes: 10 additions & 3 deletions kart/workdir.py
@@ -34,7 +34,11 @@
from kart import subprocess_util as subprocess
from kart.tile import ALL_TILE_DATASET_TYPES
from kart.tile.tile_dataset import TileDataset
from kart.tile.tilename_util import remove_any_tile_extension, PAM_SUFFIX
from kart.tile.tilename_util import (
remove_any_tile_extension,
case_insensitive,
PAM_SUFFIX,
)
from kart.working_copy import WorkingCopyPart

L = logging.getLogger("kart.workdir")
@@ -804,8 +808,11 @@ def _hard_reset_renamed_pam_file(self, dataset, tile_delta):
assert self.repo.workdir_path in ds_tiles_dir.parents
assert ds_tiles_dir.is_dir()

pam_name_pattern = dataset.get_tile_path_pattern(tilename, is_pam=True)
for child in ds_tiles_dir.glob(tilename + ".*"):
pam_name_pattern = dataset.get_tile_path_pattern(
tilename, is_pam=True, ignore_tile_case=True
)

for child in ds_tiles_dir.glob(case_insensitive(tilename) + ".*"):
if pam_name_pattern.fullmatch(child.name) and child.is_file():
child.unlink()

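The `case_insensitive` helper imported above lives in kart/tile/tilename_util.py, one of the changed files not expanded in this view. Path.glob has no case-insensitivity option, so a helper along these lines (a hypothetical reconstruction, not the actual source) would let the glob find tiles whose on-disk case differs from the committed name:

```python
def case_insensitive(text):
    # Hypothetical sketch: turn each letter into a two-character glob class,
    # e.g. "Tile1" -> "[tT][iI][lL][eE]1", so Path.glob matches either case.
    return "".join(
        f"[{c.lower()}{c.upper()}]" if c.isalpha() else c for c in text
    )
```
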
11 changes: 9 additions & 2 deletions requirements/CMakeLists.txt
@@ -12,8 +12,15 @@ set(REQUIREMENTS_DEPS
PARENT_SCOPE)

# fixme: use a list from somewhere else
set(WHEEL_DEPS cryptography psycopg2 pygit2 gdal cffi pysqlite3
# pyodbc: this is OS-qualified already in requirements.in
set(WHEEL_DEPS
cryptography
psycopg2
pygit2
gdal
cffi
pysqlite3
reflink
# pyodbc: this is OS-qualified already in requirements.in
)

set(PIP_COMPILE_COMMAND
1 change: 0 additions & 1 deletion requirements/requirements.in
@@ -13,7 +13,6 @@ rst2txt
shellingham
sqlalchemy
tqdm
reflink
jsonschema>=4.3

# these are only here for dependencies, we build them in vcpkg-vendor,
6 changes: 3 additions & 3 deletions requirements/requirements.txt
@@ -16,7 +16,7 @@ certifi==2022.12.7
# reflink
click==8.1.3
# via -r requirements.in
#cryptography==38.0.3
#cryptography==41.0.0
# via -r vendor-wheels.txt
docutils==0.17.1
# via
@@ -48,8 +48,8 @@ pyrsistent==0.19.2
# via jsonschema
#pysqlite3==0.4.5
# via -r vendor-wheels.txt
reflink==0.2.1
# via -r requirements.in
#reflink==0.2.1
# via -r vendor-wheels.txt
rst2txt==1.1.0
# via -r requirements.in
shellingham==1.5.0
1 change: 1 addition & 0 deletions requirements/vendor-wheels.txt
@@ -5,3 +5,4 @@ psycopg2==2.8.5
pygit2==1.9.0
pyodbc==4.0.32
pysqlite3==0.4.5
reflink==0.2.1
65 changes: 65 additions & 0 deletions tests/conftest.py
@@ -1236,3 +1236,68 @@ def _check_lfs_hashes(repo, expected_file_count=None):
assert file_count == expected_file_count

return _check_lfs_hashes


@pytest.fixture()
def check_tile_is_reflinked():
"""
Makes sure that a particular tile is reflinked to the same tile in the LFS cache.
Makes no assertions at all:
- on Windows, where we don't support reflinks.
- if reflink is not supported on the filesystem.
- if we lack the tools to check whether files are reflinked (clone_checker or fienode).
"""

import shutil
import subprocess

import reflink

from kart import is_windows
from kart.lfs_util import get_hash_and_size_of_file, get_local_path_from_lfs_hash

clone_checker = shutil.which("clone_checker")
fienode = shutil.which("fienode")
has_checker = bool(clone_checker or fienode)

def _check_tile_is_reflinked(tile_path, repo, do_raise_skip=False):
if is_windows:
if do_raise_skip:
raise pytest.skip("Reflink is not supported on windows")
else:
return

reflink_supported = reflink.supported_at(str(repo.workdir_path))
if not reflink_supported:
if do_raise_skip:
raise pytest.skip("Reflink is not supported on this filesystem")
else:
return

if not has_checker:
if do_raise_skip:
raise pytest.skip(
"Can't check if tiles are reflinked: install dyorgio/apfs-clone-checker or pwaller/fienode to check reflinks"
)
else:
return

tile_hash, size = get_hash_and_size_of_file(tile_path)
lfs_path = get_local_path_from_lfs_hash(repo, tile_hash)

if clone_checker:
output = subprocess.check_output(
[clone_checker, str(tile_path), str(lfs_path)], encoding="utf8"
)
assert (
output.strip() == "1"
), f"{tile_path} and {lfs_path} should be reflinked"

elif fienode:
fienode1 = subprocess.check_output([fienode, str(tile_path)])
fienode2 = subprocess.check_output([fienode, str(lfs_path)])
assert (
fienode1 == fienode2
), f"{tile_path} and {lfs_path} should be reflinked"

return _check_tile_is_reflinked
4 changes: 4 additions & 0 deletions tests/point_cloud/test_imports.py
@@ -145,6 +145,7 @@ def test_import_several_laz__convert(
check_lfs_hashes,
requires_pdal,
requires_git_lfs,
check_tile_is_reflinked,
):
# Using postgres here because it has the best type preservation
with data_archive_readonly("point-cloud/laz-auckland.tgz") as auckland:
@@ -217,6 +218,9 @@ def test_import_several_laz__convert(
assert (
repo_path / "auckland" / f"auckland_{x}_{y}.copc.laz"
).is_file()
check_tile_is_reflinked(
repo_path / "auckland" / f"auckland_{x}_{y}.copc.laz", repo
)


@pytest.mark.parametrize("command", ["point-cloud-import", "import"])
24 changes: 24 additions & 0 deletions tests/point_cloud/test_workingcopy.py
@@ -1,9 +1,11 @@
import re
import shutil

import reflink
import pygit2
import pytest

from kart import is_windows
from kart.exceptions import (
WORKING_COPY_OR_IMPORT_CONFLICT,
NO_CHANGES,
@@ -1244,3 +1246,25 @@ def test_working_copy_conflicting_extension(cli_runner, data_archive, requires_p
r = cli_runner.invoke(["status"])
assert r.exit_code == INVALID_OPERATION
assert "More than one tile found in working copy with the same name" in r.stderr


@pytest.mark.skipif(is_windows, reason="copy-on-write not supported on windows")
def test_working_copy_reflink(cli_runner, data_archive, check_tile_is_reflinked):
# This test will show as passed if Kart's reflinks are working,
# skipped if reflinks are not supported on this filesystem or if we can't detect them,
# and failed if reflinks are supported but Kart fails to make use of them.

with data_archive("point-cloud/auckland.tgz") as repo_path:
repo = KartRepo(repo_path)

# Extracting a repo that was tarred probably doesn't give you reflinks -
# so we recreate the working copy so that we do get reflinks.
cli_runner.invoke(["create-workingcopy", "--delete-existing"])

for x in range(4):
for y in range(4):
check_tile_is_reflinked(
repo_path / "auckland" / f"auckland_{x}_{y}.copc.laz",
repo,
do_raise_skip=True,
)
5 changes: 5 additions & 0 deletions tests/raster/test_imports.py
@@ -185,6 +185,7 @@ def test_import_single_cogtiff(
data_archive_readonly,
check_lfs_hashes,
requires_git_lfs,
check_tile_is_reflinked,
):
with data_archive_readonly("raster/cog-aerial.tgz") as aerial:
repo_path = tmp_path / "raster-repo"
@@ -199,6 +200,7 @@
assert r.exit_code == 0, r.stderr

check_lfs_hashes(repo, 1)
check_tile_is_reflinked(repo_path / "aerial" / "aerial.tif", repo)

r = cli_runner.invoke(["data", "ls"])
assert r.exit_code == 0, r.stderr
@@ -241,6 +243,7 @@ def test_import_single_geotiff_with_rat(
data_archive,
check_lfs_hashes,
requires_git_lfs,
check_tile_is_reflinked,
):
with data_archive("raster/cog-erosion.tgz") as erosion:
# The PAM file should be found in a case-insensitive way
Expand Down Expand Up @@ -379,6 +382,7 @@ def test_import_single_geotiff_with_rat(

tif = repo_path / "erorisk_silcdb4" / "erorisk_silcdb4.tif"
assert tif.is_file()
check_tile_is_reflinked(tif, repo)
assert get_hash_and_size_of_file(tif) == (
"c4bbea4d7cfd54f4cdbca887a1b358a81710e820a6aed97cdf3337fd3e14f5aa",
604652,
@@ -388,6 +392,7 @@
# b) normalise all extensions in the working copy (to lowercase).
pam = repo_path / "erorisk_silcdb4" / "erorisk_silcdb4.tif.aux.xml"
assert pam.is_file()
check_tile_is_reflinked(pam, repo)
assert get_hash_and_size_of_file(pam) == (
"d8f514e654a81bdcd7428886a15e300c56b5a5ff92898315d16757562d2968ca",
36908,
22 changes: 21 additions & 1 deletion tests/raster/test_workingcopy.py
@@ -1,7 +1,9 @@
import pytest
import shutil

import pytest
import reflink

from kart import is_windows
from kart.exceptions import (
WORKING_COPY_OR_IMPORT_CONFLICT,
NO_CHANGES,
@@ -753,3 +755,21 @@ def test_working_copy_vrt(cli_runner, data_archive):
assert "Kart maintains this VRT file" in vrt_text
assert '<SourceFilename relativeToVRT="1">EL.tif</SourceFilename>' in vrt_text
assert '<SourceFilename relativeToVRT="1">EK.tif</SourceFilename>' in vrt_text


@pytest.mark.skipif(is_windows, reason="copy-on-write not supported on windows")
def test_working_copy_reflink(cli_runner, data_archive, check_tile_is_reflinked):
# This test will show as passed if Kart's reflinks are working,
# skipped if reflinks are not supported on this filesystem or if we can't detect them,
# and failed if reflinks are supported but Kart fails to make use of them.

with data_archive("raster/aerial.tgz") as repo_path:
repo = KartRepo(repo_path)

# Extracting a repo that was tarred probably doesn't give you reflinks -
# so we recreate the working copy so that we do get reflinks.
cli_runner.invoke(["create-workingcopy", "--delete-existing"])

check_tile_is_reflinked(
repo_path / "aerial" / "aerial.tif", repo, do_raise_skip=True
)
