Skip to content

Commit

Permalink
Merge pull request #860 from koordinates/check-for-import-from-wc
Browse files Browse the repository at this point in the history
Error if the user tries to import from within the WC
  • Loading branch information
olsen232 committed Jun 8, 2023
2 parents c3184cb + a713be3 commit b75d575
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
- Allow kart to check out attached files into the file-based working copy.
- Fixes a bug where tab-completion wasn't working with helper mode, affects macOS and Linux. [#851](https://github.com/koordinates/kart/pull/851)
- Fixes a bug where git-lfs (and potentially other subprocesses) wasn't working reliably with helper mode, affects macOS and Linux. [#853](https://github.com/koordinates/kart/issues/853)
- `import` now informs the user to use `add-dataset` instead if they try to import a table that is already inside the working copy. [#249](https://github.com/koordinates/kart/issues/249)

## 0.12.3

Expand Down
10 changes: 9 additions & 1 deletion kart/sqlalchemy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,19 @@ def strip_password(uri):
return p.geturl()


def strip_query(uri):
"""Removes query parameters from URI."""
p = urlsplit(uri)
if p.query is not None:
p = p._replace(query=None)
return p.geturl()


def separate_last_path_part(uri):
"""
Removes the last part of the path from a URI and returns it separately.
Generally useful for connecting to the URI but at a less specific level than the one given,
Eg, when given a URI of the form SCHEME://HOST/DNAME/DBSCHEMA, we want to connect to SCHEME://HOST/DBNAME
Eg, when given a URI of the form SCHEME://HOST/DBNAME/DBSCHEMA, we want to connect to SCHEME://HOST/DBNAME
and then specify the DBSCHEMA separately in each query.
"""
url = urlsplit(uri)
Expand Down
47 changes: 47 additions & 0 deletions kart/tabular/import_.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import click

from kart import is_windows
Expand Down Expand Up @@ -231,6 +233,7 @@ def table_import(

repo = ctx.obj.repo
check_git_user(repo)
check_for_import_from_within_working_copy(repo, source, tables)

base_import_source = TableImportSource.open(source)
if all_tables:
Expand Down Expand Up @@ -347,3 +350,47 @@ def table_import(
repo_key_filter=RepoKeyFilter.datasets(new_ds_paths),
create_parts_if_missing=parts_to_create,
)


def check_for_import_from_within_working_copy(repo, source, tables):
"""Don't allow an import from a source that is already within the working copy."""
from kart.sqlalchemy import DbType, strip_username_and_password, strip_query

if tables and all(":" in t for t in tables):
# Every table has been renamed.
# User can import from the WC as long as they rename everything, although it's a bit of a strange thing to do.
return

wc_location = repo.workingcopy_location
if not wc_location:
return
wc_type = DbType.from_spec(wc_location)
source_type = DbType.from_spec(source)
if not wc_type or not source_type or wc_type != source_type:
return

if wc_type == DbType.GPKG:
source_path = Path(source)
is_wc_import = _same_file(repo.workdir_path / wc_location, source_path)
else:

def normalise_uri(uri):
uri = strip_username_and_password(uri)
uri = strip_query(uri)
return uri.rstrip("/")

wc_location = normalise_uri(wc_location)
source = normalise_uri(source)
is_wc_import = wc_location == source or source.startswith(wc_location + "/")

if is_wc_import:
raise click.UsageError(
"Import-source is already inside working-copy.\n"
"The `kart import` command creates a new Kart dataset by copying from an import-source that is external to Kart.\n"
"To have Kart start tracking a table that is already inside the working copy, use `kart add-dataset TABLENAME`\n"
"To see a list of tables inside the working copy that are untracked, use `kart status --list-untracked-tables`\n"
)


def _same_file(path1, path2):
return path1.exists() and path2.exists() and path1.samefile(path2)
2 changes: 1 addition & 1 deletion kart/tabular/working_copy/gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def check_valid_location(cls, wc_location, repo):
@classmethod
def normalise_location(cls, wc_location, repo):
"""Rewrites a relative path (relative to the current directory) as relative to the repo.workdir_path."""
gpkg_path = Path(wc_location)
gpkg_path = Path(wc_location).expanduser()
if not gpkg_path.is_absolute():
try:
return str(
Expand Down
35 changes: 35 additions & 0 deletions tests/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,3 +1221,38 @@ def test_init_import_with_no_crs(
]
)
assert r.exit_code == 0, r.stderr


def test_import_from_wc(data_working_copy, cli_runner):
with data_working_copy("points") as (repo_path, wcdb):
r = cli_runner.invoke(["import", wcdb])
assert r.exit_code == 2
assert "Import-source is already inside working-copy." in r.stderr

r = cli_runner.invoke(
[
"config",
"kart.workingcopy.location",
"postgresql://user1:pass1@localhost:15432/postgres/example",
]
)
assert r.exit_code == 0, r.stderr

r = cli_runner.invoke(
[
"import",
"postgresql://user2:pass2@localhost:15432/postgres/example",
"table",
]
)
assert r.exit_code == 2
assert "Import-source is already inside working-copy." in r.stderr

r = cli_runner.invoke(
[
"import",
"postgresql://user2:pass2@localhost:15432/postgres/example/table",
]
)
assert r.exit_code == 2
assert "Import-source is already inside working-copy." in r.stderr

0 comments on commit b75d575

Please sign in to comment.