insert --replace and insert(..., replace=True)
Refs #66
simonw committed Dec 27, 2019
1 parent dc0a625 commit 866a5bc
Showing 5 changed files with 45 additions and 67 deletions.
8 changes: 4 additions & 4 deletions docs/cli.rst
@@ -265,15 +265,15 @@ For tab-delimited data, use ``--tsv``::

$ sqlite-utils insert dogs.db dogs docs.tsv --tsv

Upserting data
==============
Insert-replacing data
=====================

Upserting works exactly like inserting, with the exception that if your data has a primary key that matches an already existing record, that record will be replaced with the new data.
Insert-replacing works exactly like inserting, with the exception that if your data has a primary key that matches an already existing record, that record will be replaced with the new data.

After running the above ``dogs.json`` example, try running this::

$ echo '{"id": 2, "name": "Pancakes", "age": 3}' | \
sqlite-utils upsert dogs.db dogs - --pk=id
sqlite-utils insert dogs.db dogs - --pk=id --replace

This will replace the record for id=2 (Pancakes) with a new record with an updated age.
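
The new ``--replace`` flag maps onto the ``replace=True`` keyword added to the Python API in this same commit. As a minimal sketch (reusing the ``dogs.db`` database and ``dogs`` table from the example above), the library-level equivalent would look like this::

    from sqlite_utils import Database

    db = Database("dogs.db")
    # replace=True switches the insert to INSERT OR REPLACE, so a row whose
    # primary key already exists is overwritten instead of raising an error
    db["dogs"].insert({"id": 2, "name": "Pancakes", "age": 4}, pk="id", replace=True)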

15 changes: 8 additions & 7 deletions sqlite_utils/cli.py
@@ -353,6 +353,7 @@ def insert_upsert_implementation(
alter,
upsert,
ignore=False,
replace=False,
not_null=None,
default=None,
):
@@ -372,24 +373,22 @@
docs = json.load(json_file)
if isinstance(docs, dict):
docs = [docs]
if upsert:
method = db[table].upsert_all
extra_kwargs = {}
else:
method = db[table].insert_all
extra_kwargs = {"ignore": ignore}
extra_kwargs = {"ignore": ignore, "replace": replace}
if not_null:
extra_kwargs["not_null"] = set(not_null)
if default:
extra_kwargs["defaults"] = dict(default)
method(docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs)
db[table].insert_all(docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs)


@cli.command()
@insert_upsert_options
@click.option(
"--ignore", is_flag=True, default=False, help="Ignore records if pk already exists"
)
@click.option(
"--replace", is_flag=True, default=False, help="Replace records if pk already exists"
)
def insert(
path,
table,
@@ -401,6 +400,7 @@ def insert(
batch_size,
alter,
ignore,
replace,
not_null,
default,
):
@@ -422,6 +422,7 @@ def insert(
alter=alter,
upsert=False,
ignore=ignore,
replace=replace,
not_null=not_null,
default=default,
)
55 changes: 16 additions & 39 deletions sqlite_utils/db.py
@@ -473,11 +473,11 @@ def __init__(
column_order=None,
not_null=None,
defaults=None,
upsert=False,
batch_size=100,
hash_id=None,
alter=False,
ignore=False,
replace=False,
extracts=None,
):
super().__init__(db, name)
Expand All @@ -488,11 +488,11 @@ def __init__(
column_order=column_order,
not_null=not_null,
defaults=defaults,
upsert=upsert,
batch_size=batch_size,
hash_id=hash_id,
alter=alter,
ignore=ignore,
replace=replace,
extracts=extracts,
)

@@ -915,10 +915,10 @@ def insert(
column_order=DEFAULT,
not_null=DEFAULT,
defaults=DEFAULT,
upsert=DEFAULT,
hash_id=DEFAULT,
alter=DEFAULT,
ignore=DEFAULT,
replace=DEFAULT,
extracts=DEFAULT,
):
return self.insert_all(
@@ -928,10 +928,10 @@
column_order=column_order,
not_null=not_null,
defaults=defaults,
upsert=upsert,
hash_id=hash_id,
alter=alter,
ignore=ignore,
replace=replace,
extracts=extracts,
)

@@ -943,11 +943,11 @@ def insert_all(
column_order=DEFAULT,
not_null=DEFAULT,
defaults=DEFAULT,
upsert=DEFAULT,
batch_size=DEFAULT,
hash_id=DEFAULT,
alter=DEFAULT,
ignore=DEFAULT,
replace=DEFAULT,
extracts=DEFAULT,
):
"""
@@ -960,17 +960,17 @@
column_order = self.value_or_default("column_order", column_order)
not_null = self.value_or_default("not_null", not_null)
defaults = self.value_or_default("defaults", defaults)
upsert = self.value_or_default("upsert", upsert)
batch_size = self.value_or_default("batch_size", batch_size)
hash_id = self.value_or_default("hash_id", hash_id)
alter = self.value_or_default("alter", alter)
ignore = self.value_or_default("ignore", ignore)
replace = self.value_or_default("replace", replace)
extracts = self.value_or_default("extracts", extracts)

assert not (hash_id and pk), "Use either pk= or hash_id="
assert not (
ignore and upsert
), "Use either ignore=True or upsert=True, not both"
ignore and replace
), "Use either ignore=True or replace=True, not both"
all_columns = None
first = True
# We can only handle a max of 999 variables in a SQL insert, so
@@ -1009,7 +1009,7 @@ def insert_all(
all_columns.insert(0, hash_id)
first = False
or_what = ""
if upsert:
if replace:
or_what = "OR REPLACE "
elif ignore:
or_what = "OR IGNORE "
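
To make the ``or_what`` branch above concrete: ``replace=True`` produces ``INSERT OR REPLACE`` statements and ``ignore=True`` produces ``INSERT OR IGNORE``. A small standalone sqlite3 sketch of those two conflict behaviours (the table and values are illustrative, not taken from the commit):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("create table dogs (id integer primary key, name text, age integer)")
    conn.execute("insert into dogs values (2, 'Pancakes', 3)")
    # OR REPLACE overwrites the existing row that shares the primary key
    conn.execute("insert or replace into dogs values (2, 'Pancakes', 4)")
    # OR IGNORE silently skips the conflicting row instead
    conn.execute("insert or ignore into dogs values (2, 'Ignored', 99)")
    print(conn.execute("select * from dogs").fetchall())  # [(2, 'Pancakes', 4)]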
@@ -1076,18 +1076,7 @@ def upsert(
alter=DEFAULT,
extracts=DEFAULT,
):
return self.insert(
record,
pk=pk,
foreign_keys=foreign_keys,
column_order=column_order,
not_null=not_null,
defaults=defaults,
hash_id=hash_id,
alter=alter,
upsert=True,
extracts=extracts,
)
raise NotImplementedError

def upsert_all(
self,
Expand All @@ -1102,19 +1091,7 @@ def upsert_all(
alter=DEFAULT,
extracts=DEFAULT,
):
return self.insert_all(
records,
pk=pk,
foreign_keys=foreign_keys,
column_order=column_order,
not_null=not_null,
defaults=defaults,
batch_size=100,
hash_id=hash_id,
alter=alter,
upsert=True,
extracts=extracts,
)
raise NotImplementedError

def add_missing_columns(self, records):
needed_columns = self.detect_column_types(records)
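
With ``upsert()`` and ``upsert_all()`` reduced to ``NotImplementedError`` stubs in this commit, callers switch to the new keyword instead, as the test changes further down also show. A minimal sketch of that migration (the in-memory connection and ``books`` rows are illustrative):

    import sqlite3
    from sqlite_utils import Database

    db = Database(sqlite3.connect(":memory:"))
    db["books"].insert({"id": 1, "title": "Hedgehogs of the world"}, pk="id")

    # previously: db["books"].upsert_all(rows)
    db["books"].insert_all(
        [{"id": 1, "title": "Hedgehogs of the World"}],
        pk="id",
        replace=True,
    )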
@@ -1183,20 +1160,20 @@ def m2m(
)
# Ensure each record exists in other table
for record in records:
id = other_table.upsert(record, pk=pk).last_pk
m2m_table.upsert(
id = other_table.insert(record, pk=pk, replace=True).last_pk
m2m_table.insert(
{
"{}_id".format(other_table.name): id,
"{}_id".format(self.name): our_id,
}
}, replace=True
)
else:
id = other_table.lookup(lookup)
m2m_table.upsert(
m2m_table.insert(
{
"{}_id".format(other_table.name): id,
"{}_id".format(self.name): our_id,
}
}, replace=True
)
return self

18 changes: 9 additions & 9 deletions tests/test_cli.py
@@ -581,23 +581,23 @@ def test_only_allow_one_of_nl_tsv_csv(options, db_path, tmpdir):
assert "Error: Use just one of --nl, --csv or --tsv" == result.output.strip()


def test_upsert(db_path, tmpdir):
def test_insert_replace(db_path, tmpdir):
test_insert_multiple_with_primary_key(db_path, tmpdir)
json_path = str(tmpdir / "upsert.json")
json_path = str(tmpdir / "insert-replace.json")
db = Database(db_path)
assert 20 == db["dogs"].count
upsert_dogs = [
{"id": 1, "name": "Upserted 1", "age": 4},
{"id": 2, "name": "Upserted 2", "age": 4},
insert_replace_dogs = [
{"id": 1, "name": "Insert replaced 1", "age": 4},
{"id": 2, "name": "Insert replaced 2", "age": 4},
{"id": 21, "name": "Fresh insert 21", "age": 6},
]
open(json_path, "w").write(json.dumps(upsert_dogs))
open(json_path, "w").write(json.dumps(insert_replace_dogs))
result = CliRunner().invoke(
cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id", "--replace"]
)
assert 0 == result.exit_code
assert 0 == result.exit_code, result.output
assert 21 == db["dogs"].count
assert upsert_dogs == db.execute_returning_dicts(
assert insert_replace_dogs == db.execute_returning_dicts(
"select * from dogs where id in (1, 2, 21) order by id"
)

16 changes: 8 additions & 8 deletions tests/test_create.py
@@ -445,7 +445,7 @@ def test_insert_row_alter_table(


@pytest.mark.parametrize("use_table_factory", [True, False])
def test_upsert_rows_alter_table(fresh_db, use_table_factory):
def test_insert_replace_rows_alter_table(fresh_db, use_table_factory):
first_row = {"id": 1, "title": "Hedgehogs of the world", "author_id": 1}
next_rows = [
{"id": 1, "title": "Hedgehogs of the World", "species": "hedgehogs"},
@@ -459,11 +459,11 @@ def test_upsert_rows_alter_table(fresh_db, use_table_factory):
if use_table_factory:
table = fresh_db.table("books", pk="id", alter=True)
table.insert(first_row)
table.upsert_all(next_rows)
table.insert_all(next_rows, replace=True)
else:
table = fresh_db["books"]
table.insert(first_row, pk="id")
table.upsert_all(next_rows, alter=True)
table.insert_all(next_rows, alter=True, replace=True)
assert {
"author_id": int,
"id": int,
@@ -664,11 +664,11 @@ def test_insert_ignore(fresh_db):

def test_insert_hash_id(fresh_db):
dogs = fresh_db["dogs"]
id = dogs.upsert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id").last_pk
id = dogs.insert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id").last_pk
assert "f501265970505d9825d8d9f590bfab3519fb20b1" == id
assert 1 == dogs.count
# Upserting a second time should not create a new row
id2 = dogs.upsert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id").last_pk
# Insert replacing a second time should not create a new row
id2 = dogs.insert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id", replace=True).last_pk
assert "f501265970505d9825d8d9f590bfab3519fb20b1" == id2
assert 1 == dogs.count

@@ -791,10 +791,10 @@ def test_drop_view(fresh_db):
assert [] == fresh_db.view_names()


def test_insert_upsert_all_empty_list(fresh_db):
def test_insert_all_empty_list(fresh_db):
fresh_db["t"].insert({"foo": 1})
assert 1 == fresh_db["t"].count
fresh_db["t"].insert_all([])
assert 1 == fresh_db["t"].count
fresh_db["t"].upsert_all([])
fresh_db["t"].insert_all([], replace=True)
assert 1 == fresh_db["t"].count
