insert --replace and insert(..., replace=True)
Refs #66
simonw committed Dec 27, 2019
1 parent dc0a625 commit 866a5bc
Showing 5 changed files with 45 additions and 67 deletions.
8 changes: 4 additions & 4 deletions docs/cli.rst
@@ -265,15 +265,15 @@ For tab-delimited data, use ``--tsv``::

$ sqlite-utils insert dogs.db dogs docs.tsv --tsv

Upserting data
==============
Insert-replacing data
=====================

Upserting works exactly like inserting, with the exception that if your data has a primary key that matches an already existing record, that record will be replaced with the new data.
Insert-replacing works exactly like inserting, with the exception that if your data has a primary key that matches an already existing record, that record will be replaced with the new data.

After running the above ``dogs.json`` example, try running this::

$ echo '{"id": 2, "name": "Pancakes", "age": 3}' | \
sqlite-utils upsert dogs.db dogs - --pk=id
sqlite-utils insert dogs.db dogs - --pk=id --replace

This will replace the record for id=2 (Pancakes) with a new record with an updated age.
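
The new ``--replace`` flag maps onto the ``replace=True`` keyword added to the Python API in this same commit. As a minimal sketch (reusing the ``dogs.db`` database and ``dogs`` table from the example above), the library-level equivalent would look like this::

    from sqlite_utils import Database

    db = Database("dogs.db")
    # replace=True switches the insert to INSERT OR REPLACE, so a row whose
    # primary key already exists is overwritten instead of raising an error
    db["dogs"].insert({"id": 2, "name": "Pancakes", "age": 4}, pk="id", replace=True)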

15 changes: 8 additions & 7 deletions sqlite_utils/cli.py
@@ -353,6 +353,7 @@ def insert_upsert_implementation(
alter,
upsert,
ignore=False,
replace=False,
not_null=None,
default=None,
):
@@ -372,24 +373,22 @@
docs = json.load(json_file)
if isinstance(docs, dict):
docs = [docs]
if upsert:
method = db[table].upsert_all
extra_kwargs = {}
else:
method = db[table].insert_all
extra_kwargs = {"ignore": ignore}
extra_kwargs = {"ignore": ignore, "replace": replace}
if not_null:
extra_kwargs["not_null"] = set(not_null)
if default:
extra_kwargs["defaults"] = dict(default)
method(docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs)
db[table].insert_all(docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs)


@cli.command()
@insert_upsert_options
@click.option(
"--ignore", is_flag=True, default=False, help="Ignore records if pk already exists"
)
@click.option(
"--replace", is_flag=True, default=False, help="Replace records if pk already exists"
)
def insert(
path,
table,
@@ -401,6 +400,7 @@ def insert(
batch_size,
alter,
ignore,
replace,
not_null,
default,
):
@@ -422,6 +422,7 @@ def insert(
alter=alter,
upsert=False,
ignore=ignore,
replace=replace,
not_null=not_null,
default=default,
)
55 changes: 16 additions & 39 deletions sqlite_utils/db.py
@@ -473,11 +473,11 @@ def __init__(
column_order=None,
not_null=None,
defaults=None,
upsert=False,
batch_size=100,
hash_id=None,
alter=False,
ignore=False,
replace=False,
extracts=None,
):
super().__init__(db, name)
Expand All @@ -488,11 +488,11 @@ def __init__(
column_order=column_order,
not_null=not_null,
defaults=defaults,
upsert=upsert,
batch_size=batch_size,
hash_id=hash_id,
alter=alter,
ignore=ignore,
replace=replace,
extracts=extracts,
)

@@ -915,10 +915,10 @@ def insert(
column_order=DEFAULT,
not_null=DEFAULT,
defaults=DEFAULT,
upsert=DEFAULT,
hash_id=DEFAULT,
alter=DEFAULT,
ignore=DEFAULT,
replace=DEFAULT,
extracts=DEFAULT,
):
return self.insert_all(
@@ -928,10 +928,10 @@
column_order=column_order,
not_null=not_null,
defaults=defaults,
upsert=upsert,
hash_id=hash_id,
alter=alter,
ignore=ignore,
replace=replace,
extracts=extracts,
)

@@ -943,11 +943,11 @@ def insert_all(
column_order=DEFAULT,
not_null=DEFAULT,
defaults=DEFAULT,
upsert=DEFAULT,
batch_size=DEFAULT,
hash_id=DEFAULT,
alter=DEFAULT,
ignore=DEFAULT,
replace=DEFAULT,
extracts=DEFAULT,
):
"""
@@ -960,17 +960,17 @@
column_order = self.value_or_default("column_order", column_order)
not_null = self.value_or_default("not_null", not_null)
defaults = self.value_or_default("defaults", defaults)
upsert = self.value_or_default("upsert", upsert)
batch_size = self.value_or_default("batch_size", batch_size)
hash_id = self.value_or_default("hash_id", hash_id)
alter = self.value_or_default("alter", alter)
ignore = self.value_or_default("ignore", ignore)
replace = self.value_or_default("replace", replace)
extracts = self.value_or_default("extracts", extracts)

assert not (hash_id and pk), "Use either pk= or hash_id="
assert not (
ignore and upsert
), "Use either ignore=True or upsert=True, not both"
ignore and replace
), "Use either ignore=True or replace=True, not both"
all_columns = None
first = True
# We can only handle a max of 999 variables in a SQL insert, so
@@ -1009,7 +1009,7 @@ def insert_all(
all_columns.insert(0, hash_id)
first = False
or_what = ""
if upsert:
if replace:
or_what = "OR REPLACE "
elif ignore:
or_what = "OR IGNORE "
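
To make the ``or_what`` branch above concrete: ``replace=True`` produces ``INSERT OR REPLACE`` statements and ``ignore=True`` produces ``INSERT OR IGNORE``. A small standalone sqlite3 sketch of those two conflict behaviours (the table and values are illustrative, not taken from the commit):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("create table dogs (id integer primary key, name text, age integer)")
    conn.execute("insert into dogs values (2, 'Pancakes', 3)")
    # OR REPLACE overwrites the existing row that shares the primary key
    conn.execute("insert or replace into dogs values (2, 'Pancakes', 4)")
    # OR IGNORE silently skips the conflicting row instead
    conn.execute("insert or ignore into dogs values (2, 'Ignored', 99)")
    print(conn.execute("select * from dogs").fetchall())  # [(2, 'Pancakes', 4)]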
@@ -1076,18 +1076,7 @@ def upsert(
alter=DEFAULT,
extracts=DEFAULT,
):
return self.insert(
record,
pk=pk,
foreign_keys=foreign_keys,
column_order=column_order,
not_null=not_null,
defaults=defaults,
hash_id=hash_id,
alter=alter,
upsert=True,
extracts=extracts,
)
raise NotImplementedError

def upsert_all(
self,
Expand All @@ -1102,19 +1091,7 @@ def upsert_all(
alter=DEFAULT,
extracts=DEFAULT,
):
return self.insert_all(
records,
pk=pk,
foreign_keys=foreign_keys,
column_order=column_order,
not_null=not_null,
defaults=defaults,
batch_size=100,
hash_id=hash_id,
alter=alter,
upsert=True,
extracts=extracts,
)
raise NotImplementedError

def add_missing_columns(self, records):
needed_columns = self.detect_column_types(records)
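
With ``upsert()`` and ``upsert_all()`` reduced to ``NotImplementedError`` stubs in this commit, callers switch to the new keyword instead, as the test changes further down also show. A minimal sketch of that migration (the in-memory connection and ``books`` rows are illustrative):

    import sqlite3
    from sqlite_utils import Database

    db = Database(sqlite3.connect(":memory:"))
    db["books"].insert({"id": 1, "title": "Hedgehogs of the world"}, pk="id")

    # previously: db["books"].upsert_all(rows)
    db["books"].insert_all(
        [{"id": 1, "title": "Hedgehogs of the World"}],
        pk="id",
        replace=True,
    )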
@@ -1183,20 +1160,20 @@ def m2m(
)
# Ensure each record exists in other table
for record in records:
id = other_table.upsert(record, pk=pk).last_pk
m2m_table.upsert(
id = other_table.insert(record, pk=pk, replace=True).last_pk
m2m_table.insert(
{
"{}_id".format(other_table.name): id,
"{}_id".format(self.name): our_id,
}
}, replace=True
)
else:
id = other_table.lookup(lookup)
m2m_table.upsert(
m2m_table.insert(
{
"{}_id".format(other_table.name): id,
"{}_id".format(self.name): our_id,
}
}, replace=True
)
return self

18 changes: 9 additions & 9 deletions tests/test_cli.py
@@ -581,23 +581,23 @@ def test_only_allow_one_of_nl_tsv_csv(options, db_path, tmpdir):
assert "Error: Use just one of --nl, --csv or --tsv" == result.output.strip()


def test_upsert(db_path, tmpdir):
def test_insert_replace(db_path, tmpdir):
test_insert_multiple_with_primary_key(db_path, tmpdir)
json_path = str(tmpdir / "upsert.json")
json_path = str(tmpdir / "insert-replace.json")
db = Database(db_path)
assert 20 == db["dogs"].count
upsert_dogs = [
{"id": 1, "name": "Upserted 1", "age": 4},
{"id": 2, "name": "Upserted 2", "age": 4},
insert_replace_dogs = [
{"id": 1, "name": "Insert replaced 1", "age": 4},
{"id": 2, "name": "Insert replaced 2", "age": 4},
{"id": 21, "name": "Fresh insert 21", "age": 6},
]
open(json_path, "w").write(json.dumps(upsert_dogs))
open(json_path, "w").write(json.dumps(insert_replace_dogs))
result = CliRunner().invoke(
cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id", "--replace"]
)
assert 0 == result.exit_code
assert 0 == result.exit_code, result.output
assert 21 == db["dogs"].count
assert upsert_dogs == db.execute_returning_dicts(
assert insert_replace_dogs == db.execute_returning_dicts(
"select * from dogs where id in (1, 2, 21) order by id"
)

16 changes: 8 additions & 8 deletions tests/test_create.py
@@ -445,7 +445,7 @@ def test_insert_row_alter_table(


@pytest.mark.parametrize("use_table_factory", [True, False])
def test_upsert_rows_alter_table(fresh_db, use_table_factory):
def test_insert_replace_rows_alter_table(fresh_db, use_table_factory):
first_row = {"id": 1, "title": "Hedgehogs of the world", "author_id": 1}
next_rows = [
{"id": 1, "title": "Hedgehogs of the World", "species": "hedgehogs"},
@@ -459,11 +459,11 @@ def test_upsert_rows_alter_table(fresh_db, use_table_factory):
if use_table_factory:
table = fresh_db.table("books", pk="id", alter=True)
table.insert(first_row)
table.upsert_all(next_rows)
table.insert_all(next_rows, replace=True)
else:
table = fresh_db["books"]
table.insert(first_row, pk="id")
table.upsert_all(next_rows, alter=True)
table.insert_all(next_rows, alter=True, replace=True)
assert {
"author_id": int,
"id": int,
@@ -664,11 +664,11 @@ def test_insert_ignore(fresh_db):

def test_insert_hash_id(fresh_db):
dogs = fresh_db["dogs"]
id = dogs.upsert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id").last_pk
id = dogs.insert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id").last_pk
assert "f501265970505d9825d8d9f590bfab3519fb20b1" == id
assert 1 == dogs.count
# Upserting a second time should not create a new row
id2 = dogs.upsert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id").last_pk
# Insert replacing a second time should not create a new row
id2 = dogs.insert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id", replace=True).last_pk
assert "f501265970505d9825d8d9f590bfab3519fb20b1" == id2
assert 1 == dogs.count

@@ -791,10 +791,10 @@ def test_drop_view(fresh_db):
assert [] == fresh_db.view_names()


def test_insert_upsert_all_empty_list(fresh_db):
def test_insert_all_empty_list(fresh_db):
fresh_db["t"].insert({"foo": 1})
assert 1 == fresh_db["t"].count
fresh_db["t"].insert_all([])
assert 1 == fresh_db["t"].count
fresh_db["t"].upsert_all([])
fresh_db["t"].insert_all([], replace=True)
assert 1 == fresh_db["t"].count
