diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0d36f88d..0f13226e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,7 +9,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] os: [ubuntu-latest, windows-latest, macos-latest] steps: - uses: actions/checkout@v4 @@ -17,13 +17,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install -e '.[test]' @@ -39,13 +34,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.12' - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-publish-pip- + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install setuptools wheel twine diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index a3d55325..88431202 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -11,13 +11,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install -e '.[docs]' diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index a710e412..0aa89862 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -17,13 +17,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.11" - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: setup.py - name: Install SpatiaLite run: sudo apt-get install libsqlite3-mod-spatialite - name: Install Python dependencies diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 34c8cfbc..5d32c06e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,15 +10,10 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] numpy: [0, 1] os: [ubuntu-latest, macos-latest, windows-latest, macos-14] - # Skip 3.8 and 3.9 on macos-14 - it only has 3.10+ exclude: - - python-version: "3.8" - os: macos-14 - - python-version: "3.9" - os: macos-14 - python-version: "3.13" numpy: 1 steps: @@ -28,13 +23,8 @@ jobs: with: python-version: ${{ matrix.python-version }} allow-prereleases: true - - uses: actions/cache@v4 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install -e '.[test,mypy,flake8]' @@ -64,4 +54,4 @@ jobs: run: black . --check - name: Check if cog needs to be run run: | - cog --check README.md docs/*.rst + cog --check --diff README.md docs/*.rst diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index b0572642..aa9058f7 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -132,7 +132,7 @@ See :ref:`cli_query`. -r, --raw Raw output, first column of first row --raw-lines Raw output, first column of each row -p, --param ... Named :parameters for SQL query - --functions TEXT Python code defining one or more custom SQL + --functions TEXT Python code or file path defining custom SQL functions --load-extension TEXT Path to SQLite extension, with optional :entrypoint @@ -175,7 +175,7 @@ See :ref:`cli_memory`. sqlite-utils memory animals.csv --schema Options: - --functions TEXT Python code defining one or more custom SQL + --functions TEXT Python code or file path defining custom SQL functions --attach ... Additional databases to attach - specify alias and filepath @@ -375,7 +375,7 @@ See :ref:`cli_bulk`. Options: --batch-size INTEGER Commit every X records - --functions TEXT Python code defining one or more custom SQL functions + --functions TEXT Python code or file path defining custom SQL functions --flatten Flatten nested JSON objects, so {"a": {"b": 1}} becomes {"a_b": 1} --nl Expect newline-delimited JSON @@ -1497,7 +1497,7 @@ See :ref:`cli_spatialite`. paths. To load it from a specific path, use --load-extension. Options: - -t, --type [POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION|GEOMETRY] + -t, --type [point|linestring|polygon|multipoint|multilinestring|multipolygon|geometrycollection|geometry] Specify a geometry type for this column. [default: GEOMETRY] --srid INTEGER Spatial Reference ID. See diff --git a/docs/cli.rst b/docs/cli.rst index 4fa6042e..f6a11ef4 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -368,6 +368,22 @@ This example defines a function which extracts the domain from a URL: Every callable object defined in the block will be registered as a SQL function with the same name, with the exception of functions with names that begin with an underscore. +You can also pass the path to a Python file containing function definitions: + +.. code-block:: bash + + sqlite-utils query sites.db "select url, domain(url) from urls" --functions functions.py + +The ``--functions`` option can be used multiple times to load functions from multiple sources: + +.. code-block:: bash + + sqlite-utils query sites.db "select url, domain(url), extract_path(url) from urls" \ + --functions domain_funcs.py \ + --functions 'def extract_path(url): + from urllib.parse import urlparse + return urlparse(url).path' + .. _cli_query_extensions: SQLite extensions @@ -1128,7 +1144,7 @@ You can insert binary data into a BLOB column by first encoding it using base64 Inserting newline-delimited JSON -------------------------------- -You can also import `newline-delimited JSON `__ using the ``--nl`` option: +You can also import newline-delimited JSON (see `JSON Lines `__) using the ``--nl`` option: .. code-block:: bash diff --git a/docs/python-api.rst b/docs/python-api.rst index 7b11c225..c6bf7762 100644 --- a/docs/python-api.rst +++ b/docs/python-api.rst @@ -2711,7 +2711,7 @@ By default, the name of the Python function will be used as the name of the SQL print(db.execute('select rev("hello")').fetchone()[0]) -Python 3.8 added the ability to register `deterministic SQLite functions `__, allowing you to indicate that a function will return the exact same result for any given inputs and hence allowing SQLite to apply some performance optimizations. You can mark a function as deterministic using ``deterministic=True``, like this: +If a function will return the exact same result for any given inputs you can register it as a `deterministic SQLite function `__ allowing SQLite to apply some performance optimizations: .. code-block:: python @@ -2719,8 +2719,6 @@ Python 3.8 added the ability to register `deterministic SQLite functions =8.3.1", "click-default-group>=1.2.3", "tabulate", "python-dateutil", "pluggy", + "pip", ], extras_require={ "test": ["pytest", "black>=24.1.1", "hypothesis", "cogapp"], @@ -64,20 +65,18 @@ def get_long_description(): "Issues": "https://github.com/simonw/sqlite-utils/issues", "CI": "https://github.com/simonw/sqlite-utils/actions", }, - python_requires=">=3.8", + python_requires=">=3.10", classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Intended Audience :: End Users/Desktop", "Topic :: Database", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ], # Needed to bundle py.typed so mypy can see it: zip_safe=False, diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index cc2d86eb..b5821f97 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -1,7 +1,7 @@ import base64 import click from click_default_group import DefaultGroup # type: ignore -from datetime import datetime +from datetime import datetime, timezone import hashlib import pathlib from runpy import run_module @@ -962,7 +962,7 @@ def insert_upsert_implementation( db = sqlite_utils.Database(path) _load_extensions(db, load_extension) if functions: - _register_functions(db, functions) + _register_functions_from_multiple(db, functions) if (delimiter or quotechar or sniff or no_headers) and not tsv: csv = True if (nl + csv + tsv) >= 2: @@ -1370,7 +1370,9 @@ def upsert( @click.argument("file", type=click.File("rb"), required=True) @click.option("--batch-size", type=int, default=100, help="Commit every X records") @click.option( - "--functions", help="Python code defining one or more custom SQL functions" + "--functions", + help="Python code or file path defining custom SQL functions", + multiple=True, ) @import_options @load_extension_option @@ -1759,7 +1761,9 @@ def drop_view(path, view, ignore, load_extension): help="Named :parameters for SQL query", ) @click.option( - "--functions", help="Python code defining one or more custom SQL functions" + "--functions", + help="Python code or file path defining custom SQL functions", + multiple=True, ) @load_extension_option def query( @@ -1796,7 +1800,7 @@ def query( db.register_fts4_bm25() if functions: - _register_functions(db, functions) + _register_functions_from_multiple(db, functions) _execute_query( db, @@ -1824,7 +1828,9 @@ def query( ) @click.argument("sql") @click.option( - "--functions", help="Python code defining one or more custom SQL functions" + "--functions", + help="Python code or file path defining custom SQL functions", + multiple=True, ) @click.option( "--attach", @@ -1996,7 +2002,7 @@ def memory( db.register_fts4_bm25() if functions: - _register_functions(db, functions) + _register_functions_from_multiple(db, functions) if return_db: return db @@ -3203,8 +3209,12 @@ def __init__(self, exception, path): "ctime": lambda p: p.stat().st_ctime, "mtime_int": lambda p: int(p.stat().st_mtime), "ctime_int": lambda p: int(p.stat().st_ctime), - "mtime_iso": lambda p: datetime.utcfromtimestamp(p.stat().st_mtime).isoformat(), - "ctime_iso": lambda p: datetime.utcfromtimestamp(p.stat().st_ctime).isoformat(), + "mtime_iso": lambda p: datetime.fromtimestamp(p.stat().st_mtime, timezone.utc) + .replace(tzinfo=None) + .isoformat(), + "ctime_iso": lambda p: datetime.fromtimestamp(p.stat().st_ctime, timezone.utc) + .replace(tzinfo=None) + .isoformat(), "size": lambda p: p.stat().st_size, "stem": lambda p: p.stem, "suffix": lambda p: p.suffix, @@ -3281,6 +3291,13 @@ def _load_extensions(db, load_extension): def _register_functions(db, functions): # Register any Python functions as SQL functions: + # Check if this is a file path + if "\n" not in functions and functions.endswith(".py"): + try: + functions = pathlib.Path(functions).read_text() + except FileNotFoundError: + raise click.ClickException("File not found: {}".format(functions)) + sqlite3.enable_callback_tracebacks(True) globals = {} try: @@ -3291,3 +3308,12 @@ def _register_functions(db, functions): for name, value in globals.items(): if callable(value) and not name.startswith("_"): db.register_function(value, name=name) + + +def _register_functions_from_multiple(db, functions_list): + """Register functions from multiple --functions arguments.""" + if not functions_list: + return + for functions in functions_list: + if isinstance(functions, str) and functions.strip(): + _register_functions(db, functions) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index ef8c40e6..8939a688 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -237,36 +237,43 @@ class Default: class AlterError(Exception): "Error altering table" + pass class NoObviousTable(Exception): "Could not tell which table this operation refers to" + pass class NoTable(Exception): "Specified table does not exist" + pass class BadPrimaryKey(Exception): "Table does not have a single obvious primary key" + pass class NotFoundError(Exception): "Record not found" + pass class PrimaryKeyRequired(Exception): "Primary key needs to be specified" + pass class InvalidColumns(Exception): "Specified columns do not exist" + pass @@ -368,7 +375,7 @@ def __init__( pm.hook.prepare_connection(conn=self.conn) self.strict = strict - def close(self): + def close(self) -> None: "Close the SQLite connection, and the underlying database file" self.conn.close() @@ -3203,7 +3210,7 @@ def insert( :param not_null: Set of strings specifying columns that should be ``NOT NULL``. :param defaults: Dictionary specifying default values for specific columns. :param hash_id: Name of a column to create and use as a primary key, where the - value of thet primary key will be derived as a SHA1 hash of the other column values + value of that primary key will be derived as a SHA1 hash of the other column values in the record. ``hash_id="id"`` is a common column name used for this. :param alter: Boolean, should any missing columns be added automatically? :param ignore: Boolean, if a record already exists with this primary key, ignore this insert. @@ -3852,7 +3859,7 @@ def jsonify_if_needed(value): def resolve_extracts( - extracts: Optional[Union[Dict[str, str], List[str], Tuple[str]]] + extracts: Optional[Union[Dict[str, str], List[str], Tuple[str]]], ) -> dict: if extracts is None: extracts = {} diff --git a/tests/test_cli.py b/tests/test_cli.py index 4e564f13..f32ddb41 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -806,6 +806,77 @@ def test_hidden_functions_are_hidden(db_path): assert "_two" not in functions +def test_query_functions_from_file(db_path, tmp_path): + # Create a temporary file with function definitions + functions_file = tmp_path / "my_functions.py" + functions_file.write_text(TEST_FUNCTIONS) + + result = CliRunner().invoke( + cli.cli, + [ + db_path, + "select zero(), one(1), two(1, 2)", + "--functions", + str(functions_file), + ], + ) + assert result.exit_code == 0 + assert json.loads(result.output.strip()) == [ + {"zero()": 0, "one(1)": 1, "two(1, 2)": 3} + ] + + +def test_query_functions_file_not_found(db_path): + result = CliRunner().invoke( + cli.cli, + [ + db_path, + "select zero()", + "--functions", + "nonexistent.py", + ], + ) + assert result.exit_code == 1 + assert "File not found: nonexistent.py" in result.output + + +def test_query_functions_multiple_invocations(db_path): + # Test using --functions multiple times + result = CliRunner().invoke( + cli.cli, + [ + db_path, + "select triple(2), quadruple(2)", + "--functions", + "def triple(x):\n return x * 3", + "--functions", + "def quadruple(x):\n return x * 4", + ], + ) + assert result.exit_code == 0 + assert json.loads(result.output.strip()) == [{"triple(2)": 6, "quadruple(2)": 8}] + + +def test_query_functions_file_and_inline(db_path, tmp_path): + # Test combining file and inline code + functions_file = tmp_path / "file_funcs.py" + functions_file.write_text("def triple(x):\n return x * 3") + + result = CliRunner().invoke( + cli.cli, + [ + db_path, + "select triple(2), quadruple(2)", + "--functions", + str(functions_file), + "--functions", + "def quadruple(x):\n return x * 4", + ], + ) + assert result.exit_code == 0 + assert json.loads(result.output.strip()) == [{"triple(2)": 6, "quadruple(2)": 8}] + + LOREM_IPSUM_COMPRESSED = ( b"x\x9c\xed\xd1\xcdq\x03!\x0c\x05\xe0\xbb\xabP\x01\x1eW\x91\xdc|M\x01\n\xc8\x8e" b"f\xf83H\x1e\x97\x1f\x91M\x8e\xe9\xe0\xdd\x96\x05\x84\xf4\xbek\x9fRI\xc7\xf2J" @@ -916,7 +987,7 @@ def test_query_json_with_json_cols(db_path): @pytest.mark.parametrize( "content,is_binary", - [(b"\x00\x0Fbinary", True), ("this is text", False), (1, False), (1.5, False)], + [(b"\x00\x0fbinary", True), ("this is text", False), (1, False), (1.5, False)], ) def test_query_raw(db_path, content, is_binary): Database(db_path)["files"].insert({"content": content}) @@ -931,7 +1002,7 @@ def test_query_raw(db_path, content, is_binary): @pytest.mark.parametrize( "content,is_binary", - [(b"\x00\x0Fbinary", True), ("this is text", False), (1, False), (1.5, False)], + [(b"\x00\x0fbinary", True), ("this is text", False), (1, False), (1.5, False)], ) def test_query_raw_lines(db_path, content, is_binary): Database(db_path)["files"].insert_all({"content": content} for _ in range(3)) diff --git a/tests/test_cli_bulk.py b/tests/test_cli_bulk.py index 909ed096..514f4acc 100644 --- a/tests/test_cli_bulk.py +++ b/tests/test_cli_bulk.py @@ -45,6 +45,32 @@ def test_cli_bulk(test_db_and_path): ] == list(db["example"].rows) +def test_cli_bulk_multiple_functions(test_db_and_path): + db, db_path = test_db_and_path + result = CliRunner().invoke( + cli.cli, + [ + "bulk", + db_path, + "insert into example (id, name) values (:id, myupper(mylower(:name)))", + "-", + "--nl", + "--functions", + "myupper = lambda s: s.upper()", + "--functions", + "mylower = lambda s: s.lower()", + ], + input='{"id": 3, "name": "ThReE"}\n{"id": 4, "name": "FoUr"}\n', + ) + assert result.exit_code == 0, result.output + assert [ + {"id": 1, "name": "One"}, + {"id": 2, "name": "Two"}, + {"id": 3, "name": "THREE"}, + {"id": 4, "name": "FOUR"}, + ] == list(db["example"].rows) + + def test_cli_bulk_batch_size(test_db_and_path): db, db_path = test_db_and_path proc = subprocess.Popen( diff --git a/tests/test_cli_memory.py b/tests/test_cli_memory.py index ac0a177d..dfe9915e 100644 --- a/tests/test_cli_memory.py +++ b/tests/test_cli_memory.py @@ -307,6 +307,22 @@ def test_memory_functions(): assert result.output.strip() == '[{"hello()": "Hello"}]' +def test_memory_functions_multiple(): + result = CliRunner().invoke( + cli.cli, + [ + "memory", + "select triple(2), quadruple(2)", + "--functions", + "def triple(x):\n return x * 3", + "--functions", + "def quadruple(x):\n return x * 4", + ], + ) + assert result.exit_code == 0 + assert result.output.strip() == '[{"triple(2)": 6, "quadruple(2)": 8}]' + + def test_memory_return_db(tmpdir): # https://github.com/simonw/sqlite-utils/issues/643 from sqlite_utils.cli import cli diff --git a/tests/test_recipes.py b/tests/test_recipes.py index eca39874..ff042253 100644 --- a/tests/test_recipes.py +++ b/tests/test_recipes.py @@ -64,6 +64,7 @@ def test_dayfirst_yearfirst(fresh_db, recipe, kwargs, expected): @pytest.mark.parametrize("fn", ("parsedate", "parsedatetime")) @pytest.mark.parametrize("errors", (None, recipes.SET_NULL, recipes.IGNORE)) +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") def test_dateparse_errors(fresh_db, fn, errors): fresh_db["example"].insert_all( [