diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..ce66cbec8 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,12 @@ +version: 2 + +sphinx: + configuration: docs/conf.py + +python: + version: "3.8" + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/docs/Makefile b/docs/Makefile index a279768f1..5578ae3ab 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -20,4 +20,4 @@ help: @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) livehtml: - sphinx-autobuild -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(0) + sphinx-autobuild -a -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(0) --watch ../sqlite_utils diff --git a/docs/conf.py b/docs/conf.py index b4c7f4427..1f5a1583f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,8 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.extlinks"] +extensions = ["sphinx.ext.extlinks", "sphinx.ext.autodoc"] +autodoc_member_order = "bysource" extlinks = { "issue": ("https://github.com/simonw/sqlite-utils/issues/%s", "#"), diff --git a/docs/index.rst b/docs/index.rst index 93b0bc040..581f3069c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -32,6 +32,7 @@ Contents installation cli python-api + reference contributing changelog diff --git a/docs/python-api.rst b/docs/python-api.rst index 70baf0cce..b79392094 100644 --- a/docs/python-api.rst +++ b/docs/python-api.rst @@ -962,7 +962,7 @@ The ``table.analyze_column(column, common_limit=10, value_truncate=None)`` metho The name of the column ``total_rows`` - The total number of rows in the table` + The total number of rows in the table ``num_null`` The number of rows for which this column is null diff --git a/docs/reference.rst b/docs/reference.rst new file mode 100644 index 000000000..e29f4686a --- /dev/null +++ b/docs/reference.rst @@ -0,0 +1,69 @@ +=============== + API Reference +=============== + +.. contents:: :local: + +.. _reference_db_database: + +sqlite_utils.db.Database +======================== + +.. autoclass:: sqlite_utils.db.Database + :members: + :undoc-members: + :show-inheritance: + :special-members: __getitem__ + :exclude-members: use_counts_table, execute_returning_dicts, resolve_foreign_keys + +.. _reference_db_queryable: + +sqlite_utils.db.Queryable +========================= + +:ref:`Table ` and :ref:`View ` are both subclasses of ``Queryable``, providing access to the following methods: + +.. autoclass:: sqlite_utils.db.Queryable + :members: + :undoc-members: + :exclude-members: execute_count + +.. _reference_db_table: + +sqlite_utils.db.Table +===================== + +.. autoclass:: sqlite_utils.db.Table + :members: + :undoc-members: + :show-inheritance: + :exclude-members: guess_foreign_column, value_or_default, build_insert_queries_and_params, insert_chunk, add_missing_columns + +.. _reference_db_view: + +sqlite_utils.db.View +==================== + +.. autoclass:: sqlite_utils.db.View + :members: + :undoc-members: + :show-inheritance: + +.. _reference_db_other: + +Other +===== + +.. _reference_db_other_column: + +sqlite_utils.db.Column +---------------------- + +.. autoclass:: sqlite_utils.db.Column + +.. _reference_db_other_column_details: + +sqlite_utils.db.ColumnDetails +----------------------------- + +.. 
autoclass:: sqlite_utils.db.ColumnDetails diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index a21acc44c..c7dc8321a 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -21,7 +21,19 @@ from sqlite_fts4 import rank_bm25 # type: ignore import sys import textwrap -from typing import Generator, Iterable, Union, Optional, List +from typing import ( + cast, + Any, + Callable, + Dict, + Generator, + Iterable, + Union, + Optional, + List, + Set, + Tuple, +) import uuid SQLITE_MAX_VARS = 999 @@ -41,7 +53,7 @@ ) ) \s+(IF\s+NOT\s+EXISTS\s+)? # IF NOT EXISTS (optional) -USING\s+(?P\w+) # e.g. USING FTS5 +USING\s+(?P\w+) # for example USING FTS5 """, re.VERBOSE | re.IGNORECASE, ) @@ -49,7 +61,7 @@ try: import pandas as pd # type: ignore except ImportError: - pd = None + pd = None # type: ignore try: import numpy as np # type: ignore @@ -59,6 +71,28 @@ Column = namedtuple( "Column", ("cid", "name", "type", "notnull", "default_value", "is_pk") ) +Column.__doc__ = """ +Describes a SQLite column returned by the :attr:`.Table.columns` property. + +``cid`` + Column index + +``name`` + Column name + +``type`` + Column type + +``notnull`` + Does the column have a ``not null` constraint + +``default_value`` + Default value for this column + +``is_pk`` + Is this column part of the primary key +""" + ColumnDetails = namedtuple( "ColumnDetails", ( @@ -72,6 +106,34 @@ "least_common", ), ) +ColumnDetails.__doc__ = """ +Summary information about a column, see :ref:`python_api_analyze_column`. + +``table`` + The name of the table + +``column`` + The name of the column + +``total_rows`` + The total number of rows in the table + +``num_null`` + The number of rows for which this column is null + +``num_blank`` + The number of rows for which this column is blank (the empty string) + +``num_distinct`` + The number of distinct values in this column + +``most_common`` + The ``N`` most common values as a list of ``(value, count)`` tuples`, or ``None`` if the table consists entirely of distinct values + +``least_common`` + The ``N`` least common values as a list of ``(value, count)`` tuples`, or ``None`` if the table is entirely distinct + or if the number of distinct values is less than N (since they will already have been returned in ``most_common``) +""" ForeignKey = namedtuple( "ForeignKey", ("table", "column", "other_table", "other_column") ) @@ -83,7 +145,11 @@ Trigger = namedtuple("Trigger", ("name", "table", "sql")) -DEFAULT = object() +class Default: + pass + + +DEFAULT = Default() COLUMN_TYPE_MAPPING = { float: "FLOAT", @@ -133,26 +199,32 @@ class AlterError(Exception): + "Error altering table" pass class NoObviousTable(Exception): + "Could not tell which table this operation refers to" pass class BadPrimaryKey(Exception): + "Table does not have a single obvious primary key" pass class NotFoundError(Exception): + "Record not found" pass class PrimaryKeyRequired(Exception): + "Primary key needs to be specified" pass class InvalidColumns(Exception): + "Specified columns do not exist" pass @@ -176,17 +248,32 @@ def __init__(self, values): class Database: + """ + Wrapper for a SQLite database connection that adds a variety of useful utility methods. 
+ + - ``filename_or_conn`` - String path to a file, or a ``pathlib.Path`` object, or a + ``sqlite3`` connection + - ``memory`` - set to ``True`` to create an in-memory database + - ``recreate`` - set to ``True`` to delete and recreate a file database (**dangerous**) + - ``recursive_triggers`` - defaults to ``True``, which sets ``PRAGMA recursive_triggers=on;`` - + set to ``False`` to avoid setting this pragma + - ``tracer`` - set a tracer function (``print`` works for this) which will be called with + ``sql, parameters`` every time a SQL query is executed + - ``use_counts_table`` - set to ``True`` to use a cached counts table, if available. See + :ref:`python_api_cached_table_counts`. + """ + _counts_table_name = "_counts" use_counts_table = False def __init__( self, filename_or_conn=None, - memory=False, - recreate=False, - recursive_triggers=True, - tracer=None, - use_counts_table=False, + memory: bool = False, + recreate: bool = False, + recursive_triggers: bool = True, + tracer: Callable = None, + use_counts_table: bool = False, ): assert (filename_or_conn is not None and not memory) or ( filename_or_conn is None and memory @@ -203,11 +290,24 @@ def __init__( self._tracer = tracer if recursive_triggers: self.execute("PRAGMA recursive_triggers=on;") - self._registered_functions = set() + self._registered_functions: set = set() self.use_counts_table = use_counts_table @contextlib.contextmanager - def tracer(self, tracer=None): + def tracer(self, tracer: Callable = None): + """ + Context manager to temporarily set a tracer function - all executed SQL queries will + be passed to this. + + The tracer function should accept two arguments: ``sql`` and ``parameters`` + + Example usage:: + + with db.tracer(print): + db["creatures"].insert({"name": "Cleo"}) + + See :ref:`python_api_tracing`. + """ prev_tracer = self._tracer self._tracer = tracer or print try: @@ -215,13 +315,39 @@ def tracer(self, tracer=None): finally: self._tracer = prev_tracer - def __getitem__(self, table_name): + def __getitem__(self, table_name: str) -> Union["Table", "View"]: + """ + ``db[table_name]`` returns a :class:`.Table` object for the table with the specified name. + If the table does not exist yet it will be created the first time data is inserted into it. + """ return self.table(table_name) def __repr__(self): return "".format(self.conn) - def register_function(self, fn=None, deterministic=None, replace=False): + def register_function( + self, fn: Callable = None, deterministic: bool = False, replace: bool = False + ): + """ + ``fn`` will be made available as a function within SQL, with the same name and number + of arguments. Can be used as a decorator:: + + @db.register + def upper(value): + return str(value).upper() + + The decorator can take arguments:: + + @db.register(deterministic=True, replace=True) + def upper(value): + return str(value).upper() + + - ``deterministic`` - set ``True`` for functions that always returns the same output for a given input + - ``replace`` - set ``True`` to replace an existing function with the same name - otherwise throw an error + + See :ref:`python_api_register_function`. + """ + def register(fn): name = fn.__name__ arity = len(inspect.signature(fn).parameters) @@ -240,9 +366,15 @@ def register(fn): register(fn) def register_fts4_bm25(self): + "Register the ``rank_bm25(match_info)`` function used for calculating relevance with SQLite FTS4." 
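A minimal sketch of the ``register_function()`` API documented above, assuming an in-memory database and an illustrative ``reverse_string`` function (neither appears in this diff)::

    from sqlite_utils import Database

    db = Database(memory=True)

    def reverse_string(s):
        return s[::-1]

    # Register reverse_string() as a deterministic custom SQL function
    db.register_function(reverse_string, deterministic=True)

    # The function can now be called from SQL executed against this connection
    print(db.execute("select reverse_string('hello')").fetchone()[0])
    # olleh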
self.register_function(rank_bm25, deterministic=True) - def attach(self, alias, filepath): + def attach(self, alias: str, filepath: Union[str, pathlib.Path]): + """ + Attach another SQLite database file to this connection with the specified alias, equivalent to:: + + ATTACH DATABASE 'filepath.db' AS alias + """ attach_sql = """ ATTACH DATABASE '{}' AS [{}]; """.format( @@ -250,7 +382,19 @@ def attach(self, alias, filepath): ).strip() self.execute(attach_sql) - def execute(self, sql, parameters=None): + def query( + self, sql: str, params: Optional[Union[Iterable, dict]] = None + ) -> Generator[dict, None, None]: + "Execute ``sql`` and return an iterable of dictionaries representing each row." + cursor = self.execute(sql, params or tuple()) + keys = [d[0] for d in cursor.description] + for row in cursor: + yield dict(zip(keys, row)) + + def execute( + self, sql: str, parameters: Optional[Union[Iterable, dict]] = None + ) -> sqlite3.Cursor: + "Execute SQL query and return a ``sqlite3.Cursor``." if self._tracer: self._tracer(sql, parameters) if parameters is not None: @@ -258,16 +402,19 @@ def execute(self, sql, parameters=None): else: return self.conn.execute(sql) - def executescript(self, sql): + def executescript(self, sql: str) -> sqlite3.Cursor: + "Execute multiple SQL statements separated by ; and return the ``sqlite3.Cursor``." if self._tracer: self._tracer(sql, None) return self.conn.executescript(sql) - def table(self, table_name, **kwargs): + def table(self, table_name: str, **kwargs) -> Union["Table", "View"]: + "Return a table object, optionally configured with default options." klass = View if table_name in self.view_names() else Table return klass(self, table_name, **kwargs) - def quote(self, value): + def quote(self, value: str) -> str: + "Apply SQLite string quoting to a value, including wrappping it in single quotes." # Normally we would use .execute(sql, [params]) for escaping, but # occasionally that isn't available - most notable when we need # to include a "... DEFAULT 'value'" in a column definition. @@ -277,7 +424,8 @@ def quote(self, value): {"value": value}, ).fetchone()[0] - def table_names(self, fts4=False, fts5=False): + def table_names(self, fts4: bool = False, fts5: bool = False) -> List[str]: + "A list of string table names in this database." where = ["type = 'table'"] if fts4: where.append("sql like '%USING FTS4%'") @@ -286,7 +434,8 @@ def table_names(self, fts4=False, fts5=False): sql = "select name from sqlite_master where {}".format(" AND ".join(where)) return [r[0] for r in self.execute(sql).fetchall()] - def view_names(self): + def view_names(self) -> List[str]: + "A list of string view names in this database." return [ r[0] for r in self.execute( @@ -295,15 +444,18 @@ def view_names(self): ] @property - def tables(self): - return [self[name] for name in self.table_names()] + def tables(self) -> List["Table"]: + "A list of Table objects in this database." + return cast(List["Table"], [self[name] for name in self.table_names()]) @property - def views(self): - return [self[name] for name in self.view_names()] + def views(self) -> List["View"]: + "A list of View objects in this database." + return cast(List["View"], [self[name] for name in self.view_names()]) @property - def triggers(self): + def triggers(self) -> List[Trigger]: + "A list of ``(name, table_name, sql)`` tuples representing triggers in this database." 
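To illustrate the difference between ``query()`` and ``execute()`` documented above, a short sketch using invented example data::

    from sqlite_utils import Database

    db = Database(memory=True)
    db["dogs"].insert({"name": "Cleo", "age": 4})

    # query() yields one dictionary per row
    for row in db.query("select name, age from dogs where age > :age", {"age": 3}):
        print(row)  # {'name': 'Cleo', 'age': 4}

    # execute() returns a plain sqlite3.Cursor
    cursor = db.execute("select count(*) from dogs")
    print(cursor.fetchone()[0])  # 1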
return [ Trigger(*r) for r in self.execute( @@ -312,12 +464,13 @@ def triggers(self): ] @property - def triggers_dict(self): - "Returns {trigger_name: sql} dictionary" + def triggers_dict(self) -> Dict[str, str]: + "A ``{trigger_name: sql}`` dictionary of triggers in this database." return {trigger.name: trigger.sql for trigger in self.triggers} @property - def schema(self): + def schema(self) -> str: + "SQL schema for this database" sqls = [] for row in self.execute( "select sql from sqlite_master where sql is not null" @@ -329,14 +482,17 @@ def schema(self): return "\n".join(sqls) @property - def journal_mode(self): + def journal_mode(self) -> str: + "Current ``journal_mode`` of this database." return self.execute("PRAGMA journal_mode;").fetchone()[0] def enable_wal(self): + "Set ``journal_mode`` to ``'wal'`` to enable Write-Ahead Log mode." if self.journal_mode != "wal": self.execute("PRAGMA journal_mode=wal;") def disable_wal(self): + "Set ``journal_mode`` back to ``'delete'`` to disable Write-Ahead Log mode." if self.journal_mode != "delete": self.execute("PRAGMA journal_mode=delete;") @@ -345,6 +501,10 @@ def _ensure_counts_table(self): self.execute(_COUNTS_TABLE_CREATE_SQL.format(self._counts_table_name)) def enable_counts(self): + """ + Enable trigger-based count caching for every table in the database, see + :ref:`python_api_cached_table_counts`. + """ self._ensure_counts_table() for table in self.tables: if ( @@ -354,7 +514,11 @@ def enable_counts(self): table.enable_counts() self.use_counts_table = True - def cached_counts(self, tables=None): + def cached_counts(self, tables: Optional[Iterable[str]] = None) -> Dict[str, int]: + """ + Return ``{table_name: count}`` dictionary of cached counts for specified tables, or + all tables if ``tables`` not provided. + """ sql = "select [table], count from {}".format(self._counts_table_name) if tables: sql += " where [table] in ({})".format(", ".join("?" for table in tables)) @@ -364,6 +528,7 @@ def cached_counts(self, tables=None): return {} def reset_counts(self): + "Re-calculate cached counts for tables." tables = [table for table in self.tables if table.has_counts_triggers] with self.conn: self._ensure_counts_table() @@ -374,14 +539,6 @@ def reset_counts(self): for table in tables ) - def query( - self, sql: str, params: Optional[Union[Iterable, dict]] = None - ) -> Generator[dict, None, None]: - cursor = self.execute(sql, params or tuple()) - keys = [d[0] for d in cursor.description] - for row in cursor: - yield dict(zip(keys, row)) - def execute_returning_dicts( self, sql: str, params: Optional[Union[Iterable, dict]] = None ) -> List[dict]: @@ -430,16 +587,17 @@ def resolve_foreign_keys(self, name, foreign_keys): def create_table_sql( self, - name, - columns, - pk=None, + name: str, + columns: Dict[str, Any], + pk: Optional[Any] = None, foreign_keys=None, column_order=None, not_null=None, defaults=None, hash_id=None, extracts=None, - ): + ) -> str: + "Returns the SQL ``CREATE TABLE`` statement for creating the specified table." 
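A small sketch showing how ``create_table_sql()`` relates to ``create_table()`` - the table name and columns here are invented for illustration::

    from sqlite_utils import Database

    db = Database(memory=True)

    # Inspect the CREATE TABLE statement without executing it
    print(db.create_table_sql("cats", {"id": int, "name": str}, pk="id"))

    # create_table() executes equivalent SQL and returns a Table object
    cats = db.create_table("cats", {"id": int, "name": str}, pk="id")
    print(cats.schema)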
foreign_keys = self.resolve_foreign_keys(name, foreign_keys or []) foreign_keys_by_column = {fk.column: fk for fk in foreign_keys} # any extracts will be treated as integer columns with a foreign key @@ -536,16 +694,21 @@ def create_table_sql( def create_table( self, - name, - columns, - pk=None, + name: str, + columns: Dict[str, Any], + pk: Optional[Any] = None, foreign_keys=None, column_order=None, not_null=None, defaults=None, hash_id=None, extracts=None, - ): + ) -> "Table": + """ + Create a table with the specified name and the specified ``{column_name: type}`` columns. + + See :ref:`python_api_explicit_create`. + """ sql = self.create_table_sql( name=name, columns=columns, @@ -558,7 +721,7 @@ def create_table( extracts=extracts, ) self.execute(sql) - return self.table( + table = self.table( name, pk=pk, foreign_keys=foreign_keys, @@ -567,8 +730,17 @@ def create_table( defaults=defaults, hash_id=hash_id, ) + return cast(Table, table) + + def create_view( + self, name: str, sql: str, ignore: bool = False, replace: bool = False + ): + """ + Create a new SQL view with the specified name - ``sql`` should start with ``SELECT ...``. - def create_view(self, name, sql, ignore=False, replace=False): + - ``ignore`` - set to ``True`` to do nothing if a view with this name already exists + - ``replace`` - set to ``True`` to do replace the view if one with this name already exists + """ assert not ( ignore and replace ), "Use one or the other of ignore/replace, not both" @@ -586,18 +758,28 @@ def create_view(self, name, sql, ignore=False, replace=False): self.execute(create_sql) return self - def m2m_table_candidates(self, table, other_table): - "Returns potential m2m tables for arguments, based on FKs" + def m2m_table_candidates(self, table: str, other_table: str) -> List[str]: + """ + Given two table names returns the name of tables that could define a + many-to-many relationship between those two tables, based on having + foreign keys to both of the provided tables. + """ candidates = [] tables = {table, other_table} - for table in self.tables: + for table_obj in self.tables: # Does it have foreign keys to both table and other_table? - has_fks_to = {fk.other_table for fk in table.foreign_keys} + has_fks_to = {fk.other_table for fk in table_obj.foreign_keys} if has_fks_to.issuperset(tables): - candidates.append(table.name) + candidates.append(table_obj.name) return candidates - def add_foreign_keys(self, foreign_keys): + def add_foreign_keys(self, foreign_keys: Iterable[Tuple[str, str, str, str]]): + """ + See :ref:`python_api_add_foreign_keys`. + + ``foreign_keys`` should be a list of ``(table, column, other_table, other_column)`` + tuples, see :ref:`python_api_add_foreign_keys`. 
+ """ # foreign_keys is a list of explicit 4-tuples assert all( len(fk) == 4 and isinstance(fk, (list, tuple)) for fk in foreign_keys @@ -609,7 +791,11 @@ def add_foreign_keys(self, foreign_keys): for table, column, other_table, other_column in foreign_keys: if not self[table].exists(): raise AlterError("No such table: {}".format(table)) - if column not in self[table].columns_dict: + table_obj = self[table] + if not isinstance(table_obj, Table): + raise AlterError("Must be a table, not a view: {}".format(table)) + table_obj = cast(Table, table_obj) + if column not in table_obj.columns_dict: raise AlterError("No such column: {} in {}".format(column, table)) if not self[other_table].exists(): raise AlterError("No such other_table: {}".format(other_table)) @@ -623,7 +809,7 @@ def add_foreign_keys(self, foreign_keys): # We will silently skip foreign keys that exist already if not any( fk - for fk in self[table].foreign_keys + for fk in table_obj.foreign_keys if fk.column == column and fk.other_table == other_table and fk.other_column == other_column @@ -633,7 +819,7 @@ def add_foreign_keys(self, foreign_keys): ) # Construct SQL for use with "UPDATE sqlite_master SET sql = ? WHERE name = ?" - table_sql = {} + table_sql: Dict[str, str] = {} for table, column, other_table, other_column in foreign_keys_to_create: old_sql = table_sql.get(table, self[table].schema) extra_sql = ",\n FOREIGN KEY([{column}]) REFERENCES [{other_table}]([{other_column}])\n".format( @@ -661,6 +847,7 @@ def add_foreign_keys(self, foreign_keys): self.vacuum() def index_foreign_keys(self): + "Create indexes for every foreign key column on every table in the database." for table_name in self.table_names(): table = self[table_name] existing_indexes = { @@ -671,11 +858,13 @@ def index_foreign_keys(self): table.create_index([fk.column]) def vacuum(self): + "Run a SQLite ``VACUUM`` against the database." self.execute("VACUUM;") class Queryable: - def exists(self): + def exists(self) -> bool: + "Does this table or view exist yet?" return False def __init__(self, db, name): @@ -684,9 +873,10 @@ def __init__(self, db, name): def count_where( self, - where=None, - where_args=None, - ): + where: str = None, + where_args: Optional[Union[Iterable, dict]] = None, + ) -> int: + "Executes ``SELECT count(*) FROM table WHERE ...`` and returns a count." sql = "select count(*) from [{}]".format(self.name) if where is not None: sql += " where " + where @@ -697,24 +887,38 @@ def execute_count(self): return self.count_where() @property - def count(self): + def count(self) -> int: + "A count of the rows in this table or view." return self.count_where() @property - def rows(self): + def rows(self) -> Generator[dict, None, None]: + "Iterate over every dictionaries for each row in this table or view." return self.rows_where() def rows_where( self, - where=None, - where_args=None, - order_by=None, - select="*", - limit=None, - offset=None, - ): + where: str = None, + where_args: Optional[Union[Iterable, dict]] = None, + order_by: str = None, + select: str = "*", + limit: int = None, + offset: int = None, + ) -> Generator[dict, None, None]: + """ + Iterate over every row in this table or view that matches the specified where clause. + + - ``where`` - a SQL fragment to use as a ``WHERE`` clause, for example ``age > ?`` or ``age > :age``. + - ``where_args`` - a list of arguments (if using ``?``) or a dictionary (if using ``:age``). + - ``order_by`` - optional column or fragment of SQL to order by. 
+ - ``select`` - optional comma-separated list of columns to select. + - ``limit`` - optional integer number of rows to limit to. + - ``offset`` - optional integer for SQL offset. + + Returns each row as a dictionary. See :ref:`python_api_rows` for more details. + """ if not self.exists(): - return [] + return sql = "select {} from [{}]".format(select, self.name) if where is not None: sql += " where " + where @@ -731,13 +935,13 @@ def rows_where( def pks_and_rows_where( self, - where=None, - where_args=None, - order_by=None, - limit=None, - offset=None, - ): - "Like .rows_where() but returns (pk, row) pairs - pk can be a single value or tuple" + where: str = None, + where_args: Optional[Union[Iterable, dict]] = None, + order_by: str = None, + limit: int = None, + offset: int = None, + ) -> Generator[Tuple[Any, Dict], None, None]: + "Like ``.rows_where()`` but returns ``(pk, row)`` pairs - ``pk`` can be a single value or tuple." column_names = [column.name for column in self.columns] pks = [column.name for column in self.columns if column.is_pk] if not pks: @@ -758,32 +962,37 @@ def pks_and_rows_where( yield row_pk, row @property - def columns(self): + def columns(self) -> List["Column"]: + "List of :ref:`Columns ` representing the columns in this table or view." if not self.exists(): return [] rows = self.db.execute("PRAGMA table_info([{}])".format(self.name)).fetchall() return [Column(*row) for row in rows] @property - def columns_dict(self): - "Returns {column: python-type} dictionary" + def columns_dict(self) -> Dict[str, Any]: + "``{column_name: python-type}`` dictionary representing columns in this table or view." return {column.name: column_affinity(column.type) for column in self.columns} @property - def schema(self): + def schema(self) -> str: + "SQL schema for this table or view." return self.db.execute( "select sql from sqlite_master where name = ?", (self.name,) ).fetchone()[0] class Table(Queryable): - last_rowid = None - last_pk = None + "Tables should usually be initialized using the ``db.table(table_name)`` or ``db[table_name]`` methods." + #: The ``rowid`` of the last inserted, updated or selected row.` + last_rowid: Optional[int] = None + #: The primary key of the last inserted, updated or selected row.` + last_pk: Optional[Any] = None def __init__( self, - db, - name, + db: Database, + name: str, pk=None, foreign_keys=None, column_order=None, @@ -815,7 +1024,7 @@ def __init__( columns=columns, ) - def __repr__(self): + def __repr__(self) -> str: return "".format( self.name, " (does not exist yet)" @@ -824,7 +1033,8 @@ def __repr__(self): ) @property - def count(self): + def count(self) -> int: + "Count of the rows in this table - optionally from the table count cache, if configured." if self.db.use_counts_table: counts = self.db.cached_counts([self.name]) if counts: @@ -835,17 +1045,26 @@ def exists(self): return self.name in self.db.table_names() @property - def pks(self): + def pks(self) -> List[str]: + "Primary key columns for this table." names = [column.name for column in self.columns if column.is_pk] if not names: names = ["rowid"] return names @property - def use_rowid(self): + def use_rowid(self) -> bool: + "Does this table use ``rowid`` for its primary key (no other primary keys are specified)?" return not any(column for column in self.columns if column.is_pk) - def get(self, pk_values): + def get(self, pk_values: Union[list, tuple, str, int]) -> dict: + """ + Return row (as dictionary) for the specified primary key. 
+ + Primary key can be a single value, or a tuple for tables with a compound primary key. + + Raises ``NotFoundError`` if a matching row cannot be found. + """ if not isinstance(pk_values, (list, tuple)): pk_values = [pk_values] pks = self.pks @@ -867,7 +1086,8 @@ def get(self, pk_values): raise NotFoundError @property - def foreign_keys(self): + def foreign_keys(self) -> List["ForeignKey"]: + "List of foreign keys defined on this table." fks = [] for row in self.db.execute( "PRAGMA foreign_key_list([{}])".format(self.name) @@ -885,15 +1105,16 @@ def foreign_keys(self): return fks @property - def virtual_table_using(self): - "Returns type of virtual table or None if this is not a virtual table" + def virtual_table_using(self) -> Optional[str]: + "Type of virtual table, or ``None`` if this is not a virtual table." match = _virtual_table_using_re.match(self.schema) if match is None: return None return match.groupdict()["using"].upper() @property - def indexes(self): + def indexes(self) -> List[Index]: + "List of indexes defined on this table." sql = 'PRAGMA index_list("{}")'.format(self.name) indexes = [] for row in self.db.execute_returning_dicts(sql): @@ -916,7 +1137,8 @@ def indexes(self): return indexes @property - def xindexes(self): + def xindexes(self) -> List[XIndex]: + "List of indexes defined on this table using the more detailed ``XIndex`` format." sql = 'PRAGMA index_list("{}")'.format(self.name) indexes = [] for row in self.db.execute_returning_dicts(sql): @@ -934,7 +1156,8 @@ def xindexes(self): return indexes @property - def triggers(self): + def triggers(self) -> List[Trigger]: + "List of triggers defined on this table." return [ Trigger(*r) for r in self.db.execute( @@ -945,8 +1168,8 @@ def triggers(self): ] @property - def triggers_dict(self): - "Returns {trigger_name: sql} dictionary" + def triggers_dict(self) -> Dict[str, str]: + "``{trigger_name: sql}`` dictionary of triggers defined on this table." return {trigger.name: trigger.sql for trigger in self.triggers} def create( @@ -959,7 +1182,12 @@ def create( defaults=None, hash_id=None, extracts=None, - ): + ) -> "Table": + """ + Create a table with the specified columns. + + See :ref:`python_api_explicit_create` for full details. + """ columns = {name: value for (name, value) in columns.items()} with self.db.conn: self.db.create_table( @@ -986,7 +1214,13 @@ def transform( defaults=None, drop_foreign_keys=None, column_order=None, - ): + ) -> "Table": + """ + Apply an advanced alter table, including operations that are not supported by + ``ALTER TABLE`` in SQLite itself. + + See :ref:`python_api_transform` for full details. + """ assert self.exists(), "Cannot transform a table that doesn't exist yet" sqls = self.transform_sql( types=types, @@ -1027,7 +1261,8 @@ def transform_sql( drop_foreign_keys=None, column_order=None, tmp_suffix=None, - ): + ) -> List[str]: + "Returns a list of SQL statements that would be executed in order to apply this transformation." types = types or {} rename = rename or {} drop = drop or set() @@ -1133,7 +1368,18 @@ def transform_sql( ) return sqls - def extract(self, columns, table=None, fk_column=None, rename=None): + def extract( + self, + columns: Union[str, Iterable[str]], + table: Optional[str] = None, + fk_column: Optional[str] = None, + rename: Optional[Dict[str, str]] = None, + ) -> "Table": + """ + Extract specified columns into a separate table. + + See :ref:`python_api_extract` for details. 
+ """ rename = rename or {} if isinstance(columns, str): columns = [columns] @@ -1225,7 +1471,24 @@ def extract(self, columns, table=None, fk_column=None, rename=None): self.add_foreign_key(fk_column, table, "id") return self - def create_index(self, columns, index_name=None, unique=False, if_not_exists=False): + def create_index( + self, + columns: Iterable[Union[str, DescIndex]], + index_name: Optional[str] = None, + unique: bool = False, + if_not_exists: bool = False, + ): + """ + Create an index on this table. + + - ``columns`` - a single columns or list of columns to index. These can be strings or, + to create an index using the column in descending order, ``db.DescIndex(column_name)`` objects. + - ``index_name`` - the name to use for the new index. Defaults to the column names joined on ``_``. + - ``unique`` - should the index be marked as unique, forcing unique values? + - ``if_not_exists`` - only create the index if one with that name does not already exist. + + See :ref:`python_api_create_index`. + """ if index_name is None: index_name = "idx_{}_{}".format( self.name.replace(" ", "_"), "_".join(columns) @@ -1257,8 +1520,9 @@ def create_index(self, columns, index_name=None, unique=False, if_not_exists=Fal return self def add_column( - self, col_name, col_type=None, fk=None, fk_col=None, not_null_default=None + self, col_name: str, col_type=None, fk=None, fk_col=None, not_null_default=None ): + "Add a column to this table. See :ref:`python_api_add_column`." fk_col_type = None if fk is not None: # fk must be a valid table @@ -1293,14 +1557,24 @@ def add_column( self.add_foreign_key(col_name, fk, fk_col) return self - def drop(self, ignore=False): + def drop(self, ignore: bool = False): + "Drop this table. ``ignore=True`` means errors will be ignored." try: self.db.execute("DROP TABLE [{}]".format(self.name)) except sqlite3.OperationalError: if not ignore: raise - def guess_foreign_table(self, column): + def guess_foreign_table(self, column: str) -> str: + """ + For a given column, suggest another table that might be referenced by this + column should it be used as a foreign key. + + For example, a column called ``tag_id`` or ``tag`` or ``tags`` might suggest + a ``tag`` table, if one exists. + + If no candidates can be found, raises a ``NoObviousTable`` exception. + """ column = column.lower() possibilities = [column] if column.endswith("_id"): @@ -1321,7 +1595,7 @@ def guess_foreign_table(self, column): ) ) - def guess_foreign_column(self, other_table): + def guess_foreign_column(self, other_table: str): pks = [c for c in self.db[other_table].columns if c.is_pk] if len(pks) != 1: raise BadPrimaryKey( @@ -1331,8 +1605,20 @@ def guess_foreign_column(self, other_table): return pks[0].name def add_foreign_key( - self, column, other_table=None, other_column=None, ignore=False + self, + column: str, + other_table: Optional[str] = None, + other_column: Optional[str] = None, + ignore: bool = False, ): + """ + Alter the schema to mark the specified column as a foreign key to another table. + + - ``column`` - the column to mark as a foreign key. + - ``other_table`` - the table it refers to - if omitted, will be guessed based on the column name. + - ``other_column`` - the column on the other table it - if omitted, will be guessed. + - ``ignore`` - set this to ``True`` to ignore an existing foreign key - otherwise a ``AlterError` will be raised. 
+ """ # Ensure column exists if column not in self.columns_dict: raise AlterError("No such column: {}".format(column)) @@ -1369,6 +1655,11 @@ def add_foreign_key( return self def enable_counts(self): + """ + Set up triggers to update a cache of the count of rows in this table. + + See :ref:`python_api_cached_table_counts` for details. + """ sql = ( textwrap.dedent( """ @@ -1413,7 +1704,8 @@ def enable_counts(self): self.db.use_counts_table = True @property - def has_counts_triggers(self): + def has_counts_triggers(self) -> bool: + "Does this table have triggers setup to update cached counts?" trigger_names = { "{table}{counts_table}_{suffix}".format( counts_table=self.db._counts_table_name, table=self.name, suffix=suffix @@ -1424,13 +1716,23 @@ def has_counts_triggers(self): def enable_fts( self, - columns, - fts_version="FTS5", - create_triggers=False, - tokenize=None, - replace=False, + columns: Iterable[str], + fts_version: str = "FTS5", + create_triggers: bool = False, + tokenize: Optional[str] = None, + replace: bool = False, ): - "Enables FTS on the specified columns." + """ + Enable SQLite full-text search against the specified columns. + + - ``columns`` - list of column names to include in the search index. + - ``fts_version`` - FTS version to use - defaults to ``FTS5`` but you may want ``FTS4`` for older SQLite versions. + - ``create_triggers`` - should triggers be created to keep the search index up-to-date? Defaults to ``False``. + - ``tokenize`` - custom SQLite tokenizer to use, for example ``"porter"`` to enable Porter stemming. + - ``replace`` - should any existing FTS index for this table be replaced by the new one? + + See :ref:`python_api_fts` for more details. + """ create_fts_sql = ( textwrap.dedent( """ @@ -1498,7 +1800,11 @@ def enable_fts( self.db.executescript(triggers) return self - def populate_fts(self, columns): + def populate_fts(self, columns: Iterable[str]) -> "Table": + """ + Update the associated SQLite full-text search index with the latest data from the + table for the specified columns. + """ sql = ( textwrap.dedent( """ @@ -1514,7 +1820,8 @@ def populate_fts(self, columns): self.db.executescript(sql) return self - def disable_fts(self): + def disable_fts(self) -> "Table": + "Remove any full-text search index and related triggers configured for this table." fts_table = self.detect_fts() if fts_table: self.db[fts_table].drop() @@ -1539,6 +1846,7 @@ def disable_fts(self): return self def rebuild_fts(self): + "Run the ``rebuild`` operation against the associated full-text search index table." fts_table = self.detect_fts() if fts_table is None: # Assume this is itself an FTS table @@ -1550,7 +1858,7 @@ def rebuild_fts(self): ) return self - def detect_fts(self): + def detect_fts(self) -> Optional[str]: "Detect if table has a corresponding FTS virtual table and return it" sql = ( textwrap.dedent( @@ -1575,7 +1883,8 @@ def detect_fts(self): else: return rows[0][0] - def optimize(self): + def optimize(self) -> "Table": + "Run the ``optimize`` operation against the associated full-text search index table." fts_table = self.detect_fts() if fts_table is not None: self.db.execute( @@ -1587,7 +1896,8 @@ def optimize(self): ) return self - def search_sql(self, columns=None, order_by=None, limit=None, offset=None): + def search_sql(self, columns=None, order_by=None, limit=None, offset=None) -> str: + "Return SQL string that can be used to execute searches against this table." 
# Pick names for table and rank column that don't clash original = "original_" if self.name == "original" else "original" columns_sql = "*" @@ -1644,7 +1954,26 @@ def search_sql(self, columns=None, order_by=None, limit=None, offset=None): limit_offset=limit_offset.strip(), ).strip() - def search(self, q, order_by=None, columns=None, limit=None, offset=None): + def search( + self, + q: str, + order_by: Optional[str] = None, + columns: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + ) -> Generator[dict, None, None]: + """ + Execute a search against this table using SQLite full-text search, returning a sequence of + dictionaries for each row. + + - ``q`` - words to search for + - ``order_by`` - defaults to order by rank, or specify a column here. + - ``columns`` - list of columns to return, defaults to all columns. + - ``limit`` - optional integer limit for returned rows. + - ``offset`` - optional integer SQL offset. + + See :ref:`python_api_fts_search`. + """ cursor = self.db.execute( self.search_sql( order_by=order_by, @@ -1661,7 +1990,8 @@ def search(self, q, order_by=None, columns=None, limit=None, offset=None): def value_or_default(self, key, value): return self._defaults[key] if value is DEFAULT else value - def delete(self, pk_values): + def delete(self, pk_values: Union[list, tuple, str, int, float]) -> "Table": + "Delete row matching the specified primary key." if not isinstance(pk_values, (list, tuple)): pk_values = [pk_values] self.get(pk_values) @@ -1673,16 +2003,37 @@ def delete(self, pk_values): self.db.execute(sql, pk_values) return self - def delete_where(self, where=None, where_args=None): + def delete_where( + self, where: str = None, where_args: Optional[Union[Iterable, dict]] = None + ) -> "Table": + "Delete rows matching specified where clause, or delete all rows in the table." if not self.exists(): - return [] + return self sql = "delete from [{}]".format(self.name) if where is not None: sql += " where " + where self.db.execute(sql, where_args or []) return self - def update(self, pk_values, updates=None, alter=False, conversions=None): + def update( + self, + pk_values: Union[list, tuple, str, int, float], + updates: Optional[dict] = None, + alter: bool = False, + conversions: Optional[dict] = None, + ) -> "Table": + """ + Execute a SQL ``UPDATE`` against the specified row. + + - ``pk_values`` - the primary key of an individual record - can be a tuple if the + table has a compound primary key. + - ``updates`` - a dictionary mapping columns to their updated values. + - ``alter``` - set to ``True`` to add any missing columns. + - ``conversions`` - optional dictionary of SQL functions to apply during the update, for example + ``{"mycolumn": "upper(?)"}``. + + See :ref:`python_api_update`. + """ updates = updates or {} conversions = conversions or {} if not isinstance(pk_values, (list, tuple)): @@ -1722,16 +2073,34 @@ def update(self, pk_values, updates=None, alter=False, conversions=None): def convert( self, - columns, - fn, - output=None, - output_type=None, - drop=False, - multi=False, - where=None, - where_args=None, - show_progress=False, + columns: Union[str, List[str]], + fn: Callable, + output: Optional[str] = None, + output_type: Optional[Any] = None, + drop: bool = False, + multi: bool = False, + where: Optional[str] = None, + where_args: Optional[Union[Iterable, dict]] = None, + show_progress: bool = False, ): + """ + Apply conversion function ``fn`` to every value in the specified columns. 
+ + - ``columns`` - a single column or list of string column names to convert. + - ``fn`` - a callable that takes a single argument, ``value``, and returns it converted. + - ``output`` - optional string column name to write the results to (defaults to the input column). + - ``output_type`` - if the output column needs to be created, this is the type that will be used + for the new column. + - ``drop`` - boolean, should the original column be dropped once the conversion is complete? + - ``multi`` - boolean, if ``True`` the return value of ``fn(value)`` will be expected to be a + dictionary, and new columns will be created for each key of that dictionary. + - ``where`` - a SQL fragment to use as a ``WHERE`` clause to limit the rows to which the conversion + is applied, for example ``age > ?`` or ``age > :age``. + - ``where_args`` - a list of arguments (if using ``?``) or a dictionary (if using ``:age``). + - ``show_progress`` - boolean, should a progress bar be displayed? + + See :ref:`python_api_convert`. + """ if isinstance(columns, str): columns = [columns] @@ -2012,20 +2381,51 @@ def insert_chunk( def insert( self, - record, + record: Dict[str, Any], pk=DEFAULT, foreign_keys=DEFAULT, - column_order=DEFAULT, - not_null=DEFAULT, - defaults=DEFAULT, - hash_id=DEFAULT, - alter=DEFAULT, - ignore=DEFAULT, - replace=DEFAULT, - extracts=DEFAULT, - conversions=DEFAULT, - columns=DEFAULT, - ): + column_order: Optional[Union[List[str], Default]] = DEFAULT, + not_null: Optional[Union[Set[str], Default]] = DEFAULT, + defaults: Optional[Union[Dict[str, Any], Default]] = DEFAULT, + hash_id: Optional[Union[str, Default]] = DEFAULT, + alter: Optional[Union[bool, Default]] = DEFAULT, + ignore: Optional[Union[bool, Default]] = DEFAULT, + replace: Optional[Union[bool, Default]] = DEFAULT, + extracts: Optional[Union[Dict[str, str], List[str], Default]] = DEFAULT, + conversions: Optional[Union[Dict[str, str], Default]] = DEFAULT, + columns: Optional[Union[Dict[str, Any], Default]] = DEFAULT, + ) -> "Table": + """ + Insert a single record into the table. The table will be created with a schema that matches + the inserted record if it does not already exist, see :ref:`python_api_creating_tables`. + + - ``record`` - required: a dictionary representing the record to be inserted. + + The other parameters are optional, and mostly influence how the new table will be created if + that table does not exist yet. + + Each of them defaults to ``DEFAULT``, which indicates that the default setting for the current + ``Table`` object (specified in the table constructor) should be used. + + - ``pk`` - if creating the table, which column should be the primary key. + - ``foreign_keys`` - see :ref:`python_api_foreign_keys`. + - ``column_order`` - optional list of strings specifying a full or partial column order + to use when creating the table. + - ``not_null`` - optional set of strings specifying columns that should be ``NOT NULL``. + - ``defaults`` - optional dictionary specifying default values for specific columns. + - ``hash_id`` - optional name of a column to create and use as a primary key, where the + value of thet primary key will be derived as a SHA1 hash of the other column values + in the record. ``hash_id="id"`` is a common column name used for this. + - ``alter`` - boolean, should any missing columns be added automatically? + - ``ignore`` - boolean, if a record already exists with this primary key, ignore this insert. 
+ - ``replace`` - boolean, if a record already exists with this primary key, replace it with this new record. + - ``extracts`` - a list of columns to extract to other tables, or a dictionary that maps + ``{column_name: other_table_name}``. See :ref:`python_api_extracts`. + - ``conversions`` - dictionary specifying SQL conversion functions to be applied to the data while it + is being inserted, for example ``{"name": "upper(?)"}``. See :ref:`python_api_conversions`. + - ``columns`` - dictionary over-riding the detected types used for the columns, for example + ``{"age": int, "weight": float}``. + """ return self.insert_all( [record], pk=pk, @@ -2060,11 +2460,10 @@ def insert_all( conversions=DEFAULT, columns=DEFAULT, upsert=False, - ): + ) -> "Table": """ - Like .insert() but takes a list of records and ensures that the table - that it creates (if table does not exist) has columns for ALL of that - data + Like ``.insert()`` but takes a list of records and ensures that the table + that it creates (if table does not exist) has columns for ALL of that data. """ pk = self.value_or_default("pk", pk) foreign_keys = self.value_or_default("foreign_keys", foreign_keys) @@ -2089,7 +2488,7 @@ def insert_all( assert not ( ignore and replace ), "Use either ignore=True or replace=True, not both" - all_columns = None + all_columns = [] first = True num_records_processed = 0 # We can only handle a max of 999 variables in a SQL insert, so @@ -2127,10 +2526,10 @@ def insert_all( hash_id=hash_id, extracts=extracts, ) - all_columns = set() + all_columns_set = set() for record in chunk: - all_columns.update(record.keys()) - all_columns = list(sorted(all_columns)) + all_columns_set.update(record.keys()) + all_columns = list(sorted(all_columns_set)) if hash_id: all_columns.insert(0, hash_id) else: @@ -2171,7 +2570,13 @@ def upsert( extracts=DEFAULT, conversions=DEFAULT, columns=DEFAULT, - ): + ) -> "Table": + """ + Like ``.insert()`` but performs an ``UPSERT``, where records are inserted if they do + not exist and updated if they DO exist, based on matching against their primary key. + + See :ref:`python_api_upsert`. + """ return self.upsert_all( [record], pk=pk, @@ -2200,7 +2605,10 @@ def upsert_all( extracts=DEFAULT, conversions=DEFAULT, columns=DEFAULT, - ): + ) -> "Table": + """ + Like ``.upsert()`` but can be applied to a list of records. + """ return self.insert_all( records, pk=pk, @@ -2217,7 +2625,7 @@ def upsert_all( upsert=True, ) - def add_missing_columns(self, records): + def add_missing_columns(self, records: Iterable[Dict[str, Any]]) -> "Table": needed_columns = suggest_column_types(records) current_columns = {c.lower() for c in self.columns_dict} for col_name, col_type in needed_columns.items(): @@ -2225,7 +2633,20 @@ def add_missing_columns(self, records): self.add_column(col_name, col_type) return self - def lookup(self, column_values): + def lookup(self, column_values: Dict[str, Any]): + """ + Create or populate a lookup table with the specified values. + + ``db["Species"].lookup({"name": "Palm"})`` will create a table called ``Species`` + (if one does not already exist) with two columns: ``id`` and ``name``. It will + set up a unique constraint on the ``name`` column to guarantee it will not + contain duplicate rows. + + It well then inserts a new row with the ``name`` set to ``Palm`` and return the + new integer primary key value. + + See :ref:`python_api_lookup_tables` for more details. 
+ """ # lookups is a dictionary - all columns will be used for a unique index assert isinstance(column_values, dict) if self.exists(): @@ -2250,15 +2671,38 @@ def lookup(self, column_values): def m2m( self, - other_table, - record_or_iterable=None, - pk=DEFAULT, - lookup=None, - m2m_table=None, - alter=False, + other_table: Union[str, "Table"], + record_or_iterable: Optional[ + Union[Iterable[Dict[str, Any]], Dict[str, Any]] + ] = None, + pk: Optional[Union[Any, Default]] = DEFAULT, + lookup: Optional[Dict[str, Any]] = None, + m2m_table: Optional[str] = None, + alter: bool = False, ): + """ + After inserting a record in a table, create one or more records in some other + table and then create many-to-many records linking the original record and the + newly created records together. + + For example:: + + db["dogs"].insert({"id": 1, "name": "Cleo"}, pk="id").m2m( + "humans", {"id": 1, "name": "Natalie"}, pk="id" + ) + See :ref:`python_api_m2m` for details. + + - ``other_table`` - the name of the table to insert the new records into. + - ``record_or_iterable`` - a single dictionary record to insert, or a list of records. + - ``pk`` - the primary key to use if creating ``other_table``. + - ``lookup`` - same dictionary as for ``.lookup()``, to create a many-to-many lookup table. + - ``m2m_table`` - the string name to use for the many-to-many table, defaults to creating + this automatically based on the names of the two tables. + - ``alter``` - set to ``True`` to add any missing columns on ``other_table`` if that table + already exists. + """ if isinstance(other_table, str): - other_table = self.db.table(other_table, pk=pk) + other_table = cast(Table, self.db.table(other_table, pk=pk)) our_id = self.last_pk if lookup is not None: assert record_or_iterable is None, "Provide lookup= or record, not both" @@ -2282,20 +2726,19 @@ def m2m( else: # If not, create a new table m2m_table_name = m2m_table or "{}_{}".format(*tables) - m2m_table = self.db.table(m2m_table_name, pk=columns, foreign_keys=columns) + m2m_table_obj = self.db.table(m2m_table_name, pk=columns, foreign_keys=columns) if lookup is None: # if records is only one record, put the record in a list - records = ( - [record_or_iterable] - if isinstance(record_or_iterable, Mapping) - else record_or_iterable - ) + if isinstance(record_or_iterable, Mapping): + records = [record_or_iterable] + else: + records = cast(List, record_or_iterable) # Ensure each record exists in other table for record in records: id = other_table.insert( - record, pk=pk, replace=True, alter=alter + cast(dict, record), pk=pk, replace=True, alter=alter ).last_pk - m2m_table.insert( + m2m_table_obj.insert( { "{}_id".format(other_table.name): id, "{}_id".format(self.name): our_id, @@ -2304,7 +2747,7 @@ def m2m( ) else: id = other_table.lookup(lookup) - m2m_table.insert( + m2m_table_obj.insert( { "{}_id".format(other_table.name): id, "{}_id".format(self.name): our_id, @@ -2314,8 +2757,13 @@ def m2m( return self def analyze_column( - self, column, common_limit=10, value_truncate=None, total_rows=None - ): + self, column: str, common_limit: int = 10, value_truncate=None, total_rows=None + ) -> "ColumnDetails": + """ + Return statistics about the specified column. + + See :ref:`python_api_analyze_column`. + """ db = self.db table = self.name if total_rows is None: