Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# v0.4.5

- Change `infer_foreign_keys` option to `infer_fks` to match other option names (old name still works)
- Added `infer_fks_ignore_tables` to skip inferring foreign keys to the listed tables
- Fix error when setting `include_dependencies` to false
- Improved support for sampling rows when they have null fks
- Added support for polymorphic foreign keys
- Removed Python 3.8, 3.9 support and added 3.13, 3.14 support
Expand Down
10 changes: 7 additions & 3 deletions subsetter.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,16 @@ planner:
# keys. It does this by inferring that a column name that matches the name
# of a primary key column should function as a foreign key to that table.
# If set to 'schema', matches are limited to tables within the same schema.
infer_foreign_keys: none # can be 'none', 'schema', or 'all'
infer_fks: none # can be 'none', 'schema', or 'all'

# Do not attempt to infer foreign keys to these tables.
infer_fks_ignore_tables:
- db2.gizmos-foo

# By default the subsetter will automatically pull in tables referenced by
# tables already being selected to ensure their dependent rows can be pulled
# in with sampling. If this behavior is not desired you can set this value to
# false.
# in with sampling. Generally this behavior is desired; otherwise, foreign key
# constraints are likely to be violated at sample time.
include_dependencies: true

# Optional sampler config. Will write sample output to a directory named
Expand Down
7 changes: 5 additions & 2 deletions subsetter/config_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, List, Literal, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, model_validator
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Annotated

from subsetter.common import DatabaseConfig, SQLKnownOperator, SQLLiteralType
Expand Down Expand Up @@ -82,7 +82,10 @@ class ColumnConstraint(ForbidBaseModel):
ignore_fks: List[IgnoreFKConfig] = []
extra_fks: List[ExtraFKConfig] = []
polymorphic_fks: List[PolymorphicFKConfig] = []
infer_foreign_keys: Literal["none", "schema", "all"] = "none"
# Controls foreign-key inference: 'none' disables it, 'schema' limits matches
# to tables within the same schema, and 'all' matches across schemas. The
# legacy option name `infer_foreign_keys` is still accepted as an alias.
infer_fks: Literal["none", "schema", "all"] = Field(
    "none", validation_alias=AliasChoices("infer_fks", "infer_foreign_keys")
)
infer_fks_ignore_tables: List[str] = []
include_dependencies: bool = True


Expand Down
23 changes: 19 additions & 4 deletions subsetter/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import dataclasses
import logging
from fnmatch import fnmatch
from typing import Dict, List, Optional, Set, Tuple
from typing import Dict, Iterable, List, Optional, Set, Tuple

import sqlalchemy as sa

Expand Down Expand Up @@ -40,6 +40,8 @@ class TableMetadata:
def __init__(
self,
table_obj: sa.Table,
*,
table_set: Optional[Set[Tuple[str, str]]] = None,
) -> None:
assert table_obj.schema is not None
self.table_obj = table_obj
Expand All @@ -49,7 +51,10 @@ def __init__(
column.name for column in table_obj.primary_key.columns
)
self.foreign_keys = [
ForeignKey.from_schema(fk) for fk in table_obj.foreign_key_constraints
ForeignKey.from_schema(fk)
for fk in table_obj.foreign_key_constraints
if table_set is None
or (fk.referred_table.schema, fk.referred_table.name) in table_set
]
self.rev_foreign_keys: List[ForeignKey] = []

Expand Down Expand Up @@ -123,7 +128,8 @@ def from_engine(
metadata_obj,
{
(schema, table): TableMetadata(
metadata_obj.tables[f"{schema}.{table}"]
metadata_obj.tables[f"{schema}.{table}"],
table_set=table_set,
)
for schema, table in table_queue
},
Expand All @@ -137,16 +143,25 @@ def track_new_table(self, table_obj: sa.Table) -> None:
raise ValueError("Table schema must be set")
self.tables[(table_obj.schema, table_obj.name)] = TableMetadata(table_obj)

def infer_missing_foreign_keys(self, *, infer_all: bool = False) -> None:
def infer_missing_foreign_keys(
self,
*,
infer_all: bool = False,
ignore_tables: Iterable[Tuple[str, str]] = (),
) -> None:
def _key_pk(schema: str, pk: Tuple[str, ...]):
if infer_all:
return pk
return (schema, pk)

ignore_tables_st = set(ignore_tables)

pk_map: Dict[Tuple[str, Tuple[str, ...]], Optional[TableMetadata]] = {}
for table in self.tables.values():
if not table.primary_key:
continue
if (table.schema, table.name) in ignore_tables_st:
continue

map_key = _key_pk(table.schema, table.primary_key)
if map_key in pk_map:
Expand Down
8 changes: 6 additions & 2 deletions subsetter/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,13 @@ def plan(self) -> SubsetPlan:
return self._plan_internal()

def _plan_internal(self) -> SubsetPlan:
if self.config.infer_foreign_keys != "none":
if self.config.infer_fks != "none":
self.meta.infer_missing_foreign_keys(
infer_all=self.config.infer_foreign_keys == "all"
infer_all=self.config.infer_fks == "all",
ignore_tables=(
parse_table_name(table)
for table in self.config.infer_fks_ignore_tables
),
)
self._remove_ignore_fks()
self._add_extra_fks()
Expand Down
Loading