Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions docs/guides/performance/mypyc.md
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,35 @@ def process() -> None:

## SQLSpec-Specific Guidelines

### Avoid module-level optional dependency constants

- **Problem**: Using ``MY_FEATURE_INSTALLED = module_available("pkg")`` at import time lets mypyc fold the value into the compiled extension. If the optional package is missing during compilation but added later, the compiled module still sees ``False`` forever and removes the guarded code paths entirely.
- **Solution**: Use the runtime detector in ``sqlspec.utils.dependencies``. Wrap guards with ``dependency_flag("pkg")`` (boolean-like object) or call ``module_available("pkg")`` inside ``ensure_*`` helpers. These helpers evaluate availability at runtime, so compiled modules observe the actual environment when they execute.
- **Example**:

```python
# BAD (constant folded during compilation)
FSSPEC_INSTALLED = module_available("fsspec")
if FSSPEC_INSTALLED:
    ...

# GOOD
from sqlspec.utils.dependencies import dependency_flag

FSSPEC_INSTALLED = dependency_flag("fsspec")
if FSSPEC_INSTALLED:
    ...  # evaluated when the code runs, not when it is compiled

# GOOD (inside guards)
from sqlspec.utils.dependencies import module_available

def ensure_fsspec() -> None:
    if not module_available("fsspec"):
        raise MissingDependencyError(package="fsspec", install_package="fsspec")
```

- **Testing tip**: Call ``reset_dependency_cache()`` in tests that manipulate ``sys.path`` to force the detector to re-check availability after installing or removing temporary packages.

### File Caching Optimization

```python
Expand Down
55 changes: 24 additions & 31 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,32 +139,34 @@ exclude = ["/.github", "/docs"]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
include = ["NOTICE"]
packages = ["sqlspec"]


[tool.hatch.build.targets.wheel.hooks.mypyc]
dependencies = ["hatch-mypyc", "hatch-cython"]
enable-by-default = false
exclude = [
"tests/**", # Test files
"sqlspec/__main__.py", # Entry point (can't run directly when compiled)
"sqlspec/cli.py", # CLI module (not performance critical)
"sqlspec/typing.py", # Type aliases
"sqlspec/_typing.py", # Type aliases
"sqlspec/config.py", # Main config
"sqlspec/adapters/**/config.py", # Adapter configurations
"sqlspec/adapters/**/_types.py", # Type definitions (mypyc incompatible)
"sqlspec/extensions/**", # All extensions
"sqlspec/**/__init__.py", # Init files (usually just imports)
"sqlspec/protocols.py", # Protocol definitions
"sqlspec/builder/**/*.py", # Builder (not performance critical)
"tests/**", # Test files
"sqlspec/__main__.py", # Entry point (can't run directly when compiled)
"sqlspec/cli.py", # CLI module (not performance critical)
"sqlspec/typing.py", # Type aliases
"sqlspec/_typing.py", # Type aliases
"sqlspec/config.py", # Main config
"sqlspec/adapters/**/config.py", # Adapter configurations
"sqlspec/adapters/**/_types.py", # Type definitions (mypyc incompatible)
"sqlspec/extensions/**", # All extensions
"sqlspec/**/__init__.py", # Init files (usually just imports)
"sqlspec/protocols.py", # Protocol definitions
"sqlspec/builder/**/*.py", # Builder (not performance critical)
"sqlspec/migrations/commands.py", # Migration command CLI (dynamic imports)

]
include = [
"sqlspec/core/**/*.py", # Core module
"sqlspec/loader.py", # Loader module

"sqlspec/core/**/*.py", # Core module
"sqlspec/loader.py", # Loader module
"sqlspec/storage/**/*.py", # Storage layer
"sqlspec/observability/**/*.py", # Observability utilities
"sqlspec/migrations/**/*.py", # Migrations module
# === ADAPTER TYPE CONVERTERS ===
"sqlspec/adapters/adbc/type_converter.py", # ADBC type converter
"sqlspec/adapters/bigquery/type_converter.py", # BigQuery type converter
Expand All @@ -178,22 +180,13 @@ include = [
"sqlspec/utils/type_guards.py", # Type guard utilities
"sqlspec/utils/fixtures.py", # File fixture loading
"sqlspec/utils/data_transformation.py", # Data transformation utilities
"sqlspec/utils/arrow_helpers.py", # Arrow result helpers
"sqlspec/utils/serializers.py", # Serialization helpers
"sqlspec/utils/type_converters.py", # Adapter type converters
"sqlspec/utils/correlation.py", # Correlation context helpers
"sqlspec/utils/portal.py", # Thread portal utilities
"sqlspec/utils/singleton.py", # Lightweight singleton helpers

# === OBSERVABILITY ===
"sqlspec/observability/_config.py",
"sqlspec/observability/_diagnostics.py",
"sqlspec/observability/_dispatcher.py",
"sqlspec/observability/_observer.py",
"sqlspec/observability/_runtime.py",
"sqlspec/observability/_spans.py",

# === STORAGE LAYER ===
# "sqlspec/storage/_utils.py",
# "sqlspec/storage/registry.py",
# "sqlspec/storage/backends/base.py",
# "sqlspec/storage/backends/obstore.py",
# "sqlspec/storage/backends/fsspec.py",
# "sqlspec/storage/backends/local.py",
]
mypy-args = [
"--ignore-missing-imports",
Expand Down
78 changes: 20 additions & 58 deletions sqlspec/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,11 @@
from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from enum import Enum
from importlib.util import find_spec
from typing import Any, ClassVar, Final, Literal, Protocol, cast, runtime_checkable

from typing_extensions import TypeVar, dataclass_transform


def module_available(module_name: str) -> bool:
    """Return True if the given module spec can be resolved.

    Args:
        module_name: Dotted path for the module to locate.

    Returns:
        True if the module can be resolved, False otherwise. Lookup
        failures are reported as False rather than raised.
    """
    try:
        spec = find_spec(module_name)
    except (ModuleNotFoundError, ValueError):
        # ModuleNotFoundError: a parent package of a dotted name is missing.
        # ValueError: the module is already in sys.modules but its __spec__
        # is None or unset, so no spec can be resolved for it.
        return False
    return spec is not None
from sqlspec.utils.dependencies import dependency_flag, module_available


@runtime_checkable
Expand Down Expand Up @@ -131,12 +116,10 @@ class FailFastStub:
BaseModel = _RealBaseModel
TypeAdapter = _RealTypeAdapter
FailFast = _RealFailFast
PYDANTIC_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
BaseModel = BaseModelStub # type: ignore[assignment,misc]
TypeAdapter = TypeAdapterStub # type: ignore[assignment,misc]
FailFast = FailFastStub # type: ignore[assignment,misc]
PYDANTIC_INSTALLED = False # pyright: ignore[reportConstantRedefinition]

# Always define stub types for msgspec

Expand Down Expand Up @@ -184,21 +167,17 @@ class UnsetTypeStub(enum.Enum):
UnsetType = _RealUnsetType
UNSET = _REAL_UNSET
convert = _real_convert
MSGSPEC_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
Struct = StructStub # type: ignore[assignment,misc]
UnsetType = UnsetTypeStub # type: ignore[assignment,misc]
UNSET = UNSET_STUB # type: ignore[assignment] # pyright: ignore[reportConstantRedefinition]
convert = convert_stub
MSGSPEC_INSTALLED = False # pyright: ignore[reportConstantRedefinition]


try:
import orjson # noqa: F401

ORJSON_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
ORJSON_INSTALLED = False # pyright: ignore[reportConstantRedefinition]
orjson = None # type: ignore[assignment]


# Always define stub type for DTOData
Expand Down Expand Up @@ -228,10 +207,8 @@ def as_builtins(self) -> Any:
from litestar.dto.data_structures import DTOData as _RealDTOData # pyright: ignore[reportUnknownVariableType]

DTOData = _RealDTOData
LITESTAR_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
DTOData = DTODataStub # type: ignore[assignment,misc]
LITESTAR_INSTALLED = False # pyright: ignore[reportConstantRedefinition]


# Always define stub types for attrs
Expand Down Expand Up @@ -290,21 +267,17 @@ def attrs_has_stub(*args: Any, **kwargs: Any) -> bool: # noqa: ARG001
attrs_field = _real_attrs_field
attrs_fields = _real_attrs_fields
attrs_has = _real_attrs_has
ATTRS_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
AttrsInstance = AttrsInstanceStub # type: ignore[misc]
attrs_asdict = attrs_asdict_stub
attrs_define = attrs_define_stub
attrs_field = attrs_field_stub
attrs_fields = attrs_fields_stub
attrs_has = attrs_has_stub # type: ignore[assignment]
ATTRS_INSTALLED = False # pyright: ignore[reportConstantRedefinition]

try:
from cattrs import structure as cattrs_structure
from cattrs import unstructure as cattrs_unstructure

CATTRS_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:

def cattrs_unstructure(*args: Any, **kwargs: Any) -> Any: # noqa: ARG001
Expand All @@ -315,8 +288,6 @@ def cattrs_structure(*args: Any, **kwargs: Any) -> Any: # noqa: ARG001
"""Placeholder implementation"""
return {}

CATTRS_INSTALLED = False # pyright: ignore[reportConstantRedefinition] # pyright: ignore[reportConstantRedefinition]


class EmptyEnum(Enum):
"""A sentinel enum used as placeholder."""
Expand Down Expand Up @@ -433,16 +404,12 @@ def __iter__(self) -> "Iterable[Any]":
from pyarrow import RecordBatchReader as ArrowRecordBatchReader
from pyarrow import Schema as ArrowSchema
from pyarrow import Table as ArrowTable

PYARROW_INSTALLED = True
except ImportError:
ArrowTable = ArrowTableResult # type: ignore[assignment,misc]
ArrowRecordBatch = ArrowRecordBatchResult # type: ignore[assignment,misc]
ArrowSchema = ArrowSchemaProtocol # type: ignore[assignment,misc]
ArrowRecordBatchReader = ArrowRecordBatchReaderProtocol # type: ignore[assignment,misc]

PYARROW_INSTALLED = False # pyright: ignore[reportConstantRedefinition]


@runtime_checkable
class PandasDataFrameProtocol(Protocol):
Expand Down Expand Up @@ -472,20 +439,15 @@ def __getitem__(self, key: Any) -> Any:

try:
from pandas import DataFrame as PandasDataFrame

PANDAS_INSTALLED = True
except ImportError:
PandasDataFrame = PandasDataFrameProtocol # type: ignore[assignment,misc]
PANDAS_INSTALLED = False


try:
from polars import DataFrame as PolarsDataFrame

POLARS_INSTALLED = True
except ImportError:
PolarsDataFrame = PolarsDataFrameProtocol # type: ignore[assignment,misc]
POLARS_INSTALLED = False


@runtime_checkable
Expand Down Expand Up @@ -514,8 +476,6 @@ def tolist(self) -> "list[Any]":
StatusCode,
Tracer, # pyright: ignore[reportMissingImports, reportAssignmentType]
)

OPENTELEMETRY_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
# Define shims for when opentelemetry is not installed

Expand Down Expand Up @@ -578,7 +538,6 @@ def get_tracer_provider(self) -> Any: # pragma: no cover
trace = _TraceModule() # type: ignore[assignment]
StatusCode = trace.StatusCode # type: ignore[misc]
Status = trace.Status # type: ignore[misc]
OPENTELEMETRY_INSTALLED = False # pyright: ignore[reportConstantRedefinition] # pyright: ignore[reportConstantRedefinition]


try:
Expand All @@ -587,8 +546,6 @@ def get_tracer_provider(self) -> Any: # pragma: no cover
Gauge, # pyright: ignore[reportAssignmentType]
Histogram, # pyright: ignore[reportAssignmentType]
)

PROMETHEUS_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
# Define shims for when prometheus_client is not installed

Expand Down Expand Up @@ -636,8 +593,6 @@ class Histogram(_Metric): # type: ignore[no-redef]
def labels(self, *labelvalues: str, **labelkwargs: str) -> _MetricInstance:
return _MetricInstance() # pragma: no cover

PROMETHEUS_INSTALLED = False # pyright: ignore[reportConstantRedefinition] # pyright: ignore[reportConstantRedefinition]


try:
import aiosql # pyright: ignore[reportMissingImports, reportAssignmentType]
Expand All @@ -651,8 +606,6 @@ def labels(self, *labelvalues: str, **labelkwargs: str) -> _MetricInstance:
from aiosql.types import ( # pyright: ignore[reportMissingImports, reportAssignmentType]
SyncDriverAdapterProtocol as AiosqlSyncProtocol, # pyright: ignore[reportMissingImports, reportAssignmentType]
)

AIOSQL_INSTALLED = True # pyright: ignore[reportConstantRedefinition]
except ImportError:
# Define shims for when aiosql is not installed

Expand Down Expand Up @@ -723,16 +676,25 @@ async def insert_update_delete(self, conn: Any, query_name: str, sql: str, param
async def insert_update_delete_many(self, conn: Any, query_name: str, sql: str, parameters: Any) -> None: ...
async def insert_returning(self, conn: Any, query_name: str, sql: str, parameters: Any) -> "Any | None": ...

AIOSQL_INSTALLED = False # pyright: ignore[reportConstantRedefinition] # pyright: ignore[reportConstantRedefinition]


FSSPEC_INSTALLED = module_available("fsspec")
NUMPY_INSTALLED = module_available("numpy")
OBSTORE_INSTALLED = module_available("obstore")
PGVECTOR_INSTALLED = module_available("pgvector")

CLOUD_SQL_CONNECTOR_INSTALLED = module_available("google.cloud.sql.connector")
ALLOYDB_CONNECTOR_INSTALLED = module_available("google.cloud.alloydb.connector")
# Optional-dependency availability flags.
# Each flag is built with dependency_flag(...) instead of being assigned a
# plain True/False at import time. Per the project's mypyc guide, a
# module-level boolean can be constant-folded into the compiled extension,
# whereas dependency_flag returns a boolean-like object whose availability
# check is evaluated when the code runs.
AIOSQL_INSTALLED = dependency_flag("aiosql")
ATTRS_INSTALLED = dependency_flag("attrs")
CATTRS_INSTALLED = dependency_flag("cattrs")
CLOUD_SQL_CONNECTOR_INSTALLED = dependency_flag("google.cloud.sql.connector")
FSSPEC_INSTALLED = dependency_flag("fsspec")
LITESTAR_INSTALLED = dependency_flag("litestar")
MSGSPEC_INSTALLED = dependency_flag("msgspec")
NUMPY_INSTALLED = dependency_flag("numpy")
OBSTORE_INSTALLED = dependency_flag("obstore")
OPENTELEMETRY_INSTALLED = dependency_flag("opentelemetry")
ORJSON_INSTALLED = dependency_flag("orjson")
PANDAS_INSTALLED = dependency_flag("pandas")
PGVECTOR_INSTALLED = dependency_flag("pgvector")
POLARS_INSTALLED = dependency_flag("polars")
PROMETHEUS_INSTALLED = dependency_flag("prometheus_client")
PYARROW_INSTALLED = dependency_flag("pyarrow")
PYDANTIC_INSTALLED = dependency_flag("pydantic")
ALLOYDB_CONNECTOR_INSTALLED = dependency_flag("google.cloud.alloydb.connector")

__all__ = (
"AIOSQL_INSTALLED",
Expand Down
Loading