Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ target/
.claude/
.cursor/
.zed/

.gemini
.coverage*
# files
**/*.so
**/*.sqlite
Expand Down
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ test = [
"requests",
"anyio",
"coverage>=7.6.1",
"covdefaults",
"pytest>=8.0.0",
"pytest-asyncio>=0.23.8",
"pytest-cov>=5.0.0",
Expand Down Expand Up @@ -168,6 +169,14 @@ include = [
"sqlspec/utils/type_guards.py", # Type guard utilities
"sqlspec/utils/fixtures.py", # File fixture loading
"sqlspec/utils/data_transformation.py", # Data transformation utilities

# === STORAGE LAYER ===
"sqlspec/storage/_utils.py",
"sqlspec/storage/registry.py",
"sqlspec/storage/backends/base.py",
"sqlspec/storage/backends/obstore.py",
"sqlspec/storage/backends/fsspec.py",
"sqlspec/storage/backends/local.py",
]
mypy-args = [
"--ignore-missing-imports",
Expand Down
98 changes: 98 additions & 0 deletions sqlspec/storage/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Shared utilities for storage backends."""

from typing import TYPE_CHECKING

from sqlspec.exceptions import MissingDependencyError
from sqlspec.typing import PYARROW_INSTALLED

if TYPE_CHECKING:
from pathlib import Path

__all__ = ("ensure_pyarrow", "resolve_storage_path")


def ensure_pyarrow() -> None:
"""Ensure PyArrow is available for Arrow operations.

Raises:
MissingDependencyError: If pyarrow is not installed.
"""
if not PYARROW_INSTALLED:
raise MissingDependencyError(package="pyarrow", install_package="pyarrow")


def resolve_storage_path(
path: "str | Path", base_path: str = "", protocol: str = "file", strip_file_scheme: bool = True
) -> str:
"""Resolve path relative to base_path with protocol-specific handling.

Args:
path: Path to resolve (may include file:// scheme).
base_path: Base path to prepend if path is relative.
protocol: Storage protocol (file, s3, gs, etc.).
strip_file_scheme: Whether to strip file:// prefix.

Returns:
Resolved path string suitable for the storage backend.

Examples:
>>> resolve_storage_path("/data/file.txt", protocol="file")
'data/file.txt'

>>> resolve_storage_path(
... "file.txt", base_path="/base", protocol="file"
... )
'base/file.txt'

>>> resolve_storage_path(
... "file:///data/file.txt", strip_file_scheme=True
... )
'data/file.txt'

>>> resolve_storage_path(
... "/data/subdir/file.txt",
... base_path="/data",
... protocol="file",
... )
'subdir/file.txt'
"""
from pathlib import Path as PathlibPath

path_str = str(path)

if strip_file_scheme and path_str.startswith("file://"):
path_str = path_str.removeprefix("file://")

# For local file protocol
if protocol == "file":
path_obj = PathlibPath(path_str)

# Absolute path handling
if path_obj.is_absolute():
if base_path:
base_obj = PathlibPath(base_path)
# Try to make path relative to base_path
try:
relative = path_obj.relative_to(base_obj)
# Return joined path for FSSpec-style backends
return f"{base_path.rstrip('/')}/{relative}"
except ValueError:
# Path is outside base_path
return path_str.lstrip("/")
# No base_path - strip leading /
return path_str.lstrip("/")

# Relative path with base_path - join them
if base_path:
return f"{base_path.rstrip('/')}/{path_str}"

# Relative path without base_path
return path_str

# For cloud storage protocols (s3, gs, etc.), join with base_path
if not base_path:
return path_str

clean_base = base_path.rstrip("/")
clean_path = path_str.lstrip("/")
return f"{clean_base}/{clean_path}"
Loading
Loading