Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: CI

on:
pull_request:
types: [opened, synchronize, reopened, labeled]
push:
branches: [main]

Expand Down
16 changes: 13 additions & 3 deletions db2sql/domain/policy/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@

import re

# Split before a capitalized word so acronyms stay glued: "HTTPServer" → "HTTP_Server".
# The preceding character must not already be an underscore; otherwise mixed
# input such as "user_Name" would gain a second separator ("user__Name").
_CAMEL_BOUNDARY_RE = re.compile(r"([^_])([A-Z][a-z]+)")
# Split between lowercase/digit and uppercase to catch trailing acronyms: "userID" → "user_ID".
_LOWER_UPPER_RE = re.compile(r"([a-z0-9])([A-Z])")


def to_snake_case(name: str) -> str:
    """Convert CamelCase / PascalCase to snake_case.

    Keeps acronym runs intact (``HTTPServer`` → ``http_server``,
    ``UserID`` → ``user_id``) and leaves all-caps identifiers as a single
    word (``MYTABLE`` → ``mytable``). Underscores already present in the
    input are never doubled (``user_Name`` → ``user_name``,
    ``customer_ID`` → ``customer_id``). Idempotent on snake_case input.

    Only ASCII letters are treated as case boundaries; any other character
    is passed through and lower-cased by ``str.lower``.

    :param name: Identifier to convert; may be empty.
    :returns: The lower-cased identifier with ``_`` inserted at word
        boundaries.
    """
    # Pass 1: "HTTPServer" → "HTTP_Server", "CamelCase" → "Camel_Case".
    name = _CAMEL_BOUNDARY_RE.sub(r"\1_\2", name)
    # Pass 2: "userID" → "user_ID", "getHTTP_Response" → "get_HTTP_Response".
    name = _LOWER_UPPER_RE.sub(r"\1_\2", name)
    return name.lower()


def normalize_identifier(name: str, preserve_case: bool) -> str:
Expand Down
14 changes: 13 additions & 1 deletion docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,19 @@ Output
column names) are kept exactly as they appear in the source database.

When disabled (the default), identifiers are converted to ``snake_case``
so they work without quoting in PostgreSQL.
so they work without quoting in PostgreSQL. The conversion keeps
acronym runs glued together and collapses all-caps identifiers to a
single word:

===================== =====================
Source identifier Converted
===================== =====================
``CamelCase`` ``camel_case``
``HTTPServer`` ``http_server``
``UserID`` ``user_id``
``MYTABLE`` ``mytable``
``customer_ID`` ``customer_id``
===================== =====================

.. option:: --transaction / --no-transaction

Expand Down
9 changes: 7 additions & 2 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,11 @@ dump
* - ``preserve_case``
- ``false``
- Preserve identifier case as-is. When ``false``, identifiers are
converted to ``snake_case``.
converted to ``snake_case`` — acronym runs stay glued
(``HTTPServer`` → ``http_server``, ``UserID`` → ``user_id``) and
all-caps names collapse to a single word
(``MYTABLE`` → ``mytable``). See :option:`--preserve-case` for
the full table of examples.
* - ``limit_records``
- ``-1``
- Maximum rows per table. ``-1`` means no limit.
Expand Down Expand Up @@ -642,7 +646,8 @@ Oracle connections are configured almost entirely through
preserve_case: false
default_data_format: copy
mapping_schemas:
HR: hr # rewrite Oracle's upper-case owner to snake_case
HR: human_resources # optional rename; snake_case normalization
# alone would already produce "hr"

Oracle → MSSQL
~~~~~~~~~~~~~~
Expand Down
14 changes: 12 additions & 2 deletions tests/unit/domain/policy/test_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,27 @@
"name, expected",
[
("CamelCase", "camel_case"),
("HTTPServer", "h_t_t_p_server"),
("HTTPServer", "http_server"),
("already_snake", "already_snake"),
("lower", "lower"),
("Single", "single"),
("UserID", "user_i_d"),
("UserID", "user_id"),
("XMLParser", "xml_parser"),
("getHTTPResponseCode", "get_http_response_code"),
("MYTABLE", "mytable"),
("customer_ID", "customer_id"),
("", ""),
],
)
def test_to_snake_case(name: str, expected: str) -> None:
assert to_snake_case(name) == expected


def test_to_snake_case_is_idempotent() -> None:
    """Converting an already-converted name must leave it unchanged."""
    converted = to_snake_case("HTTPServerName")
    reconverted = to_snake_case(converted)
    assert reconverted == converted


def test_normalize_identifier_preserve_case_returns_input() -> None:
    """With ``preserve_case=True`` the identifier comes back untouched."""
    result = normalize_identifier("UserName", preserve_case=True)
    assert result == "UserName"

Expand Down
Loading