Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
cb370cd
FEAT: Support Access Tokens for Bulk Copy
bewithgaurav Feb 9, 2026
d283b8f
Fix test_008_auth to unpack 3-tuple from process_connection_string
bewithgaurav Feb 9, 2026
356392f
Fix comments: replace ODBC/Rust references with DDBC/mssql-py-core
bewithgaurav Feb 10, 2026
f10f70d
Add test coverage for get_raw_token and auth_type assertions
bewithgaurav Feb 10, 2026
bf134cb
Fix auth_type propagation for bulkcopy on all platforms
bewithgaurav Feb 10, 2026
16e3a0c
Validate auth_type in _acquire_token before credential lookup
bewithgaurav Feb 10, 2026
cb2daea
Address review feedback: validate auth_type in _acquire_token, fix fa…
bewithgaurav Feb 10, 2026
f0f34fd
Add tests for extract_auth_type and _acquire_token unsupported auth_type
bewithgaurav Feb 10, 2026
4bfd259
Add SQL_COPT_SS_ACCESS_TOKEN constant, replace hardcoded 1256
bewithgaurav Feb 12, 2026
abca6eb
Merge branch 'main' of https://github.com/microsoft/mssql-python into…
bewithgaurav Feb 12, 2026
d81a22d
Wrap bulk copy token acquisition with context-specific error handling
bewithgaurav Feb 12, 2026
d532b8e
Catch ValueError in bulk copy token acquisition
bewithgaurav Feb 12, 2026
621391a
Forward all connection string params to py-core in bulkcopy
bewithgaurav Feb 18, 2026
da09a86
Merge main, resolve conflicts (keep connstr_to_pycore_params)
bewithgaurav Feb 18, 2026
7d70539
Improve connstr_to_pycore_params docstring and variable names
bewithgaurav Feb 18, 2026
0e861a9
Remove unsupported params from connstr_to_pycore_params key_map
bewithgaurav Feb 18, 2026
0b1941c
Fix coverage comment shell injection from diff-cover output
bewithgaurav Feb 18, 2026
8c2a696
Move boolean validation to connection.rs — pass trust_server_certific…
bewithgaurav Feb 18, 2026
dffadd6
Use first-wins for synonym keys to match ODBC behaviour
bewithgaurav Feb 19, 2026
942ad7d
Fix allowlist test to expect first-wins synonym behavior
bewithgaurav Feb 19, 2026
c8851a2
Merge branch 'main' of https://github.com/microsoft/mssql-python into…
bewithgaurav Feb 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pr-code-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,8 @@ jobs:
--arg covered_lines "${{ env.COVERED_LINES }}" \
--arg total_lines "${{ env.TOTAL_LINES }}" \
--arg patch_coverage_pct "${{ env.PATCH_COVERAGE_PCT }}" \
--arg low_coverage_files "${{ env.LOW_COVERAGE_FILES }}" \
--arg patch_coverage_summary "${{ env.PATCH_COVERAGE_SUMMARY }}" \
--arg low_coverage_files "$LOW_COVERAGE_FILES" \
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

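Minor code-coverage fix: the coverage output for this PR contained code blocks that were being interpolated straight into the bash command line, so these two values are now passed as shell variables ("$LOW_COVERAGE_FILES", "$PATCH_COVERAGE_SUMMARY") instead of inline `${{ env.* }}` expressions.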

--arg patch_coverage_summary "$PATCH_COVERAGE_SUMMARY" \
--arg ado_url "${{ env.ADO_URL }}" \
'{
pr_number: $pr_number,
Expand Down
6 changes: 4 additions & 2 deletions mssql_python/connection_string_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ def _normalize_params(params: Dict[str, str], warn_rejected: bool = True) -> Dic
if normalized_key in _RESERVED_PARAMETERS:
continue

# Parameter is allowed
filtered[normalized_key] = value
# First-wins: match ODBC behaviour where the first
# occurrence of a synonym group takes precedence.
if normalized_key not in filtered:
filtered[normalized_key] = value
else:
# Parameter is not in allow-list
# Note: In normal flow, this should be empty since parser validates first
Expand Down
46 changes: 7 additions & 39 deletions mssql_python/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import warnings
from typing import List, Union, Any, Optional, Tuple, Sequence, TYPE_CHECKING, Iterable
from mssql_python.constants import ConstantsDDBC as ddbc_sql_const, SQLTypes
from mssql_python.helpers import check_error
from mssql_python.helpers import check_error, connstr_to_pycore_params
from mssql_python.logging import logger
from mssql_python import ddbc_bindings
from mssql_python.exceptions import (
Expand Down Expand Up @@ -2498,6 +2498,7 @@ def nextset(self) -> Union[bool, None]:
)
return True

# ── Mapping from ODBC connection-string keywords (lowercase, as _parse returns)
def _bulkcopy(
self,
table_name: str,
Expand Down Expand Up @@ -2632,38 +2633,10 @@ def _bulkcopy(
"Specify the target database explicitly to avoid accidentally writing to system databases."
)

# Build connection context for bulk copy library
# Note: Password is extracted separately to avoid storing it in the main context
# dict that could be accidentally logged or exposed in error messages.
trust_cert = params.get("trustservercertificate", "yes").lower() in ("yes", "true")

# Parse encryption setting from connection string
encrypt_param = params.get("encrypt")
if encrypt_param is not None:
encrypt_value = encrypt_param.strip().lower()
if encrypt_value in ("yes", "true", "mandatory", "required"):
encryption = "Required"
elif encrypt_value in ("no", "false", "optional"):
encryption = "Optional"
else:
# Pass through unrecognized values (e.g., "Strict") to the underlying driver
encryption = encrypt_param
else:
encryption = "Optional"

context = {
"server": params.get("server"),
"database": params.get("database"),
"trust_server_certificate": trust_cert,
"encryption": encryption,
}

# Build pycore_context with appropriate authentication.
# For Azure AD: acquire a FRESH token right now instead of reusing
# the one from connect() time — avoids expired-token errors when
# bulkcopy() is called long after the original connection.
pycore_context = dict(context)
# Translate parsed connection string into the dict py-core expects.
pycore_context = connstr_to_pycore_params(params)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have kept cursor.py limited to passing the params through, with no "validation" logic at all, since validation will be handled in py-core.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I am beginning to realize that all this complexity needs to be consolidated in one location


# Token acquisition — only thing cursor must handle (needs azure-identity SDK)
if self.connection._auth_type:
# Fresh token acquisition for mssql-py-core connection
from mssql_python.auth import AADAuth
Expand All @@ -2680,10 +2653,6 @@ def _bulkcopy(
"Bulk copy: acquired fresh Azure AD token for auth_type=%s",
self.connection._auth_type,
)
else:
# SQL Server authentication — use uid/password from connection string
pycore_context["user_name"] = params.get("uid", "")
pycore_context["password"] = params.get("pwd", "")

pycore_connection = None
pycore_cursor = None
Expand Down Expand Up @@ -2722,9 +2691,8 @@ def _bulkcopy(
finally:
# Clear sensitive data to minimize memory exposure
if pycore_context:
pycore_context.pop("password", None)
pycore_context.pop("user_name", None)
pycore_context.pop("access_token", None)
for key in ("password", "user_name", "access_token"):
pycore_context.pop(key, None)
# Clean up bulk copy resources
for resource in (pycore_cursor, pycore_connection):
if resource and hasattr(resource, "close"):
Expand Down
91 changes: 91 additions & 0 deletions mssql_python/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,97 @@ def _sanitize_for_logging(input_val: Any, max_length: int = max_log_length) -> s
return True, None, sanitized_attr, sanitized_val


def connstr_to_pycore_params(params: dict) -> dict:
    """Translate parsed ODBC connection-string params for py-core's bulk copy path.

    When ``cursor.bulkcopy()`` is called, mssql-python opens a *separate*
    connection through mssql-py-core. py-core's ``connection.rs`` expects a
    Python dict with snake_case keys, which differ from the ODBC-style keys
    that ``_ConnectionStringParser._parse`` returns.

    This function bridges that gap: it maps lowercase ODBC keys (e.g.
    ``"trustservercertificate"``) to py-core keys
    (``"trust_server_certificate"``) and converts numeric strings to ``int``
    for the keep-alive, retry and packet-size params. Boolean params
    (TrustServerCertificate, MultiSubnetFailover) are passed through as
    strings; ``connection.rs`` validates Yes/No and rejects invalid values.

    Args:
        params: Parsed connection-string parameters keyed by lowercase ODBC
            keyword. Insertion order is significant: it is taken to be the
            order in which the keywords appeared in the connection string.

    Returns:
        A new dict keyed by py-core parameter name. Keys that are not in the
        mapping, keys whose value is ``None``, and integer params whose value
        cannot be converted with ``int()`` are omitted.

    When several synonyms map to the same py-core key (e.g. ``addr`` and
    ``server``), the one that comes first in ``params`` wins.
    """
    # Only keys listed below are forwarded to py-core.
    # Unknown/reserved keys (app, workstationid, language, connect_timeout,
    # mars_connection) are silently dropped here. In the normal connect()
    # path the parser validates keywords first (validate_keywords=True),
    # but bulkcopy parses with validation off, so this mapping is the
    # authoritative filter in that path.
    #
    # NOTE(review): py-core could consume the connection-string keys as-is,
    # but that would require changing many tests, hence this translation map.
    # TODO(review): replace these string literals with shared constants;
    # py-core key names should come from the py-core native module, while
    # Python-specific names stay in mssql-python (follow-up, not this PR).
    key_map = {
        # auth / credentials
        "uid": "user_name",
        "pwd": "password",
        "trusted_connection": "trusted_connection",
        "authentication": "authentication",
        # server (accept parser synonyms)
        "server": "server",
        "addr": "server",
        "address": "server",
        # database
        "database": "database",
        "applicationintent": "application_intent",
        # encryption / TLS (include snake_case alias the parser may emit)
        "encrypt": "encryption",
        "trustservercertificate": "trust_server_certificate",
        "trust_server_certificate": "trust_server_certificate",
        "hostnameincertificate": "host_name_in_certificate",
        "servercertificate": "server_certificate",
        # Kerberos
        "serverspn": "server_spn",
        # network
        "multisubnetfailover": "multi_subnet_failover",
        "ipaddresspreference": "ip_address_preference",
        "keepalive": "keep_alive",
        "keepaliveinterval": "keep_alive_interval",
        # sizing / limits ("packet size" with space is a common pyodbc-ism)
        "packetsize": "packet_size",
        "packet size": "packet_size",
        "connectretrycount": "connect_retry_count",
        "connectretryinterval": "connect_retry_interval",
    }
    int_keys = {
        "packet_size",
        "connect_retry_count",
        "connect_retry_interval",
        "keep_alive",
        "keep_alive_interval",
    }

    pycore_params: dict = {}

    # Iterate the caller's params (not key_map) so that the order of the
    # parsed connection string, rather than the order of the mapping table,
    # decides which synonym is kept.
    for connstr_key, raw_value in params.items():
        pycore_key = key_map.get(connstr_key)
        if pycore_key is None or raw_value is None:
            continue

        # First-wins: match ODBC behaviour, where the first synonym in the
        # connection string takes precedence (e.g. Addr before Server).
        if pycore_key in pycore_params:
            continue

        # ODBC values are always strings; py-core expects native types for
        # int keys. Boolean params (trust_server_certificate,
        # multi_subnet_failover) are passed as strings because all Yes/No
        # validation lives in connection.rs, alongside Encrypt,
        # ApplicationIntent, IPAddressPreference, etc.
        if pycore_key in int_keys:
            try:
                pycore_params[pycore_key] = int(raw_value)
            except (ValueError, TypeError):
                # Bad numeric input: omit the key so py-core falls back to
                # its compiled-in default.
                pass
        else:
            # String params (server, database, encryption, etc.) pass through.
            pycore_params[pycore_key] = raw_value

    return pycore_params


# Settings functionality moved here to avoid circular imports

# Initialize the locale setting only once at module import time
Expand Down
96 changes: 96 additions & 0 deletions tests/test_010_connection_string_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,99 @@ def test_incomplete_entry_recovery(self):
# Should have error about incomplete 'Server'
errors = exc_info.value.errors
assert any("Server" in err and "Incomplete specification" in err for err in errors)


class TestSynonymFirstWins:
    """
    First-wins semantics of _normalize_params for synonym keys.

    Reference behaviour is ODBC Driver 18 (confirmed via live test against
    sqlcconn.cpp):
    - Same key repeated → first-wins (fFromAttrOrProp guard)
    - Addr vs Address → same KEY_ADDR slot, first-wins
    - Addr/Address vs Server → separate slots, Addr/Address takes priority

    Exact duplicate keys never reach normalization because
    _ConnectionStringParser._parse() rejects them outright. What is covered
    here are distinct spellings that collapse onto one canonical key while
    normalizing (e.g. addr/address/server → "Server").
    """

    @staticmethod
    def _normalize(raw: dict) -> dict:
        """Run _normalize_params on *raw* with rejection warnings turned off."""
        normalize = _ConnectionStringParser._normalize_params
        return normalize(raw, warn_rejected=False)

    # ---- server / addr / address synonyms --------------------------------

    def test_server_then_addr_first_wins(self):
        """Server=A;Addr=B → the earlier Server value (A) is kept."""
        raw = {"server": "hostA", "addr": "hostB"}
        normalized = self._normalize(raw)
        assert normalized["Server"] == "hostA"

    def test_addr_then_server_first_wins(self):
        """Addr=A;Server=B → the earlier Addr value (A) is kept."""
        raw = {"addr": "hostA", "server": "hostB"}
        normalized = self._normalize(raw)
        assert normalized["Server"] == "hostA"

    def test_address_then_server_first_wins(self):
        """Address=A;Server=B → the earlier Address value (A) is kept."""
        raw = {"address": "hostA", "server": "hostB"}
        normalized = self._normalize(raw)
        assert normalized["Server"] == "hostA"

    def test_addr_then_address_first_wins(self):
        """Addr=A;Address=B → the earlier Addr value (A) is kept."""
        raw = {"addr": "hostA", "address": "hostB"}
        normalized = self._normalize(raw)
        assert normalized["Server"] == "hostA"

    def test_all_three_server_synonyms_first_wins(self):
        """Addr=A;Address=B;Server=C → the earliest value (A) is kept."""
        raw = {"addr": "hostA", "address": "hostB", "server": "hostC"}
        normalized = self._normalize(raw)
        assert normalized["Server"] == "hostA"

    def test_server_only_no_synonyms(self):
        """A lone key cannot conflict with anything."""
        raw = {"server": "hostA"}
        normalized = self._normalize(raw)
        assert normalized["Server"] == "hostA"

    # ---- trustservercertificate / trust_server_certificate synonyms ------

    def test_trustservercertificate_then_snake_case_first_wins(self):
        """trustservercertificate=Yes;trust_server_certificate=No → Yes is kept."""
        raw = {"trustservercertificate": "Yes", "trust_server_certificate": "No"}
        normalized = self._normalize(raw)
        assert normalized["TrustServerCertificate"] == "Yes"

    def test_snake_case_then_trustservercertificate_first_wins(self):
        """trust_server_certificate=No;trustservercertificate=Yes → No is kept."""
        raw = {"trust_server_certificate": "No", "trustservercertificate": "Yes"}
        normalized = self._normalize(raw)
        assert normalized["TrustServerCertificate"] == "No"

    # ---- packetsize / "packet size" synonyms -----------------------------

    def test_packetsize_then_packet_space_first_wins(self):
        """packetsize=8192;packet size=4096 → 8192 is kept."""
        raw = {"packetsize": "8192", "packet size": "4096"}
        normalized = self._normalize(raw)
        assert normalized["PacketSize"] == "8192"

    def test_packet_space_then_packetsize_first_wins(self):
        """packet size=4096;packetsize=8192 → 4096 is kept."""
        raw = {"packet size": "4096", "packetsize": "8192"}
        normalized = self._normalize(raw)
        assert normalized["PacketSize"] == "4096"

    # ---- non-synonym keys are unaffected ---------------------------------

    def test_different_keys_both_kept(self):
        """Keys that are not synonyms of each other all survive."""
        raw = {"server": "host", "database": "mydb", "uid": "sa"}
        expected = {"Server": "host", "Database": "mydb", "UID": "sa"}
        assert self._normalize(raw) == expected

    # ---- reserved keys filtered regardless of order ----------------------

    def test_reserved_keys_always_filtered(self):
        """Driver and APP are removed even when they come first."""
        raw = {"driver": "foo", "server": "host", "app": "bar"}
        normalized = self._normalize(raw)
        assert "Driver" not in normalized
        assert "APP" not in normalized
        assert normalized["Server"] == "host"
4 changes: 2 additions & 2 deletions tests/test_011_connection_string_allowlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def test__normalize_params_handles_address_variants(self):
"""Test filtering handles address/addr/server as synonyms."""
params = {"address": "addr1", "addr": "addr2", "server": "server1"}
filtered = _ConnectionStringParser._normalize_params(params, warn_rejected=False)
# All three are synonyms that map to 'Server', last one wins
assert filtered["Server"] == "server1"
# All three are synonyms that map to 'Server', first one wins
assert filtered["Server"] == "addr1"
assert "Address" not in filtered
assert "Addr" not in filtered

Expand Down
Loading