-
Notifications
You must be signed in to change notification settings - Fork 35
FEAT: Pass all connection string params to mssql-py-core for bulk copy #439
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
cb370cd
d283b8f
356392f
f10f70d
bf134cb
16e3a0c
cb2daea
f0f34fd
4bfd259
abca6eb
d81a22d
d532b8e
621391a
da09a86
7d70539
0e861a9
0b1941c
8c2a696
dffadd6
942ad7d
c8851a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ | |
| import warnings | ||
| from typing import List, Union, Any, Optional, Tuple, Sequence, TYPE_CHECKING, Iterable | ||
| from mssql_python.constants import ConstantsDDBC as ddbc_sql_const, SQLTypes | ||
| from mssql_python.helpers import check_error | ||
| from mssql_python.helpers import check_error, connstr_to_pycore_params | ||
| from mssql_python.logging import logger | ||
| from mssql_python import ddbc_bindings | ||
| from mssql_python.exceptions import ( | ||
|
|
@@ -2498,6 +2498,7 @@ def nextset(self) -> Union[bool, None]: | |
| ) | ||
| return True | ||
|
|
||
| # ── Mapping from ODBC connection-string keywords (lowercase, as _parse returns) | ||
| def _bulkcopy( | ||
| self, | ||
| table_name: str, | ||
|
|
@@ -2632,38 +2633,10 @@ def _bulkcopy( | |
| "Specify the target database explicitly to avoid accidentally writing to system databases." | ||
| ) | ||
|
|
||
| # Build connection context for bulk copy library | ||
| # Note: Password is extracted separately to avoid storing it in the main context | ||
| # dict that could be accidentally logged or exposed in error messages. | ||
| trust_cert = params.get("trustservercertificate", "yes").lower() in ("yes", "true") | ||
|
|
||
| # Parse encryption setting from connection string | ||
| encrypt_param = params.get("encrypt") | ||
| if encrypt_param is not None: | ||
| encrypt_value = encrypt_param.strip().lower() | ||
| if encrypt_value in ("yes", "true", "mandatory", "required"): | ||
| encryption = "Required" | ||
| elif encrypt_value in ("no", "false", "optional"): | ||
| encryption = "Optional" | ||
| else: | ||
| # Pass through unrecognized values (e.g., "Strict") to the underlying driver | ||
| encryption = encrypt_param | ||
| else: | ||
| encryption = "Optional" | ||
|
|
||
| context = { | ||
| "server": params.get("server"), | ||
| "database": params.get("database"), | ||
| "trust_server_certificate": trust_cert, | ||
| "encryption": encryption, | ||
| } | ||
|
|
||
| # Build pycore_context with appropriate authentication. | ||
| # For Azure AD: acquire a FRESH token right now instead of reusing | ||
| # the one from connect() time — avoids expired-token errors when | ||
| # bulkcopy() is called long after the original connection. | ||
| pycore_context = dict(context) | ||
| # Translate parsed connection string into the dict py-core expects. | ||
| pycore_context = connstr_to_pycore_params(params) | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. have kept
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I am beginning to realize that all this complexity needs to be consolidated in one location |
||
|
|
||
| # Token acquisition — only thing cursor must handle (needs azure-identity SDK) | ||
| if self.connection._auth_type: | ||
| # Fresh token acquisition for mssql-py-core connection | ||
| from mssql_python.auth import AADAuth | ||
|
|
@@ -2680,10 +2653,6 @@ def _bulkcopy( | |
| "Bulk copy: acquired fresh Azure AD token for auth_type=%s", | ||
| self.connection._auth_type, | ||
| ) | ||
| else: | ||
| # SQL Server authentication — use uid/password from connection string | ||
| pycore_context["user_name"] = params.get("uid", "") | ||
| pycore_context["password"] = params.get("pwd", "") | ||
|
|
||
| pycore_connection = None | ||
| pycore_cursor = None | ||
|
|
@@ -2722,9 +2691,8 @@ def _bulkcopy( | |
| finally: | ||
| # Clear sensitive data to minimize memory exposure | ||
| if pycore_context: | ||
| pycore_context.pop("password", None) | ||
| pycore_context.pop("user_name", None) | ||
| pycore_context.pop("access_token", None) | ||
| for key in ("password", "user_name", "access_token"): | ||
| pycore_context.pop(key, None) | ||
| # Clean up bulk copy resources | ||
| for resource in (pycore_cursor, pycore_connection): | ||
| if resource and hasattr(resource, "close"): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -250,6 +250,97 @@ def _sanitize_for_logging(input_val: Any, max_length: int = max_log_length) -> s | |
| return True, None, sanitized_attr, sanitized_val | ||
|
|
||
|
|
||
| def connstr_to_pycore_params(params: dict) -> dict: | ||
| """Translate parsed ODBC connection-string params for py-core's bulk copy path. | ||
|
|
||
| When ``cursor.bulkcopy()`` is called, mssql-python opens a *separate* | ||
| connection through mssql-py-core. | ||
| py-core's ``connection.rs`` expects a Python dict with snake_case keys — | ||
| different from the ODBC-style keys that ``_ConnectionStringParser._parse`` | ||
| returns. | ||
|
|
||
| This function bridges that gap: it maps lowercase ODBC keys (e.g. | ||
| ``"trustservercertificate"``) to py-core keys (``"trust_server_certificate"``) | ||
| and converts numeric strings to ``int`` for timeout/size params. | ||
| Boolean params (TrustServerCertificate, MultiSubnetFailover) are passed as | ||
| strings — ``connection.rs`` validates Yes/No and rejects invalid values. | ||
| Unrecognised keys are silently dropped. | ||
| """ | ||
| # Only keys listed below are forwarded to py-core. | ||
| # Unknown/reserved keys (app, workstationid, language, connect_timeout, | ||
| # mars_connection) are silently dropped here. In the normal connect() | ||
| # path the parser validates keywords first (validate_keywords=True), | ||
| # but bulkcopy parses with validation off, so this mapping is the | ||
| # authoritative filter in that path. | ||
| key_map = { | ||
| # auth / credentials | ||
| "uid": "user_name", | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this map is being used to convert all the connection string params into pycore context
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to consider adding constants for these strings. Not necessary for this PR, but have a tracking item. The Rust string constants should come from the Py-core native modules and the python specific ones can stay in mssql-python |
||
| "pwd": "password", | ||
| "trusted_connection": "trusted_connection", | ||
| "authentication": "authentication", | ||
| # server (accept parser synonyms) | ||
| "server": "server", | ||
| "addr": "server", | ||
| "address": "server", | ||
| # database | ||
| "database": "database", | ||
| "applicationintent": "application_intent", | ||
| # encryption / TLS (include snake_case alias the parser may emit) | ||
| "encrypt": "encryption", | ||
| "trustservercertificate": "trust_server_certificate", | ||
| "trust_server_certificate": "trust_server_certificate", | ||
| "hostnameincertificate": "host_name_in_certificate", | ||
| "servercertificate": "server_certificate", | ||
| # Kerberos | ||
| "serverspn": "server_spn", | ||
| # network | ||
| "multisubnetfailover": "multi_subnet_failover", | ||
| "ipaddresspreference": "ip_address_preference", | ||
| "keepalive": "keep_alive", | ||
| "keepaliveinterval": "keep_alive_interval", | ||
| # sizing / limits ("packet size" with space is a common pyodbc-ism) | ||
| "packetsize": "packet_size", | ||
| "packet size": "packet_size", | ||
| "connectretrycount": "connect_retry_count", | ||
| "connectretryinterval": "connect_retry_interval", | ||
| } | ||
| int_keys = { | ||
| "packet_size", | ||
| "connect_retry_count", | ||
| "connect_retry_interval", | ||
| "keep_alive", | ||
| "keep_alive_interval", | ||
| } | ||
|
|
||
| pycore_params: dict = {} | ||
|
|
||
| for connstr_key, pycore_key in key_map.items(): | ||
| raw_value = params.get(connstr_key) | ||
| if raw_value is None: | ||
| continue | ||
|
|
||
| # First-wins: match ODBC behaviour — first synonym in the | ||
| # connection string takes precedence (e.g. Addr before Server). | ||
| if pycore_key in pycore_params: | ||
| continue | ||
|
|
||
| # ODBC values are always strings; py-core expects native types for int keys. | ||
| # Boolean params (trust_server_certificate, multi_subnet_failover) are passed | ||
| # as strings — all Yes/No validation is in connection.rs for single-location | ||
| # consistency with Encrypt, ApplicationIntent, IPAddressPreference, etc. | ||
| if pycore_key in int_keys: | ||
| # Numeric params (timeouts, packet size, etc.) — skip on bad input | ||
| try: | ||
| pycore_params[pycore_key] = int(raw_value) | ||
| except (ValueError, TypeError): | ||
| pass # let py-core fall back to its compiled-in default | ||
| else: | ||
| # String params (server, database, encryption, etc.) — pass through | ||
| pycore_params[pycore_key] = raw_value | ||
|
|
||
| return pycore_params | ||
|
|
||
|
|
||
| # Settings functionality moved here to avoid circular imports | ||
|
|
||
| # Initialize the locale setting only once at module import time | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
minor code coverage fix, this PR had some codeblocks which needed to be escaped, since they were getting into the bash terminal