From dea54908bcd8f4a1100a69f32061a0d5de5799b6 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Tue, 20 May 2025 13:36:29 +0530 Subject: [PATCH 1/4] FEAT: Adding options for BCP with comparison bcp.exe --- mssql_python/bcp_options.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 mssql_python/bcp_options.py diff --git a/mssql_python/bcp_options.py b/mssql_python/bcp_options.py new file mode 100644 index 00000000..9faccdd0 --- /dev/null +++ b/mssql_python/bcp_options.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass, field +from typing import List, Optional + +@dataclass +class ColumnFormat: + prefix_len: int # Option: (format_file) or (prefix_len, data_len) + data_len: int # Option: (format_file) or (prefix_len, data_len) + field_terminator: Optional[str] = None # Option: (-t) + row_terminator: Optional[str] = None # Option: (-r) + server_col: int = 1 # Option: (format_file) or (server_col) + file_col: int = 1 # Option: (format_file) or (file_col) + +@dataclass +class BCPOptions: + direction: str # 'in' or 'out' Option: (-i or -o) + data_file: str # Option: (positional argument) + error_file: Optional[str] = None # Option: (-e) + format_file: Optional[str] = None # Option: (-f) + write_format_file: Optional[str] = None # Option: (-x) + batch_size: Optional[int] = None # Option: (-b) + max_errors: Optional[int] = None # Option: (-m) + first_row: Optional[int] = None # Option: (-F) + last_row: Optional[int] = None # Option: (-L) + code_page: Optional[str] = None # Option: (-C) + keep_identity: bool = False # Option: (-E) + keep_nulls: bool = False # Option: (-k) + hints: Optional[str] = None # Option: (-h) + bulk_mode: str = "native" # native, char, unicode Option: (-n, -c, -w) + columns: Optional[List[ColumnFormat]] = field(default_factory=list) # Option: (format_file) or (columns) \ No newline at end of file From 878650b7a648464f07cee1baa6643787563d8fa5 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Tue, 20 May 
2025 13:52:24 +0530 Subject: [PATCH 2/4] FEAT: Adding options for BCP with comparison bcp.exe --- mssql_python/bcp_options.py | 73 ++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/mssql_python/bcp_options.py b/mssql_python/bcp_options.py index 9faccdd0..d35bb089 100644 --- a/mssql_python/bcp_options.py +++ b/mssql_python/bcp_options.py @@ -1,29 +1,58 @@ from dataclasses import dataclass, field -from typing import List, Optional +from typing import List, Optional, Literal @dataclass class ColumnFormat: - prefix_len: int # Option: (format_file) or (prefix_len, data_len) - data_len: int # Option: (format_file) or (prefix_len, data_len) - field_terminator: Optional[str] = None # Option: (-t) - row_terminator: Optional[str] = None # Option: (-r) - server_col: int = 1 # Option: (format_file) or (server_col) - file_col: int = 1 # Option: (format_file) or (file_col) + """ + Represents the format of a column in a bulk copy operation. + Attributes: + prefix_len (int): Option: (format_file) or (prefix_len, data_len). + data_len (int): Option: (format_file) or (prefix_len, data_len). + field_terminator (Optional[str]): Option: (-t). The field terminator string. + row_terminator (Optional[str]): Option: (-r). The row terminator string. + server_col (int): Option: (format_file) or (server_col). The server column number. + file_col (int): Option: (format_file) or (file_col). The file column number. 
+ """ + prefix_len: int + data_len: int + field_terminator: Optional[str] = None + row_terminator: Optional[str] = None + server_col: int = 1 + file_col: int = 1 @dataclass class BCPOptions: - direction: str # 'in' or 'out' Option: (-i or -o) - data_file: str # Option: (positional argument) - error_file: Optional[str] = None # Option: (-e) - format_file: Optional[str] = None # Option: (-f) - write_format_file: Optional[str] = None # Option: (-x) - batch_size: Optional[int] = None # Option: (-b) - max_errors: Optional[int] = None # Option: (-m) - first_row: Optional[int] = None # Option: (-F) - last_row: Optional[int] = None # Option: (-L) - code_page: Optional[str] = None # Option: (-C) - keep_identity: bool = False # Option: (-E) - keep_nulls: bool = False # Option: (-k) - hints: Optional[str] = None # Option: (-h) - bulk_mode: str = "native" # native, char, unicode Option: (-n, -c, -w) - columns: Optional[List[ColumnFormat]] = field(default_factory=list) # Option: (format_file) or (columns) \ No newline at end of file + """ + Represents the options for a bulk copy operation. + Attributes: + direction (Literal(str)): 'in' or 'out'. Option: (-i or -o). + data_file (str): The data file. Option: (positional argument). + error_file (Optional[str]): The error file. Option: (-e). + format_file (Optional[str]): The format file. Option: (-f). + write_format_file (Optional[str]): Write a format file. Option: (-x). + batch_size (Optional[int]): The batch size. Option: (-b). + max_errors (Optional[int]): The maximum number of errors allowed. Option: (-m). + first_row (Optional[int]): The first row to process. Option: (-F). + last_row (Optional[int]): The last row to process. Option: (-L). + code_page (Optional[str]): The code page. Option: (-C). + keep_identity (bool): Keep identity values. Option: (-E). + keep_nulls (bool): Keep null values. Option: (-k). + hints (Optional[str]): Additional hints. Option: (-h). + bulk_mode (str): Bulk mode ('native', 'char', 'unicode'). 
Option: (-n, -c, -w). + columns (List[ColumnFormat]): Column formats. Option: (format_file) or (columns). + """ + direction: Literal["in", "out"] + data_file: str + error_file: Optional[str] = None + format_file: Optional[str] = None + write_format_file: Optional[str] = None + batch_size: Optional[int] = None + max_errors: Optional[int] = None + first_row: Optional[int] = None + last_row: Optional[int] = None + code_page: Optional[str] = None + keep_identity: bool = False + keep_nulls: bool = False + hints: Optional[str] = None + bulk_mode: Literal["native", "char", "unicode"] = "native" + columns: List[ColumnFormat] = field(default_factory=list) \ No newline at end of file From 51433af9948c8ee6efaf16e5b5b5b8c2666d0b72 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Wed, 21 May 2025 09:55:22 +0530 Subject: [PATCH 3/4] FEAT: BCP OPTION - Adding validation for each field --- mssql_python/bcp_options.py | 83 +++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 7 deletions(-) diff --git a/mssql_python/bcp_options.py b/mssql_python/bcp_options.py index d35bb089..67b65210 100644 --- a/mssql_python/bcp_options.py +++ b/mssql_python/bcp_options.py @@ -1,25 +1,54 @@ from dataclasses import dataclass, field from typing import List, Optional, Literal + @dataclass class ColumnFormat: """ Represents the format of a column in a bulk copy operation. Attributes: prefix_len (int): Option: (format_file) or (prefix_len, data_len). + The length of the prefix for fixed-length data types. Must be non-negative. data_len (int): Option: (format_file) or (prefix_len, data_len). - field_terminator (Optional[str]): Option: (-t). The field terminator string. - row_terminator (Optional[str]): Option: (-r). The row terminator string. - server_col (int): Option: (format_file) or (server_col). The server column number. - file_col (int): Option: (format_file) or (file_col). The file column number. + The length of the data. Must be non-negative.
+ field_terminator (Optional[bytes]): Option: (-t). The field terminator string. + e.g., b',' for comma-separated values. + row_terminator (Optional[bytes]): Option: (-r). The row terminator string. + e.g., b'\\n' for newline-terminated rows. + server_col (int): Option: (format_file) or (server_col). The 1-based column number + in the SQL Server table. Defaults to 1, representing the first column. + Must be a positive integer. + file_col (int): Option: (format_file) or (file_col). The 1-based column number + in the data file. Defaults to 1, representing the first column. + Must be a positive integer. """ + prefix_len: int data_len: int - field_terminator: Optional[str] = None - row_terminator: Optional[str] = None + field_terminator: Optional[bytes] = None + row_terminator: Optional[bytes] = None server_col: int = 1 file_col: int = 1 + def __post_init__(self): + if self.prefix_len < 0: + raise ValueError("prefix_len must be a non-negative integer.") + if self.data_len < 0: + raise ValueError("data_len must be a non-negative integer.") + if self.server_col <= 0: + raise ValueError("server_col must be a positive integer (1-based).") + if self.file_col <= 0: + raise ValueError("file_col must be a positive integer (1-based).") + if self.field_terminator is not None and not isinstance( + self.field_terminator, bytes + ): + raise TypeError("field_terminator must be bytes or None.") + if self.row_terminator is not None and not isinstance( + self.row_terminator, bytes + ): + raise TypeError("row_terminator must be bytes or None.") + + @dataclass class BCPOptions: """ @@ -39,8 +68,12 @@ class BCPOptions: keep_nulls (bool): Keep null values. Option: (-k). hints (Optional[str]): Additional hints. Option: (-h). bulk_mode (str): Bulk mode ('native', 'char', 'unicode'). Option: (-n, -c, -w). + Defaults to "native". 
Native format is typically the most performant for + SQL Server to SQL Server data transfers as it uses the database's internal + data representation, minimizing conversions and preserving data fidelity. columns (List[ColumnFormat]): Column formats. Option: (format_file) or (columns). """ + direction: Literal["in", "out"] data_file: str error_file: Optional[str] = None @@ -55,4 +88,40 @@ class BCPOptions: keep_nulls: bool = False hints: Optional[str] = None bulk_mode: Literal["native", "char", "unicode"] = "native" - columns: List[ColumnFormat] = field(default_factory=list) \ No newline at end of file + columns: List[ColumnFormat] = field(default_factory=list) + + def __post_init__(self): + if self.direction not in ["in", "out"]: + raise ValueError("direction must be 'in' or 'out'.") + if not self.data_file: + raise ValueError("data_file must not be an empty string.") + if self.error_file is not None and not self.error_file: + raise ValueError("error_file, if provided, must not be an empty string.") + if self.format_file is not None and not self.format_file: + raise ValueError("format_file, if provided, must not be an empty string.") + if self.write_format_file is not None and not self.write_format_file: + raise ValueError( + "write_format_file, if provided, must not be an empty string." 
+ ) + if self.batch_size is not None and self.batch_size <= 0: + raise ValueError("batch_size must be a positive integer.") + if self.max_errors is not None and self.max_errors < 0: + raise ValueError("max_errors must be a non-negative integer.") + if self.first_row is not None and self.first_row <= 0: + raise ValueError("first_row must be a positive integer.") + if self.last_row is not None and self.last_row <= 0: + raise ValueError("last_row must be a positive integer.") + if self.last_row is not None and self.first_row is None: + raise ValueError("first_row must be specified if last_row is specified.") + if ( + self.first_row is not None + and self.last_row is not None + and self.last_row < self.first_row + ): + raise ValueError("last_row must be greater than or equal to first_row.") + if self.code_page is not None and not self.code_page: + raise ValueError("code_page, if provided, must not be an empty string.") + if self.hints is not None and not self.hints: + raise ValueError("hints, if provided, must not be an empty string.") + if self.bulk_mode not in ["native", "char", "unicode"]: + raise ValueError("bulk_mode must be 'native', 'char', or 'unicode'.") From 68af747e55d6c603e855169e9ab35150f44728ad Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Thu, 22 May 2025 22:29:21 +0530 Subject: [PATCH 4/4] FEAT: BCP OPTION - Adding validation for each field --- mssql_python/bcp_options.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/mssql_python/bcp_options.py b/mssql_python/bcp_options.py index 67b65210..7dab82d5 100644 --- a/mssql_python/bcp_options.py +++ b/mssql_python/bcp_options.py @@ -54,11 +54,10 @@ class BCPOptions: """ Represents the options for a bulk copy operation. Attributes: - direction (Literal(str)): 'in' or 'out'. Option: (-i or -o). + direction (Literal[str]): 'in' or 'out'. Option: (-i or -o). data_file (str): The data file. Option: (positional argument).
error_file (Optional[str]): The error file. Option: (-e). - format_file (Optional[str]): The format file. Option: (-f). - write_format_file (Optional[str]): Write a format file. Option: (-x). + format_file (Optional[str]): The format file to use for 'in'/'out'. Option: (-f). batch_size (Optional[int]): The batch size. Option: (-b). max_errors (Optional[int]): The maximum number of errors allowed. Option: (-m). first_row (Optional[int]): The first row to process. Option: (-F). @@ -68,17 +67,15 @@ class BCPOptions: keep_nulls (bool): Keep null values. Option: (-k). hints (Optional[str]): Additional hints. Option: (-h). bulk_mode (str): Bulk mode ('native', 'char', 'unicode'). Option: (-n, -c, -w). - Defaults to "native". Native format is typically the most performant for - SQL Server to SQL Server data transfers as it uses the database's internal - data representation, minimizing conversions and preserving data fidelity. - columns (List[ColumnFormat]): Column formats. Option: (format_file) or (columns). + Defaults to "native". + columns (List[ColumnFormat]): Column formats. 
""" direction: Literal["in", "out"] - data_file: str + data_file: str # data_file is mandatory for 'in' and 'out' error_file: Optional[str] = None format_file: Optional[str] = None - write_format_file: Optional[str] = None + # write_format_file is removed as 'format' direction is not actively supported batch_size: Optional[int] = None max_errors: Optional[int] = None first_row: Optional[int] = None @@ -94,15 +91,12 @@ def __post_init__(self): if self.direction not in ["in", "out"]: raise ValueError("direction must be 'in' or 'out'.") if not self.data_file: - raise ValueError("data_file must not be an empty string.") - if self.error_file is not None and not self.error_file: - raise ValueError("error_file, if provided, must not be an empty string.") + raise ValueError("data_file must be provided and non-empty for 'in' or 'out' directions.") + if self.error_file is None or not self.error_file: # Making error_file mandatory for in/out + raise ValueError("error_file must be provided and non-empty for 'in' or 'out' directions.") + if self.format_file is not None and not self.format_file: raise ValueError("format_file, if provided, must not be an empty string.") - if self.write_format_file is not None and not self.write_format_file: - raise ValueError( - "write_format_file, if provided, must not be an empty string." - ) if self.batch_size is not None and self.batch_size <= 0: raise ValueError("batch_size must be a positive integer.") if self.max_errors is not None and self.max_errors < 0: