From 94d179a3b1f2fcaa8d45f1466f95a2e24d942b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Thu, 16 Oct 2025 13:43:07 -0300 Subject: [PATCH] refactor(parser): remove redundant integer limit argument --- .../_libs/include/pandas/parser/pd_parser.h | 15 +++++------- .../_libs/include/pandas/parser/tokenizer.h | 7 +++--- pandas/_libs/parsers.pyx | 23 +++++-------------- pandas/_libs/src/parser/tokenizer.c | 13 +++++------ 4 files changed, 21 insertions(+), 37 deletions(-) diff --git a/pandas/_libs/include/pandas/parser/pd_parser.h b/pandas/_libs/include/pandas/parser/pd_parser.h index 543839b5d75bf..923d83123e3e2 100644 --- a/pandas/_libs/include/pandas/parser/pd_parser.h +++ b/pandas/_libs/include/pandas/parser/pd_parser.h @@ -37,9 +37,8 @@ typedef struct { int (*parser_trim_buffers)(parser_t *); int (*tokenize_all_rows)(parser_t *, const char *); int (*tokenize_nrows)(parser_t *, size_t, const char *); - int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char); - uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t, - int *, char); + int64_t (*str_to_int64)(const char *, int *, char); + uint64_t (*str_to_uint64)(uint_state *, const char *, int *, char); double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *); double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *, int *); @@ -87,12 +86,10 @@ static PandasParser_CAPI *PandasParserAPI = NULL; PandasParserAPI->tokenize_all_rows((self), (encoding_errors)) #define tokenize_nrows(self, nrows, encoding_errors) \ PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors)) -#define str_to_int64(p_item, int_min, int_max, error, t_sep) \ - PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error), \ - (t_sep)) -#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep) \ - PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max), \ - (error), (t_sep)) +#define str_to_int64(p_item, error, t_sep) \ + PandasParserAPI->str_to_int64((p_item), (error), (t_sep)) +#define str_to_uint64(state, p_item, error, t_sep) \ + PandasParserAPI->str_to_uint64((state), (p_item), (error), (t_sep)) #define xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \ PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep), \ (skip_trailing), (error), (maybe_int)) diff --git a/pandas/_libs/include/pandas/parser/tokenizer.h b/pandas/_libs/include/pandas/parser/tokenizer.h index 209f375a5bf6c..19287a30b9d4d 100644 --- a/pandas/_libs/include/pandas/parser/tokenizer.h +++ b/pandas/_libs/include/pandas/parser/tokenizer.h @@ -208,10 +208,9 @@ void uint_state_init(uint_state *self); int uint64_conflict(uint_state *self); -uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, - uint64_t uint_max, int *error, char tsep); -int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, - int *error, char tsep); +uint64_t str_to_uint64(uint_state *state, const char *p_item, int *error, + char tsep); +int64_t str_to_int64(const char *p_item, int *error, char tsep); double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing, int *error, int *maybe_int); double precise_xstrtod(const char *p, char **q, char decimal, char sci, diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 442891949dfd2..010e8b1283254 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -63,11 +63,6 @@ from numpy cimport ( cnp.import_array() from pandas._libs cimport util -from pandas._libs.util cimport ( - INT64_MAX, - INT64_MIN, - UINT64_MAX, -) from pandas._libs import lib @@ -281,10 +276,8 @@ cdef extern from "pandas/parser/pd_parser.h": int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil - int64_t str_to_int64(char *p_item, int64_t int_min, - int64_t int_max, int *error, char tsep) nogil - uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, - uint64_t uint_max, int *error, char tsep) nogil + int64_t str_to_int64(char *p_item, int *error, char tsep) nogil + uint64_t str_to_uint64(uint_state *state, char *p_item, int *error, char tsep) nogil double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing, @@ -1855,15 +1848,13 @@ cdef int _try_uint64_nogil(parser_t *parser, int64_t col, data[i] = 0 continue - data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX, - &error, parser.thousands) + data[i] = str_to_uint64(state, word, &error, parser.thousands) if error != 0: return error else: for i in range(lines): COLITER_NEXT(it, word) - data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX, - &error, parser.thousands) + data[i] = str_to_uint64(state, word, &error, parser.thousands) if error != 0: return error @@ -1920,15 +1911,13 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col, data[i] = NA continue - data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, - &error, parser.thousands) + data[i] = str_to_int64(word, &error, parser.thousands) if error != 0: return error else: for i in range(lines): COLITER_NEXT(it, word) - data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, - &error, parser.thousands) + data[i] = str_to_int64(word, &error, parser.thousands) if error != 0: return error diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index b77e8ab2254a3..a5cfd0e13ceec 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -1872,8 +1872,7 @@ static int copy_string_without_char(char output[PROCESSED_WORD_CAPACITY], return 0; } -int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, - int *error, char tsep) { +int64_t str_to_int64(const char *p_item, int *error, char tsep) { const char *p = p_item; // Skip leading spaces. while (isspace_ascii(*p)) { @@ -1907,7 +1906,7 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, char *endptr; int64_t number = strtoll(p, &endptr, 10); - if (errno == ERANGE || number > int_max || number < int_min) { + if (errno == ERANGE) { *error = ERROR_OVERFLOW; errno = 0; return 0; @@ -1928,8 +1927,8 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, return number; } -uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, - uint64_t uint_max, int *error, char tsep) { +uint64_t str_to_uint64(uint_state *state, const char *p_item, int *error, + char tsep) { const char *p = p_item; // Skip leading spaces. while (isspace_ascii(*p)) { @@ -1967,7 +1966,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, char *endptr; uint64_t number = strtoull(p, &endptr, 10); - if (errno == ERANGE || number > uint_max) { + if (errno == ERANGE) { *error = ERROR_OVERFLOW; errno = 0; return 0; @@ -1984,7 +1983,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, return 0; } - if (number > (uint64_t)int_max) { + if (number > (uint64_t)INT64_MAX) { state->seen_uint = 1; }