Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions pandas/_libs/include/pandas/parser/pd_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ typedef struct {
int (*parser_trim_buffers)(parser_t *);
int (*tokenize_all_rows)(parser_t *, const char *);
int (*tokenize_nrows)(parser_t *, size_t, const char *);
int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char);
uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t,
int *, char);
int64_t (*str_to_int64)(const char *, int *, char);
uint64_t (*str_to_uint64)(uint_state *, const char *, int *, char);
double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *);
double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *,
int *);
Expand Down Expand Up @@ -87,12 +86,10 @@ static PandasParser_CAPI *PandasParserAPI = NULL;
PandasParserAPI->tokenize_all_rows((self), (encoding_errors))
#define tokenize_nrows(self, nrows, encoding_errors) \
PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors))
#define str_to_int64(p_item, int_min, int_max, error, t_sep) \
PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error), \
(t_sep))
#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep) \
PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max), \
(error), (t_sep))
#define str_to_int64(p_item, error, t_sep) \
PandasParserAPI->str_to_int64((p_item), (error), (t_sep))
#define str_to_uint64(state, p_item, error, t_sep) \
PandasParserAPI->str_to_uint64((state), (p_item), (error), (t_sep))
#define xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \
PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep), \
(skip_trailing), (error), (maybe_int))
Expand Down
7 changes: 3 additions & 4 deletions pandas/_libs/include/pandas/parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,9 @@ void uint_state_init(uint_state *self);

int uint64_conflict(uint_state *self);

uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep);
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
int *error, char tsep);
uint64_t str_to_uint64(uint_state *state, const char *p_item, int *error,
char tsep);
int64_t str_to_int64(const char *p_item, int *error, char tsep);
double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
int skip_trailing, int *error, int *maybe_int);
double precise_xstrtod(const char *p, char **q, char decimal, char sci,
Expand Down
23 changes: 6 additions & 17 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,6 @@ from numpy cimport (
cnp.import_array()

from pandas._libs cimport util
from pandas._libs.util cimport (
INT64_MAX,
INT64_MIN,
UINT64_MAX,
)

from pandas._libs import lib

Expand Down Expand Up @@ -281,10 +276,8 @@ cdef extern from "pandas/parser/pd_parser.h":
int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil
int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil

int64_t str_to_int64(char *p_item, int64_t int_min,
int64_t int_max, int *error, char tsep) nogil
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep) nogil
int64_t str_to_int64(char *p_item, int *error, char tsep) nogil
uint64_t str_to_uint64(uint_state *state, char *p_item, int *error, char tsep) nogil

double xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
Expand Down Expand Up @@ -1855,15 +1848,13 @@ cdef int _try_uint64_nogil(parser_t *parser, int64_t col,
data[i] = 0
continue

data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
&error, parser.thousands)
data[i] = str_to_uint64(state, word, &error, parser.thousands)
if error != 0:
return error
else:
for i in range(lines):
COLITER_NEXT(it, word)
data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
&error, parser.thousands)
data[i] = str_to_uint64(state, word, &error, parser.thousands)
if error != 0:
return error

Expand Down Expand Up @@ -1920,15 +1911,13 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
data[i] = NA
continue

data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
&error, parser.thousands)
data[i] = str_to_int64(word, &error, parser.thousands)
if error != 0:
return error
else:
for i in range(lines):
COLITER_NEXT(it, word)
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
&error, parser.thousands)
data[i] = str_to_int64(word, &error, parser.thousands)
if error != 0:
return error

Expand Down
13 changes: 6 additions & 7 deletions pandas/_libs/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1872,8 +1872,7 @@ static int copy_string_without_char(char output[PROCESSED_WORD_CAPACITY],
return 0;
}

int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
int *error, char tsep) {
int64_t str_to_int64(const char *p_item, int *error, char tsep) {
const char *p = p_item;
// Skip leading spaces.
while (isspace_ascii(*p)) {
Expand Down Expand Up @@ -1907,7 +1906,7 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
char *endptr;
int64_t number = strtoll(p, &endptr, 10);

if (errno == ERANGE || number > int_max || number < int_min) {
if (errno == ERANGE) {
*error = ERROR_OVERFLOW;
errno = 0;
return 0;
Expand All @@ -1928,8 +1927,8 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
return number;
}

uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep) {
uint64_t str_to_uint64(uint_state *state, const char *p_item, int *error,
char tsep) {
const char *p = p_item;
// Skip leading spaces.
while (isspace_ascii(*p)) {
Expand Down Expand Up @@ -1967,7 +1966,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
char *endptr;
uint64_t number = strtoull(p, &endptr, 10);

if (errno == ERANGE || number > uint_max) {
if (errno == ERANGE) {
*error = ERROR_OVERFLOW;
errno = 0;
return 0;
Expand All @@ -1984,7 +1983,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
return 0;
}

if (number > (uint64_t)int_max) {
if (number > (uint64_t)INT64_MAX) {
state->seen_uint = 1;
}

Expand Down
Loading