Skip to content

Commit

Permalink
Re-land PyCapsule (#52021)
Browse files Browse the repository at this point in the history
* Revert "Revert "Use PyCapsule for internal datetime functions" (#51951)"

This reverts commit 8487b2e.

* Revert "Revert "Re-enable parallel builds in CI" (#51952)"

This reverts commit c8ea34c.

* Revert "Revert "CI: parallel build follow up" (#51984)"

This reverts commit 93b5135.
  • Loading branch information
mroeschke committed Mar 18, 2023
1 parent c73c1c8 commit 4ab82d0
Show file tree
Hide file tree
Showing 38 changed files with 754 additions and 233 deletions.
3 changes: 1 addition & 2 deletions .circleci/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ if pip list | grep -q ^pandas; then
fi

echo "Build extensions"
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
python setup.py build_ext -q -j1
python setup.py build_ext -q -j4

echo "Install pandas"
python -m pip install --no-build-isolation --no-use-pep517 -e .
Expand Down
6 changes: 2 additions & 4 deletions .github/actions/build_pandas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,5 @@ runs:
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
shell: bash -el {0}
env:
# Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
# GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
N_JOBS: 1
#N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }}
2 changes: 1 addition & 1 deletion .github/actions/setup-conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ runs:
environment-name: ${{ inputs.environment-name }}
extra-specs: ${{ inputs.extra-specs }}
channels: conda-forge
channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
channel-priority: 'strict'
condarc-file: ci/condarc.yml
cache-env: true
cache-downloads: true
2 changes: 1 addition & 1 deletion .github/workflows/32-bit-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
python -m pip install versioneer[toml] && \
python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
python setup.py build_ext -q -j1 && \
python setup.py build_ext -q -j$(nproc) && \
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
python -m pip list && \
export PANDAS_CI=1 && \
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/python-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,9 @@ jobs:
python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip list
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
- name: Build Pandas
run: |
python setup.py build_ext -q -j1
python setup.py build_ext -q -j4
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
- name: Build Version
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,5 @@ prune pandas/tests/io/parser/data
# Selectively re-add *.cxx files that were excluded above
graft pandas/_libs/src
graft pandas/_libs/tslibs/src
include pandas/_libs/pd_parser.h
include pandas/_libs/pd_parser.c
5 changes: 5 additions & 0 deletions pandas/_libs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
]


# The imports below need to happen first to ensure the pandas top-level
# module gets monkeypatched with the pandas_datetime_CAPI;
# see pandas_datetime_exec in pd_datetime.c
import pandas._libs.pandas_parser # noqa # isort: skip # type: ignore[reportUnusedImport]
import pandas._libs.pandas_datetime # noqa # isort: skip # type: ignore[reportUnusedImport]
from pandas._libs.interval import Interval
from pandas._libs.tslibs import (
NaT,
Expand Down
5 changes: 5 additions & 0 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
get_unit_from_dtype,
import_pandas_datetime,
)

import_pandas_datetime()


from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport _Timedelta
from pandas._libs.tslibs.timestamps cimport _Timestamp
Expand Down
6 changes: 4 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ cdef extern from "numpy/arrayobject.h":
cdef extern from "numpy/ndarrayobject.h":
bint PyArray_CheckScalar(obj) nogil


cdef extern from "src/parse_helper.h":
cdef extern from "pd_parser.h":
int floatify(object, float64_t *result, int *maybe_int) except -1
void PandasParser_IMPORT()

PandasParser_IMPORT

from pandas._libs cimport util
from pandas._libs.util cimport (
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ from pandas._libs.tslibs.np_datetime cimport (
get_datetime64_unit,
get_datetime64_value,
get_timedelta64_value,
import_pandas_datetime,
)

import_pandas_datetime()

from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

cdef:
Expand Down
73 changes: 53 additions & 20 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h":
int64_t skip_first_N_rows
int64_t skipfooter
# pick one, depending on whether the converter requires GIL
float64_t (*double_converter)(const char *, char **,
char, char, char,
int, int *, int *) nogil
double (*double_converter)(const char *, char **,
char, char, char,
int, int *, int *) nogil

# error handling
char *warn_msg
Expand All @@ -249,6 +249,16 @@ cdef extern from "parser/tokenizer.h":
int seen_uint
int seen_null

void COLITER_NEXT(coliter_t, const char *) nogil

cdef extern from "pd_parser.h":
void *new_rd_source(object obj) except NULL

int del_rd_source(void *src)

void* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status, const char *encoding_errors)

void uint_state_init(uint_state *self)
int uint64_conflict(uint_state *self)

Expand Down Expand Up @@ -279,26 +289,49 @@ cdef extern from "parser/tokenizer.h":
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep) nogil

float64_t xstrtod(const char *p, char **q, char decimal,
double xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
double precise_xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
double round_trip(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
float64_t precise_xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
float64_t round_trip(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil

int to_boolean(const char *item, uint8_t *val) nogil

void PandasParser_IMPORT()

cdef extern from "parser/io.h":
void *new_rd_source(object obj) except NULL
PandasParser_IMPORT

int del_rd_source(void *src)
# When not invoked directly but rather assigned as a function,
# cdef extern'ed declarations seem to leave behind an undefined symbol.
# The wrappers below are therefore defined in this module and assigned
# to the parser's function-pointer fields instead of the extern names.
#
# xstrtod_wrapper: forwards every argument unchanged to the extern
# xstrtod and returns its result. TextReader installs it as
# parser.double_converter when float_precision == "legacy".
cdef double xstrtod_wrapper(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil:
return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)

void* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status, const char *encoding_errors)

# precise_xstrtod_wrapper: forwards every argument unchanged to the extern
# precise_xstrtod and returns its result. TextReader installs it as
# parser.double_converter when float_precision is "high" or None.
# Defined locally so that a module-level function, not the cdef extern
# declaration itself, is what gets assigned to the function pointer.
cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil:
return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)


# round_trip_wrapper: forwards every argument unchanged to the extern
# round_trip and returns its result. TextReader installs it as
# parser.double_converter when float_precision == "round_trip" (see gh-15140).
# Defined locally so that a module-level function, not the cdef extern
# declaration itself, is what gets assigned to the function pointer.
cdef double round_trip_wrapper(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil:
return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)


# buffer_rd_bytes_wrapper: forwards every argument unchanged to the extern
# buffer_rd_bytes and returns its result. TextReader installs it as the
# parser's cb_io callback after creating the source with new_rd_source.
# Declared noexcept, so it does not propagate a Python exception to its caller.
cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
size_t *bytes_read, int *status,
const char *encoding_errors) noexcept:
return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)

# del_rd_source_wrapper: forwards src to the extern del_rd_source and
# returns its int result. TextReader installs it as the parser's
# cb_cleanup callback. Declared noexcept, so it does not propagate a
# Python exception to its caller.
cdef int del_rd_source_wrapper(void *src) noexcept:
return del_rd_source(src)


cdef class TextReader:
Expand Down Expand Up @@ -485,11 +518,11 @@ cdef class TextReader:

if float_precision == "round_trip":
# see gh-15140
self.parser.double_converter = round_trip
self.parser.double_converter = round_trip_wrapper
elif float_precision == "legacy":
self.parser.double_converter = xstrtod
self.parser.double_converter = xstrtod_wrapper
elif float_precision == "high" or float_precision is None:
self.parser.double_converter = precise_xstrtod
self.parser.double_converter = precise_xstrtod_wrapper
else:
raise ValueError(f"Unrecognized float_precision option: "
f"{float_precision}")
Expand Down Expand Up @@ -607,8 +640,8 @@ cdef class TextReader:

ptr = new_rd_source(source)
self.parser.source = ptr
self.parser.cb_io = &buffer_rd_bytes
self.parser.cb_cleanup = &del_rd_source
self.parser.cb_io = buffer_rd_bytes_wrapper
self.parser.cb_cleanup = del_rd_source_wrapper

cdef _get_header(self, list prelim_header):
# header is now a list of lists, so field_count should use header[0]
Expand Down

0 comments on commit 4ab82d0

Please sign in to comment.