
Commit

Merge branch 'main' of https://github.com/pandas-dev/pandas into feature/44764_perf_issue_new
Sylvain MARIE committed Apr 2, 2024
2 parents b0f73f5 + 0bfce5f commit 084f124
Showing 184 changed files with 1,821 additions and 2,488 deletions.
4 changes: 0 additions & 4 deletions .circleci/config.yml
@@ -72,10 +72,6 @@ jobs:
no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
command: |
pip3 install cibuildwheel==2.15.0
# When this is a nightly wheel build, allow picking up NumPy 2.0 dev wheels:
if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then
export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
fi
cibuildwheel --prerelease-pythons --output-dir wheelhouse
environment:
12 changes: 0 additions & 12 deletions .github/workflows/wheels.yml
@@ -148,18 +148,6 @@ jobs:
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

- name: Build nightly wheels (with NumPy pre-release)
if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
uses: pypa/cibuildwheel@v2.17.0
with:
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
env:
# The nightly wheels should be built with the NumPy 2.0 pre-releases
# which requires the additional URL.
CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

- name: Set up Python
uses: mamba-org/setup-micromamba@v1
with:
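For reference, the steps removed from both CI configurations pointed pip at the scientific-python nightly index so that cibuildwheel could resolve NumPy 2.0 development wheels. The effect can be reproduced by hand — a minimal sketch, assuming pip is available; PIP_EXTRA_INDEX_URL is the environment-variable form of pip's --extra-index-url option:

    # Allow pre-releases and add the nightly index, as the removed
    # CIBW_ENVIRONMENT setting did inside the cibuildwheel build environment:
    PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
        pip install --pre numpy
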
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -19,7 +19,7 @@ ci:
skip: [pylint, pyright, mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.1
rev: v0.3.4
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
@@ -39,7 +39,7 @@ repos:
- id: ruff-format
exclude: ^scripts
- repo: https://github.com/jendrikseipp/vulture
rev: 'v2.10'
rev: 'v2.11'
hooks:
- id: vulture
entry: python scripts/run_vulture.py
@@ -93,11 +93,11 @@ repos:
args: [--disable=all, --enable=redefined-outer-name]
stages: [manual]
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
rev: v3.15.2
hooks:
- id: pyupgrade
args: [--py39-plus]
@@ -116,7 +116,7 @@ repos:
hooks:
- id: sphinx-lint
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v17.0.6
rev: v18.1.2
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
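For reference, rev: bumps like the ones above are normally produced and verified with the pre-commit CLI — a minimal sketch, assuming pre-commit is installed (for example via pip install pre-commit):

    pre-commit autoupdate         # refresh rev: pins to the latest tagged releases
    pre-commit run --all-files    # exercise every configured hook against the repo
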
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/categoricals.py
@@ -88,7 +88,7 @@ def setup(self):
)

for col in ("int", "float", "timestamp"):
self.df[col + "_as_str"] = self.df[col].astype(str)
self.df[f"{col}_as_str"] = self.df[col].astype(str)

for col in self.df.columns:
self.df[col] = self.df[col].astype("category")
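The benchmark change above is purely stylistic: string concatenation and the f-string build the same column name. A one-line sanity check, assuming a Python interpreter on PATH:

    python -c 'col = "int"; print(col + "_as_str" == f"{col}_as_str")'   # prints True
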
58 changes: 3 additions & 55 deletions ci/code_checks.sh
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.assign SA01" \
-i "pandas.DataFrame.at_time PR01" \
-i "pandas.DataFrame.axes SA01" \
-i "pandas.DataFrame.backfill PR01,SA01" \
-i "pandas.DataFrame.bfill SA01" \
-i "pandas.DataFrame.columns SA01" \
-i "pandas.DataFrame.copy SA01" \
@@ -99,12 +98,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.kurt RT03,SA01" \
-i "pandas.DataFrame.kurtosis RT03,SA01" \
-i "pandas.DataFrame.last_valid_index SA01" \
-i "pandas.DataFrame.mask RT03" \
-i "pandas.DataFrame.max RT03" \
-i "pandas.DataFrame.mean RT03,SA01" \
-i "pandas.DataFrame.median RT03,SA01" \
-i "pandas.DataFrame.min RT03" \
-i "pandas.DataFrame.pad PR01,SA01" \
-i "pandas.DataFrame.plot PR02,SA01" \
-i "pandas.DataFrame.pop SA01" \
-i "pandas.DataFrame.prod RT03" \
@@ -119,19 +116,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.sparse.to_dense SA01" \
-i "pandas.DataFrame.std PR01,RT03,SA01" \
-i "pandas.DataFrame.sum RT03" \
-i "pandas.DataFrame.swapaxes PR01,SA01" \
-i "pandas.DataFrame.swaplevel SA01" \
-i "pandas.DataFrame.to_feather SA01" \
-i "pandas.DataFrame.to_markdown SA01" \
-i "pandas.DataFrame.to_parquet RT03" \
-i "pandas.DataFrame.to_period SA01" \
-i "pandas.DataFrame.to_timestamp SA01" \
-i "pandas.DataFrame.tz_convert SA01" \
-i "pandas.DataFrame.tz_localize SA01" \
-i "pandas.DataFrame.unstack RT03" \
-i "pandas.DataFrame.value_counts RT03" \
-i "pandas.DataFrame.var PR01,RT03,SA01" \
-i "pandas.DataFrame.where RT03" \
-i "pandas.DatetimeIndex.ceil SA01" \
-i "pandas.DatetimeIndex.date SA01" \
-i "pandas.DatetimeIndex.day SA01" \
@@ -226,7 +215,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Index.to_list RT03" \
-i "pandas.Index.union PR07,RT03,SA01" \
-i "pandas.Index.unique RT03" \
-i "pandas.Index.value_counts RT03" \
-i "pandas.Index.view GL08" \
-i "pandas.Int16Dtype SA01" \
-i "pandas.Int32Dtype SA01" \
@@ -400,7 +388,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.list.flatten SA01" \
-i "pandas.Series.list.len SA01" \
-i "pandas.Series.lt PR07,SA01" \
-i "pandas.Series.mask RT03" \
-i "pandas.Series.max RT03" \
-i "pandas.Series.mean RT03,SA01" \
-i "pandas.Series.median RT03,SA01" \
@@ -477,17 +464,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.to_frame SA01" \
-i "pandas.Series.to_list RT03" \
-i "pandas.Series.to_markdown SA01" \
-i "pandas.Series.to_period SA01" \
-i "pandas.Series.to_string SA01" \
-i "pandas.Series.to_timestamp RT03,SA01" \
-i "pandas.Series.truediv PR07" \
-i "pandas.Series.tz_convert SA01" \
-i "pandas.Series.tz_localize SA01" \
-i "pandas.Series.unstack SA01" \
-i "pandas.Series.update PR07,SA01" \
-i "pandas.Series.value_counts RT03" \
-i "pandas.Series.var PR01,RT03,SA01" \
-i "pandas.Series.where RT03" \
-i "pandas.SparseDtype SA01" \
-i "pandas.Timedelta PR07,SA01" \
-i "pandas.Timedelta.as_unit SA01" \
@@ -504,7 +484,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timedelta.to_timedelta64 SA01" \
-i "pandas.Timedelta.total_seconds SA01" \
-i "pandas.Timedelta.view SA01" \
-i "pandas.TimedeltaIndex PR01" \
-i "pandas.TimedeltaIndex.as_unit RT03,SA01" \
-i "pandas.TimedeltaIndex.ceil SA01" \
-i "pandas.TimedeltaIndex.components SA01" \
@@ -682,60 +661,40 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.apply RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.cummax RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.cummin RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.cumprod RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.cumsum RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.filter RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.filter SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.mean RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.median SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.rank RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.resample RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.skew RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.transform RT03" \
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
-i "pandas.core.groupby.SeriesGroupBy.apply RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cummax RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cummin RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cumprod RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cumsum RT03" \
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
-i "pandas.core.groupby.SeriesGroupBy.mean RT03" \
-i "pandas.core.groupby.SeriesGroupBy.median SA01" \
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
-i "pandas.core.groupby.SeriesGroupBy.rank RT03" \
-i "pandas.core.groupby.SeriesGroupBy.resample RT03" \
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
-i "pandas.core.groupby.SeriesGroupBy.skew RT03" \
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
-i "pandas.core.groupby.SeriesGroupBy.transform RT03" \
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
-i "pandas.core.resample.Resampler.ffill RT03" \
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
@@ -797,8 +756,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.io.formats.style.Styler.clear SA01" \
-i "pandas.io.formats.style.Styler.concat RT03,SA01" \
-i "pandas.io.formats.style.Styler.export RT03" \
-i "pandas.io.formats.style.Styler.format RT03" \
-i "pandas.io.formats.style.Styler.format_index RT03" \
-i "pandas.io.formats.style.Styler.from_custom_template SA01" \
-i "pandas.io.formats.style.Styler.hide RT03,SA01" \
-i "pandas.io.formats.style.Styler.highlight_between RT03" \
@@ -808,7 +765,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.io.formats.style.Styler.highlight_quantile RT03" \
-i "pandas.io.formats.style.Styler.map RT03" \
-i "pandas.io.formats.style.Styler.map_index RT03" \
-i "pandas.io.formats.style.Styler.relabel_index RT03" \
-i "pandas.io.formats.style.Styler.set_caption RT03,SA01" \
-i "pandas.io.formats.style.Styler.set_properties RT03,SA01" \
-i "pandas.io.formats.style.Styler.set_sticky RT03,SA01" \
@@ -1022,7 +978,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.DateOffset.rule_code GL08" \
-i "pandas.tseries.offsets.Day PR02" \
-i "pandas.tseries.offsets.Day.copy SA01" \
-i "pandas.tseries.offsets.Day.delta GL08" \
-i "pandas.tseries.offsets.Day.freqstr SA01" \
-i "pandas.tseries.offsets.Day.is_on_offset GL08" \
-i "pandas.tseries.offsets.Day.kwds SA01" \
@@ -1075,7 +1030,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.FY5253Quarter.year_has_extra_week GL08" \
-i "pandas.tseries.offsets.Hour PR02" \
-i "pandas.tseries.offsets.Hour.copy SA01" \
-i "pandas.tseries.offsets.Hour.delta GL08" \
-i "pandas.tseries.offsets.Hour.freqstr SA01" \
-i "pandas.tseries.offsets.Hour.is_on_offset GL08" \
-i "pandas.tseries.offsets.Hour.kwds SA01" \
@@ -1098,7 +1052,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.LastWeekOfMonth.weekday GL08" \
-i "pandas.tseries.offsets.Micro PR02" \
-i "pandas.tseries.offsets.Micro.copy SA01" \
-i "pandas.tseries.offsets.Micro.delta GL08" \
-i "pandas.tseries.offsets.Micro.freqstr SA01" \
-i "pandas.tseries.offsets.Micro.is_on_offset GL08" \
-i "pandas.tseries.offsets.Micro.kwds SA01" \
@@ -1109,7 +1062,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.Micro.rule_code GL08" \
-i "pandas.tseries.offsets.Milli PR02" \
-i "pandas.tseries.offsets.Milli.copy SA01" \
-i "pandas.tseries.offsets.Milli.delta GL08" \
-i "pandas.tseries.offsets.Milli.freqstr SA01" \
-i "pandas.tseries.offsets.Milli.is_on_offset GL08" \
-i "pandas.tseries.offsets.Milli.kwds SA01" \
@@ -1120,7 +1072,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.Milli.rule_code GL08" \
-i "pandas.tseries.offsets.Minute PR02" \
-i "pandas.tseries.offsets.Minute.copy SA01" \
-i "pandas.tseries.offsets.Minute.delta GL08" \
-i "pandas.tseries.offsets.Minute.freqstr SA01" \
-i "pandas.tseries.offsets.Minute.is_on_offset GL08" \
-i "pandas.tseries.offsets.Minute.kwds SA01" \
@@ -1151,7 +1102,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.MonthEnd.rule_code GL08" \
-i "pandas.tseries.offsets.Nano PR02" \
-i "pandas.tseries.offsets.Nano.copy SA01" \
-i "pandas.tseries.offsets.Nano.delta GL08" \
-i "pandas.tseries.offsets.Nano.freqstr SA01" \
-i "pandas.tseries.offsets.Nano.is_on_offset GL08" \
-i "pandas.tseries.offsets.Nano.kwds SA01" \
@@ -1184,7 +1134,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.QuarterEnd.startingMonth GL08" \
-i "pandas.tseries.offsets.Second PR02" \
-i "pandas.tseries.offsets.Second.copy SA01" \
-i "pandas.tseries.offsets.Second.delta GL08" \
-i "pandas.tseries.offsets.Second.freqstr SA01" \
-i "pandas.tseries.offsets.Second.is_on_offset GL08" \
-i "pandas.tseries.offsets.Second.kwds SA01" \
@@ -1217,7 +1166,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.SemiMonthEnd.rule_code GL08" \
-i "pandas.tseries.offsets.Tick GL08" \
-i "pandas.tseries.offsets.Tick.copy SA01" \
-i "pandas.tseries.offsets.Tick.delta GL08" \
-i "pandas.tseries.offsets.Tick.freqstr SA01" \
-i "pandas.tseries.offsets.Tick.is_on_offset GL08" \
-i "pandas.tseries.offsets.Tick.kwds SA01" \
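Each -i "pandas.<object> <codes>" entry above appears to suppress the listed numpydoc-style error codes (for example RT03, SA01, PR01) for that API, so deleting an entry re-enables those docstring checks. Because the surrounding script dispatches on a $CHECK argument (see the "$CHECK" == "docstrings" guard), the docstring pass can be run on its own — a sketch, assuming it is invoked from the repository root:

    ./ci/code_checks.sh docstrings
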
4 changes: 2 additions & 2 deletions ci/deps/actions-310.yaml
@@ -26,7 +26,7 @@ dependencies:
- beautifulsoup4>=4.11.2
- blosc>=1.21.3
- bottleneck>=1.3.6
- fastparquet>=2023.04.0
- fastparquet>=2023.10.0
- fsspec>=2022.11.0
- html5lib>=1.1
- hypothesis>=6.46.1
@@ -57,7 +57,7 @@ dependencies:
- zstandard>=0.19.0

- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
- pytest-localserver>=0.7.1
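The fastparquet and adbc-driver-postgresql minimum-version bumps above recur in several of the ci/deps files below. These files are conda environment specifications, so one way to reproduce the CI environment locally — a sketch, assuming conda (or mamba) is installed; the environment name is arbitrary — is:

    conda env create -n pandas-ci-310 -f ci/deps/actions-310.yaml
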
4 changes: 2 additions & 2 deletions ci/deps/actions-311-downstream_compat.yaml
@@ -28,7 +28,7 @@ dependencies:
- beautifulsoup4>=4.11.2
- blosc>=1.21.3
- bottleneck>=1.3.6
- fastparquet>=2023.04.0
- fastparquet>=2023.10.0
- fsspec>=2022.11.0
- html5lib>=1.1
- hypothesis>=6.46.1
@@ -72,6 +72,6 @@ dependencies:
- pyyaml
- py
- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
5 changes: 1 addition & 4 deletions ci/deps/actions-311-numpydev.yaml
@@ -13,10 +13,7 @@ dependencies:
# test dependencies
- pytest>=7.3.2
- pytest-cov
# Once pytest-cov > 4 comes out, unpin this
# Right now, a DeprecationWarning related to rsyncdir
# causes an InternalError within pytest
- pytest-xdist>=2.2.0, <3
- pytest-xdist>=2.2.0
- hypothesis>=6.46.1

# pandas dependencies
4 changes: 2 additions & 2 deletions ci/deps/actions-311.yaml
@@ -26,7 +26,7 @@ dependencies:
- beautifulsoup4>=4.11.2
- blosc>=1.21.3
- bottleneck>=1.3.6
- fastparquet>=2023.04.0
- fastparquet>=2023.10.0
- fsspec>=2022.11.0
- html5lib>=1.1
- hypothesis>=6.46.1
@@ -57,6 +57,6 @@ dependencies:
- zstandard>=0.19.0

- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- pytest-localserver>=0.7.1
