41 commits
b13b851
wip replicating issue
Oct 5, 2025
d6d46f7
undo change to missing.py
Oct 5, 2025
9f7d8d3
use pd.NA
Oct 8, 2025
7f97edd
STY: Enforce Ruff rule B905 for pandas/_config (#62591)
G26karthik Oct 5, 2025
d013e5a
STY: Add strict=True in zip() in to_dict.py (#62589)
invain01 Oct 5, 2025
c91ab45
STY: Add `strict` parameter with appropriate value in `zip()` in pand…
GKK-Hub Oct 5, 2025
25ece31
STY: Add `strict=True` in `zip()` in pandas\tests\io\test_orc.py (#62…
GKK-Hub Oct 5, 2025
1d76d17
Fix warning typo (#62583)
matiaslindgren Oct 5, 2025
1750ad8
BUG: make read_csv read large integers (>64bits) as python integers w…
Alvaro-Kothe Oct 5, 2025
b1222ed
DOC: clarify that check_array_indexer accepts int and slice (#62573)
sah0725 Oct 5, 2025
e5d2851
BUG: Fixes regression in Series.pow with all-NA double[pyarrow] value…
fandimeng-fm Oct 5, 2025
ad99fe8
TST: Replace ensure_clean with temp_file in 3 files (#62556)
mikkas456 Oct 5, 2025
f7a592a
Add tests for date parsing when display.date_dayfirst and display.dat…
harinik Oct 5, 2025
9f504a1
ENH: improve error message when specifying a previously-deprecated fr…
jgao8 Oct 5, 2025
088d50b
BUG: setitem with Series[pyarrow] values (#62465)
jbrockmendel Oct 5, 2025
41db6c5
DOC: clarify Series.map behavior for categorical dtype (#62338)
Talyahav17 Oct 5, 2025
2233725
DOC: Add example for multi-column joins using `merge` (#62016)
thwait Oct 5, 2025
ab0ca9e
DOC: documenting pandas.MultIndex.argsort (#62005)
loicdiridollou Oct 5, 2025
3b12b82
fix reproducibility issue (#62459)
hongxu-jia Oct 5, 2025
06ceadd
BUG: Fix Series.str.zfill for ArrowDtype string arrays #61485 (#61533)
iabhi4 Oct 5, 2025
09fde3c
BUG FIX: None of the included dtypes present in df will raise ValueEr…
khemkaran10 Oct 5, 2025
e6b8e52
Zip Strict for pandas/core/dtypes (#62594)
asoledad33 Oct 5, 2025
3888917
DOC: Replace appender decorators with docstrings for IntervalArray #6…
ahmedbektic Oct 5, 2025
6b6689a
TST: Add regression test for pyarrow datetime merge with duplicates (…
G26karthik Oct 6, 2025
f09124d
DOC: Replace @Appender with inline docstring for DataFrame.items (#62…
G26karthik Oct 6, 2025
6a1d829
DOC: inline docstring for pandas.plotting functions (#62584)
KevsterAmp Oct 6, 2025
76ffa96
TST: Replace ensure_clean with pytest temp_file/tmp_path fixture. (#6…
invain01 Oct 6, 2025
dc91d9f
ENH: set __module__ on remaining io.* (#62597)
dkane01 Oct 6, 2025
535ebcf
[pre-commit.ci] pre-commit autoupdate (#62603)
pre-commit-ci[bot] Oct 6, 2025
70f6bb8
BUG: make `read_csv` be able to read large floating numbers into floa…
Alvaro-Kothe Oct 6, 2025
ec842ce
STY: Use strict arg in zip() in pandas/core/arrays (#62596)
GuruprashanthKrishnakumar Oct 7, 2025
5038892
TST: Replace ensure_clean with temp_file in test_errors.py (#62612)
HulusiOzy Oct 7, 2025
7f28fc9
Zip strict strings (#62605)
asoledad33 Oct 7, 2025
bb85d99
TST: Replace ensure_clean with temp_file in test_pytables_missing.py …
HulusiOzy Oct 7, 2025
749c22f
TST: Replace ensure_clean with temp_file in test_file_handling.py (#6…
HulusiOzy Oct 7, 2025
6be7a76
BUG: Converting string of type lxml.etree._ElementUnicodeResult to a …
floura-angel Oct 7, 2025
c551e5c
TST: Replace ensure_clean with temp_file in test_multi_thread.py (#62…
HulusiOzy Oct 7, 2025
88db300
CI: remove "take" for assigning issues to user (#62585)
KevsterAmp Oct 7, 2025
03035e8
CI: Clean up some GHAs (#62619)
mroeschke Oct 7, 2025
d937200
Tst replace ensure clean test compression (#62616)
HulusiOzy Oct 7, 2025
f8b8ff1
TST: Replace 'ensure_clean' with 'temp_file' in some tests (#62474)
kianelbo Oct 7, 2025
39 changes: 0 additions & 39 deletions .github/workflows/broken-linkcheck.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/cache-cleanup-daily.yml
@@ -6,7 +6,7 @@ on:

jobs:
cleanup:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
if: github.repository_owner == 'pandas-dev'
permissions:
actions: write
3 changes: 2 additions & 1 deletion .github/workflows/cache-cleanup.yml
@@ -6,7 +6,8 @@ on:

jobs:
cleanup:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
if: github.repository_owner == 'pandas-dev'
steps:
- name: Clean Cache
run: |
70 changes: 0 additions & 70 deletions .github/workflows/comment-commands.yml
@@ -9,15 +9,6 @@ permissions:
pull-requests: write

jobs:
issue_assign:
runs-on: ubuntu-24.04
if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
concurrency:
group: ${{ github.actor }}-issue-assign
steps:
- run: |
echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
preview_docs:
runs-on: ubuntu-24.04
if: github.event.issue.pull_request && github.event.comment.body == '/preview'
@@ -28,64 +19,3 @@ jobs:
with:
previewer-server: "https://pandas.pydata.org/preview"
artifact-job: "Doc Build and Upload"
asv_run:
runs-on: ubuntu-24.04
# TODO: Support more benchmarking options later, against different branches, against self, etc
if: github.event.issue.pull_request && startsWith(github.event.comment.body, '@github-actions benchmark')
defaults:
run:
shell: bash -el {0}
env:
ENV_FILE: environment.yml
COMMENT: ${{github.event.comment.body}}

concurrency:
# Set concurrency to prevent abuse(full runs are ~5.5 hours !!!)
# each user can only run one concurrent benchmark bot at a time
# We don't cancel in progress jobs, but if you want to benchmark multiple PRs, you're gonna have
# to wait
group: ${{ github.actor }}-asv
cancel-in-progress: false

steps:
- name: Checkout
uses: actions/checkout@v5
with:
fetch-depth: 0

# Although asv sets up its own env, deps are still needed
# during discovery process
- name: Set up Conda
uses: ./.github/actions/setup-conda

- name: Run benchmarks
id: bench
continue-on-error: true # asv will exit code 1 for regressions
run: |
# extracting the regex, see https://stackoverflow.com/a/36798723
REGEX=$(echo "$COMMENT" | sed -n "s/^.*-b\s*\(\S*\).*$/\1/p")
cd asv_bench
asv check -E existing
git remote add upstream https://github.com/pandas-dev/pandas.git
git fetch upstream
asv machine --yes
asv continuous -f 1.1 -b $REGEX upstream/main HEAD
echo 'BENCH_OUTPUT<<EOF' >> $GITHUB_ENV
asv compare -f 1.1 upstream/main HEAD >> $GITHUB_ENV
echo 'EOF' >> $GITHUB_ENV
echo "REGEX=$REGEX" >> $GITHUB_ENV

- uses: actions/github-script@v8
env:
BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
REGEX: ${{env.REGEX}}
with:
script: |
const ENV_VARS = process.env
const run_url = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: '\nBenchmarks completed. View runner logs here.' + run_url + '\nRegex used: '+ 'regex ' + ENV_VARS["REGEX"] + '\n' + ENV_VARS["BENCH_OUTPUT"]
})
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
@@ -182,7 +182,7 @@ jobs:
strategy:
matrix:
# Note: Don't use macOS latest since macos 14 appears to be arm64 only
os: [macos-13, macos-14, windows-latest]
os: [macos-13, macos-14, windows-2025]
env_file: [actions-311.yaml, actions-312.yaml, actions-313.yaml]
fail-fast: false
runs-on: ${{ matrix.os }}
@@ -322,7 +322,7 @@ jobs:
fail-fast: false
matrix:
# Separate out macOS 13 and 14, since macOS 14 is arm64 only
os: [ubuntu-24.04, macOS-13, macOS-14, windows-latest]
os: [ubuntu-24.04, macOS-13, macOS-14, windows-2025]

timeout-minutes: 90

4 changes: 3 additions & 1 deletion .github/workflows/wheels.yml
@@ -229,7 +229,7 @@ jobs:
- build_sdist
- build_wheels

runs-on: ubuntu-latest
runs-on: ubuntu-24.04

environment:
name: pypi
@@ -243,6 +243,8 @@
with:
path: dist # everything lands in ./dist/**

# TODO: This step can probably be achieved by actions/download-artifact@v5
# by specifying merge-multiple: true, and a glob pattern
- name: Collect files
run: |
mkdir -p upload
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -19,7 +19,7 @@ ci:
skip: [pyright, mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.11
rev: v0.13.3
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
@@ -46,7 +46,7 @@ repos:
- id: codespell
types_or: [python, rst, markdown, cython, c]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.7
rev: v0.17.0
hooks:
- id: cython-lint
- id: double-quote-cython-strings
@@ -67,7 +67,7 @@ repos:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- repo: https://github.com/PyCQA/isort
rev: 6.0.1
rev: 6.1.0
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
@@ -92,14 +92,14 @@ repos:
- id: sphinx-lint
args: ["--enable", "all", "--disable", "line-too-long"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v21.1.0
rev: v21.1.2
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
args: [-i]
types_or: [c, c++]
- repo: https://github.com/trim21/pre-commit-mirror-meson
rev: v1.9.0
rev: v1.9.1
hooks:
- id: meson-fmt
args: ['--inplace']
14 changes: 6 additions & 8 deletions doc/source/development/contributing.rst
@@ -36,16 +36,14 @@ and `good first issue
<https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+sort%3Aupdated-desc+label%3A%22good+first+issue%22+no%3Aassignee>`_
are typically good for newer contributors.

Once you've found an interesting issue, it's a good idea to assign the issue to yourself,
so nobody else duplicates the work on it. On the Github issue, a comment with the exact
text ``take`` to automatically assign you the issue
(this will take seconds and may require refreshing the page to see it).
Once you've found an interesting issue, leave a comment with your intention
to start working on it. If somebody else has
already commented on the issue but has shown no activity on the issue
or a pull request in the past 2-3 weeks, you may take it over.

If for whatever reason you are not able to continue working with the issue, please
unassign it, so other people know it's available again. You can check the list of
assigned issues, since people may not be working in them anymore. If you want to work on one
that is assigned, feel free to kindly ask the current assignee if you can take it
(please allow at least a week of inactivity before considering work in the issue discontinued).
leave a comment on the issue, so other people know it's available again. You can check the list of
assigned issues, since people may not be working on them anymore.

We have several :ref:`contributor community <community>` communication channels, which you are
welcome to join, and ask questions as you figure things out. Among them are regular meetings for
36 changes: 36 additions & 0 deletions doc/source/getting_started/comparison/comparison_with_sql.rst
@@ -270,6 +270,42 @@ column with another DataFrame's index.
indexed_df2 = df2.set_index("key")
pd.merge(df1, indexed_df2, left_on="key", right_index=True)

:func:`~pandas.merge` also supports joining on multiple columns by passing a list of column names.

.. code-block:: sql

SELECT *
FROM df1_multi
INNER JOIN df2_multi
ON df1_multi.key1 = df2_multi.key1
AND df1_multi.key2 = df2_multi.key2;

.. ipython:: python

df1_multi = pd.DataFrame({
"key1": ["A", "B", "C", "D"],
"key2": [1, 2, 3, 4],
"value": np.random.randn(4)
})
df2_multi = pd.DataFrame({
"key1": ["B", "D", "D", "E"],
"key2": [2, 4, 4, 5],
"value": np.random.randn(4)
})
pd.merge(df1_multi, df2_multi, on=["key1", "key2"])

If the key columns are named differently in the two DataFrames, ``on`` can be replaced with
``left_on`` and ``right_on``.

.. ipython:: python

df2_multi = pd.DataFrame({
"key_1": ["B", "D", "D", "E"],
"key_2": [2, 4, 4, 5],
"value": np.random.randn(4)
})
pd.merge(df1_multi, df2_multi, left_on=["key1", "key2"], right_on=["key_1", "key_2"])

LEFT OUTER JOIN
~~~~~~~~~~~~~~~

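The multi-column merge added in the docs above can be checked concretely. A small sketch using deterministic values in place of `np.random.randn` (the `lhs`/`rhs` column names are illustrative, not from the docs):

```python
import pandas as pd

df1_multi = pd.DataFrame(
    {"key1": ["A", "B", "C", "D"], "key2": [1, 2, 3, 4], "lhs": [10, 20, 30, 40]}
)
df2_multi = pd.DataFrame(
    {"key1": ["B", "D", "D", "E"], "key2": [2, 4, 4, 5], "rhs": [1, 2, 3, 4]}
)

# Inner join on both keys: ("B", 2) matches once, while ("D", 4) matches
# both duplicate right-hand rows, so the result has three rows.
result = pd.merge(df1_multi, df2_multi, on=["key1", "key2"])
print(result)
```

Note how the duplicated `("D", 4)` key on the right fans out the single left-hand row, just as the equivalent SQL inner join would.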
6 changes: 5 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -948,6 +948,7 @@ Datetimelike
- Bug in :class:`Timestamp` constructor failing to raise when given a ``np.datetime64`` object with non-standard unit (:issue:`25611`)
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
- Bug in :func:`to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`)
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
- Bug in :meth:`DataFrame.fillna` raising an ``AssertionError`` instead of ``OutOfBoundsDatetime`` when filling a ``datetime64[ns]`` column with an out-of-bounds timestamp. Now correctly raises ``OutOfBoundsDatetime``. (:issue:`61208`)
@@ -972,6 +973,7 @@ Datetimelike
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)


Timedelta
^^^^^^^^^
- Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`)
@@ -1008,8 +1010,8 @@ Conversion

Strings
^^^^^^^
- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
-

Interval
^^^^^^^^
@@ -1079,6 +1081,8 @@ I/O
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
- Bug in :meth:`read_csv` with ``engine="c"`` reading big integers as strings. Now reads them as python integers. (:issue:`51295`)
- Bug in :meth:`read_csv` with ``engine="c"`` reading large float numbers with preceding integers as strings. Now reads them as floats. (:issue:`51295`)
- Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
4 changes: 2 additions & 2 deletions pandas/_config/config.py
@@ -271,7 +271,7 @@ def set_option(*args) -> None:
if not nargs or nargs % 2 != 0:
raise ValueError("Must provide an even number of non-keyword arguments")

for k, v in zip(args[::2], args[1::2]):
for k, v in zip(args[::2], args[1::2], strict=True):
key = _get_single_key(k)

opt = _get_registered_option(key)
@@ -502,7 +502,7 @@ def option_context(*args) -> Generator[None]:
"option_context(pat, val, pat, val...)."
)

ops = tuple(zip(args[::2], args[1::2]))
ops = tuple(zip(args[::2], args[1::2], strict=True))
try:
undo = tuple((pat, get_option(pat)) for pat, val in ops)
for pat, val in ops: