CLN: Remove pickle support pre-pandas 1.0 (pandas-dev#57555)
* Centralize methods to class

* Cleanups

* CLN: Remove pickle support pre-pandas 1.0

* Typing

* clean:
mroeschke authored and pmhatre1 committed May 7, 2024
1 parent 7053398 commit 34d8c90
Showing 4 changed files with 57 additions and 180 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -94,7 +94,7 @@ Other API changes
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
-
- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)

.. ---------------------------------------------------------------------------
.. _whatsnew_300.deprecations:
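Context for the whatsnew entry above (not part of the diff): once this lands, pickles written by pandas older than 1.0 are no longer supported on 3.0. The usual migration path is a one-time re-save under a pandas version that can still read the old file. A minimal, hypothetical sketch, with illustrative file names:

```python
import pandas as pd

# One-time step under an older pandas (1.x/2.x) that can still read the file.
df = pd.read_pickle("legacy_pre_1.0.pkl")
df.to_pickle("resaved.pkl")  # readable by pandas >= 1.0, including 3.0

# For long-lived data, a versioned storage format sidesteps the issue entirely.
df.to_parquet("data.parquet")
```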
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/nattype.pyx
@@ -76,7 +76,7 @@ cdef _nat_rdivide_op(self, other):
return NotImplemented


def __nat_unpickle(*args):
def _nat_unpickle(*args):
# return constant defined in the module
return c_NaT

@@ -360,7 +360,7 @@ class NaTType(_NaT):
return self.__reduce__()

def __reduce__(self):
return (__nat_unpickle, (None, ))
return (_nat_unpickle, (None, ))

def __rtruediv__(self, other):
return _nat_rdivide_op(self, other)
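Aside on the rename above: a double-underscore name like `__nat_unpickle` is subject to Python's "class private name" mangling when referenced inside a class body, which Cython warns about (see the comment added in pickle_compat.py below); the single-underscore `_nat_unpickle` avoids that, and `pickle_compat` keeps a remapping for the old name. The mechanism itself is the standard `__reduce__` singleton pattern: pickle stores a module-level callable plus its arguments and calls it on load, so every round trip returns the same object. A small self-contained sketch of that pattern, assuming nothing pandas-specific:

```python
import pickle

def _unpickle_singleton(*args):
    # Return the module-level constant, mirroring _nat_unpickle returning c_NaT.
    return SINGLETON

class _SingletonType:
    def __reduce__(self):
        # pickle records (callable, args) and calls _unpickle_singleton(None) on load.
        return (_unpickle_singleton, (None,))

SINGLETON = _SingletonType()

# Round-tripping through pickle yields the very same object, not a copy.
assert pickle.loads(pickle.dumps(SINGLETON)) is SINGLETON
```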
221 changes: 49 additions & 172 deletions pandas/compat/pickle_compat.py
@@ -1,12 +1,11 @@
"""
Support pre-0.12 series pickle compatibility.
Pickle compatibility to pandas version 1.0
"""
from __future__ import annotations

import contextlib
import copy
import io
import pickle as pkl
import pickle
from typing import (
TYPE_CHECKING,
Any,
@@ -17,7 +16,6 @@
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import BaseOffset

from pandas import Index
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
@@ -29,111 +27,20 @@
from collections.abc import Generator


def load_reduce(self) -> None:
stack = self.stack
args = stack.pop()
func = stack[-1]

try:
stack[-1] = func(*args)
return
except TypeError as err:
# If we have a deprecated function,
# try to replace and try again.

msg = "_reconstruct: First argument must be a sub-type of ndarray"

if msg in str(err):
try:
cls = args[0]
stack[-1] = object.__new__(cls)
return
except TypeError:
pass
elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return

raise


# If classes are moved, provide compat here.
_class_locations_map = {
("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
# 15477
("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
# Re-routing unpickle block logic to go through _unpickle_block instead
# for pandas <= 1.3.5
("pandas.core.internals.blocks", "new_block"): (
"pandas._libs.internals",
"_unpickle_block",
),
("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
# 10890
("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
("pandas.sparse.series", "SparseTimeSeries"): (
"pandas.core.sparse.series",
"SparseSeries",
),
# 12588, extensions moving
("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
# 18543 moving period
("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
# 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
("pandas.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
("pandas._libs.tslib", "__nat_unpickle"): (
# Avoid Cython's warning "contradiction to to Python 'class private name' rules"
("pandas._libs.tslibs.nattype", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
# 15998 top-level dirs moving
("pandas.sparse.array", "SparseArray"): (
"pandas.core.arrays.sparse",
"SparseArray",
),
("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
("pandas.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
("pandas.tseries.index", "_new_DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"_new_DatetimeIndex",
),
("pandas.tseries.index", "DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"DatetimeIndex",
"_nat_unpickle",
),
("pandas.tseries.period", "PeriodIndex"): (
"pandas.core.indexes.period",
"PeriodIndex",
),
# 19269, arrays moving
("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
# 19939, add timedeltaindex, float64index compat from 15998 move
("pandas.tseries.tdi", "TimedeltaIndex"): (
"pandas.core.indexes.timedeltas",
"TimedeltaIndex",
),
("pandas.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
# 50775, remove Int64Index, UInt64Index & Float64Index from codabase
# 50775, remove Int64Index, UInt64Index & Float64Index from codebase
("pandas.core.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index",
@@ -155,85 +62,55 @@ def load_reduce(self) -> None:

# our Unpickler sub-class to override methods and some dispatcher
# functions for compat and uses a non-public class of the pickle module.


class Unpickler(pkl._Unpickler):
class Unpickler(pickle._Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
return super().find_class(module, name)

dispatch = pickle._Unpickler.dispatch.copy()

Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce


def load_newobj(self) -> None:
args = self.stack.pop()
cls = self.stack[-1]

# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
elif issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
obj = cls.__new__(cls, *args)

self.stack[-1] = obj


Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj


def load_newobj_ex(self) -> None:
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()

# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args, **kwargs)
self.append(obj)

def load_reduce(self) -> None:
stack = self.stack # type: ignore[attr-defined]
args = stack.pop()
func = stack[-1]

try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except (AttributeError, KeyError):
pass


def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any:
"""
Load a pickle, with a provided encoding,
Parameters
----------
fh : a filelike object
encoding : an optional encoding
is_verbose : show exception output
"""
try:
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
try:
stack[-1] = func(*args)
except TypeError:
# If we have a deprecated function,
# try to replace and try again.
if args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
raise

dispatch[pickle.REDUCE[0]] = load_reduce # type: ignore[assignment]

def load_newobj(self) -> None:
args = self.stack.pop() # type: ignore[attr-defined]
cls = self.stack.pop() # type: ignore[attr-defined]

# compat
if issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
up = Unpickler(fh)
# "Unpickler" has no attribute "is_verbose" [attr-defined]
up.is_verbose = is_verbose # type: ignore[attr-defined]
obj = cls.__new__(cls, *args)
self.append(obj) # type: ignore[attr-defined]

return up.load()
except (ValueError, TypeError):
raise
dispatch[pickle.NEWOBJ[0]] = load_newobj # type: ignore[assignment]


def loads(
@@ -257,9 +134,9 @@ def patch_pickle() -> Generator[None, None, None]:
"""
Temporarily patch pickle to use our unpickler.
"""
orig_loads = pkl.loads
orig_loads = pickle.loads
try:
setattr(pkl, "loads", loads)
setattr(pickle, "loads", loads)
yield
finally:
setattr(pkl, "loads", orig_loads)
setattr(pickle, "loads", orig_loads)
10 changes: 5 additions & 5 deletions pandas/io/pickle.py
@@ -8,7 +8,7 @@
)
import warnings

from pandas.compat import pickle_compat as pc
from pandas.compat import pickle_compat
from pandas.util._decorators import doc

from pandas.core.shared_docs import _shared_docs
@@ -158,7 +158,7 @@ def read_pickle(
Notes
-----
read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
read_pickle is only guaranteed to be backwards compatible to pandas 1.0
provided the object was serialized with to_pickle.
Examples
@@ -195,7 +195,6 @@ def read_pickle(
) as handles:
# 1) try standard library Pickle
# 2) try pickle_compat (older pandas version) to handle subclass changes

try:
with warnings.catch_warnings(record=True):
# We want to silence any warnings about, e.g. moved modules.
@@ -204,5 +203,6 @@
except excs_to_catch:
# e.g.
# "No module named 'pandas.core.sparse.series'"
# "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
return pc.load(handles.handle, encoding=None)
# "Can't get attribute '_nat_unpickle' on <module 'pandas._libs.tslib"
handles.handle.seek(0)
return pickle_compat.Unpickler(handles.handle).load()
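The control flow in read_pickle above boils down to: try the stdlib unpickler first, and only on failure rewind the handle and retry through the compat Unpickler. A simplified sketch of that pattern (the helper name is made up, and the real code catches a broader, version-dependent set of exceptions and silences module-move warnings):

```python
import pickle

from pandas.compat import pickle_compat

def _load_with_fallback(handle):
    try:
        # 1) standard library pickle
        return pickle.load(handle)
    except (ImportError, AttributeError, TypeError, ValueError):
        # 2) pickle_compat: rewind and retry with the remapping Unpickler
        handle.seek(0)
        return pickle_compat.Unpickler(handle).load()
```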
