Skip to content

Commit

Permalink
Write pickle to file-like without intermediate in-memory buffer (#37056)
Browse files Browse the repository at this point in the history
  • Loading branch information
ig248 committed Oct 14, 2020
1 parent 3d29aee commit 0fa47b6
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 15 deletions.
6 changes: 6 additions & 0 deletions asv_bench/benchmarks/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,11 @@ def time_read_pickle(self):
def time_write_pickle(self):
    """Benchmark writing ``self.df`` to ``self.fname`` with ``to_pickle``."""
    # The ``time_`` name prefix is what marks this as a timing benchmark.
    self.df.to_pickle(self.fname)

def peakmem_read_pickle(self):
    """Benchmark peak memory while reading the pickle at ``self.fname``."""
    # The loaded object is discarded on purpose; only the cost of the read
    # itself is of interest here.
    read_pickle(self.fname)

def peakmem_write_pickle(self):
    """Benchmark peak memory while writing ``self.df`` to ``self.fname``."""
    # Same operation as the timing benchmark; the ``peakmem_`` name prefix
    # selects peak-memory measurement instead of wall-clock time.
    self.df.to_pickle(self.fname)


from ..pandas_vb_common import setup # noqa: F401 isort:skip
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ Performance improvements
avoiding creating these again, if created on either. This can speed up operations that depend on creating copies of existing indexes (:issue:`36840`)
- Performance improvement in :meth:`RollingGroupby.count` (:issue:`35625`)
- Small performance decrease to :meth:`Rolling.min` and :meth:`Rolling.max` for fixed windows (:issue:`36567`)
- Reduced peak memory usage in :meth:`DataFrame.to_pickle` when using ``protocol=5`` in Python 3.8+ (:issue:`34244`)
- Performance improvement in :class:`ExpandingGroupby` (:issue:`37064`)

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def to_pickle(
if protocol < 0:
protocol = pickle.HIGHEST_PROTOCOL
try:
f.write(pickle.dumps(obj, protocol=protocol))
pickle.dump(obj, f, protocol=protocol)
finally:
if f != filepath_or_buffer:
# do not close user-provided file objects GH 35679
Expand Down
44 changes: 30 additions & 14 deletions pandas/tests/io/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"""
import bz2
import datetime
import functools
import glob
import gzip
import io
Expand All @@ -24,7 +25,7 @@

import pytest

from pandas.compat import get_lzma_file, import_lzma, is_platform_little_endian
from pandas.compat import PY38, get_lzma_file, import_lzma, is_platform_little_endian
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -155,28 +156,43 @@ def test_pickles(current_pickle_data, legacy_pickle):
compare(current_pickle_data, legacy_pickle, version)


def test_round_trip_current(current_pickle_data):
def python_pickler(obj, path):
with open(path, "wb") as fh:
pickle.dump(obj, fh, protocol=-1)
def python_pickler(obj, path):
    """
    Pickle ``obj`` to ``path`` using the standard library directly.

    Parameters
    ----------
    obj : object
        Any picklable object.
    path : str or path-like
        Destination file. It is opened in binary write mode, so an existing
        file at this location is overwritten.
    """
    with open(path, "wb") as fh:
        # A negative protocol asks pickle for its highest available protocol.
        pickle.dump(obj, fh, protocol=-1)

def python_unpickler(path):
with open(path, "rb") as fh:
fh.seek(0)
return pickle.load(fh)

def python_unpickler(path):
    """
    Load and return the object pickled at ``path`` using the standard library.

    Parameters
    ----------
    path : str or path-like
        File to read; opened in binary read mode. Only use this on files that
        are trusted, since unpickling can execute arbitrary code.

    Returns
    -------
    object
        The object reconstructed by ``pickle.load``.
    """
    with open(path, "rb") as fh:
        # A freshly opened file is already at offset 0; the explicit rewind is
        # kept as a harmless safeguard.
        fh.seek(0)
        return pickle.load(fh)


@pytest.mark.parametrize(
"pickle_writer",
[
pytest.param(python_pickler, id="python"),
pytest.param(pd.to_pickle, id="pandas_proto_default"),
pytest.param(
functools.partial(pd.to_pickle, protocol=pickle.HIGHEST_PROTOCOL),
id="pandas_proto_highest",
),
pytest.param(functools.partial(pd.to_pickle, protocol=4), id="pandas_proto_4"),
pytest.param(
functools.partial(pd.to_pickle, protocol=5),
id="pandas_proto_5",
marks=pytest.mark.skipif(not PY38, reason="protocol 5 not supported"),
),
],
)
def test_round_trip_current(current_pickle_data, pickle_writer):
data = current_pickle_data
for typ, dv in data.items():
for dt, expected in dv.items():

for writer in [pd.to_pickle, python_pickler]:
if writer is None:
continue

with tm.ensure_clean() as path:

# test writing with each pickler
writer(expected, path)
pickle_writer(expected, path)

# test reading with each unpickler
result = pd.read_pickle(path)
Expand Down

1 comment on commit 0fa47b6

@TNieuwdorp
Copy link

@TNieuwdorp TNieuwdorp commented on 0fa47b6 Jan 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#39002

Commit possibly causes the above bug in case of compression.

Please sign in to comment.