Skip to content

Commit

Permalink
BUG: set keyword argument so zipfile actually compresses (pandas-dev#21144)
Browse files Browse the repository at this point in the history

(cherry picked from commit c85ab08)
  • Loading branch information
minggli authored and jorisvandenbossche committed Jun 8, 2018
1 parent 8feebe7 commit ea4e49d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.1.txt
Expand Up @@ -84,6 +84,7 @@ Indexing
I/O
^^^

- Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`)
- Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`)
-

Expand Down
8 changes: 4 additions & 4 deletions pandas/io/common.py
Expand Up @@ -5,7 +5,7 @@
import codecs
import mmap
from contextlib import contextmanager, closing
from zipfile import ZipFile
import zipfile

from pandas.compat import StringIO, BytesIO, string_types, text_type
from pandas import compat
Expand Down Expand Up @@ -428,7 +428,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
return f, handles


class BytesZipFile(ZipFile, BytesIO):
class BytesZipFile(zipfile.ZipFile, BytesIO):
"""
Wrapper for standard library class ZipFile and allow the returned file-like
handle to accept byte strings via `write` method.
Expand All @@ -437,10 +437,10 @@ class BytesZipFile(ZipFile, BytesIO):
bytes strings into a member of the archive.
"""
# GH 17778
def __init__(self, file, mode='r', **kwargs):
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
if mode in ['wb', 'rb']:
mode = mode.replace('b', '')
super(BytesZipFile, self).__init__(file, mode, **kwargs)
super(BytesZipFile, self).__init__(file, mode, compression, **kwargs)

def write(self, data):
super(BytesZipFile, self).writestr(self.filename, data)
Expand Down
21 changes: 20 additions & 1 deletion pandas/tests/test_common.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-

import pytest
import os
import collections
from functools import partial

import numpy as np

from pandas import Series, Timestamp
from pandas import Series, DataFrame, Timestamp
from pandas.compat import range, lmap
import pandas.core.common as com
from pandas.core import ops
Expand Down Expand Up @@ -222,3 +223,21 @@ def test_standardize_mapping():

dd = collections.defaultdict(list)
assert isinstance(com.standardize_mapping(dd), partial)


@pytest.mark.parametrize('obj', [
    DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                     [12.32112, 123123.2, 321321.2]],
              columns=['X', 'Y', 'Z']),
    Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
def test_compression_size(obj, method, compression):
    """
    Check that a compressed export is smaller on disk than a plain one.

    The object is written twice to the same temporary path through the
    writer named by ``method``: first with the parametrized ``compression``
    and then with ``compression=None``. The file size is recorded after
    each write and the uncompressed size must be strictly larger.
    """
    # A falsy ``compression`` value means there is nothing to compare.
    if not compression:
        pytest.skip("only test compression case.")

    with tm.ensure_clean() as filename:
        write = getattr(obj, method)
        sizes = []
        # Order matters: the compressed write happens first, and the
        # uncompressed write then reuses the same path.
        for codec in (compression, None):
            write(filename, compression=codec)
            sizes.append(os.path.getsize(filename))
        compressed, uncompressed = sizes
        assert uncompressed > compressed

0 comments on commit ea4e49d

Please sign in to comment.