From 5667a3ad0489815c1239cba785300952c9799000 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Thu, 9 Mar 2017 09:50:04 -0500
Subject: [PATCH] TST: fix up compression tests / docs

---
 doc/source/io.rst               |  55 +++---
 doc/source/whatsnew/v0.20.0.txt |  40 +++--
 pandas/tests/io/test_pickle.py  | 289 ++++++++++++++++----------------
 3 files changed, 208 insertions(+), 176 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 67491c8b30de7..fdd33ab4625f3 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3042,22 +3042,19 @@ any pickled pandas object (or any other pickled object) from file:
    See `this question <http://stackoverflow.com/questions/20444593/pandas-compiled-from-source-default-pickle-behavior-changed>`__
    for a detailed explanation.
 
-.. note::
-
-    These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated.
-
 .. _io.pickle.compression:
 
-Read/Write compressed pickle files
-''''''''''''''
+Compressed pickle files
+'''''''''''''''''''''''
 
 .. versionadded:: 0.20.0
 
 :func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read
-and write compressed pickle files. Compression types of ``gzip``, ``bz2``, ``xz`` supports
-both read and write. ``zip`` file supports read only and must contain only one data file
+and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing.
+The ``zip`` file format supports reading only and must contain only one data file
 to be read in.
-Compression type can be an explicitely parameter or be inferred from the file extension.
+
+The compression type can be an explicit parameter or be inferred from the file extension.
 If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or
 ``'.xz'``, respectively.
 
@@ -3065,17 +3062,37 @@ If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in `
 
    df = pd.DataFrame({
        'A': np.random.randn(1000),
-       'B': np.random.randn(1000),
-       'C': np.random.randn(1000)})
-   df.to_pickle("data.pkl.compress", compression="gzip")  # explicit compression type
-   df.to_pickle("data.pkl.xz", compression="infer")  # infer compression type from extension
-   df.to_pickle("data.pkl.gz")  # default, using "infer"
-   df["A"].to_pickle("s1.pkl.bz2")
+       'B': 'foo',
+       'C': pd.date_range('20130101', periods=1000, freq='s')})
+   df
+
+Using an explicit compression type
+
+.. ipython:: python
 
-   df = pd.read_pickle("data.pkl.compress", compression="gzip")
-   df = pd.read_pickle("data.pkl.xz", compression="infer")
-   df = pd.read_pickle("data.pkl.gz")
-   s = pd.read_pickle("s1.pkl.bz2")
+   df.to_pickle("data.pkl.compress", compression="gzip")
+   rt = pd.read_pickle("data.pkl.compress", compression="gzip")
+   rt
+
+Inferring compression type from the extension
+
+.. ipython:: python
+
+   df.to_pickle("data.pkl.xz", compression="infer")
+   rt = pd.read_pickle("data.pkl.xz", compression="infer")
+   rt
+
+The default is to 'infer'
+
+.. ipython:: python
+
+   df.to_pickle("data.pkl.gz")
+   rt = pd.read_pickle("data.pkl.gz")
+   rt
+
+   df["A"].to_pickle("s1.pkl.bz2")
+   rt = pd.read_pickle("s1.pkl.bz2")
+   rt
 
 .. ipython:: python
    :suppress:
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 4b320d21fe738..8f671062464f0 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -102,23 +102,41 @@ Pickle file I/O now supports compression
 :func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle`
 can now read from and write to compressed pickle files. Compression methods
 can be an explicit parameter or be inferred from the file extension.
-See :ref:`Read/Write compressed pickle files <io.pickle.compression>`
+See :ref:`the docs here <io.pickle.compression>`.
 
 .. ipython:: python
 
    df = pd.DataFrame({
        'A': np.random.randn(1000),
-       'B': np.random.randn(1000),
-       'C': np.random.randn(1000)})
-   df.to_pickle("data.pkl.compress", compression="gzip")  # explicit compression type
-   df.to_pickle("data.pkl.xz", compression="infer")  # infer compression type from extension
-   df.to_pickle("data.pkl.gz")  # default, using "infer"
-   df["A"].to_pickle("s1.pkl.bz2")
+       'B': 'foo',
+       'C': pd.date_range('20130101', periods=1000, freq='s')})
+
+Using an explicit compression type
+
+.. ipython:: python
 
-   df = pd.read_pickle("data.pkl.compress", compression="gzip")
-   df = pd.read_pickle("data.pkl.xz", compression="infer")
-   df = pd.read_pickle("data.pkl.gz")
-   s = pd.read_pickle("s1.pkl.bz2")
+   df.to_pickle("data.pkl.compress", compression="gzip")
+   rt = pd.read_pickle("data.pkl.compress", compression="gzip")
+   rt
+
+Inferring compression type from the extension
+
+.. ipython:: python
+
+   df.to_pickle("data.pkl.xz", compression="infer")
+   rt = pd.read_pickle("data.pkl.xz", compression="infer")
+   rt
+
+The default is to 'infer'
+
+.. ipython:: python
+
+   df.to_pickle("data.pkl.gz")
+   rt = pd.read_pickle("data.pkl.gz")
+   rt
+   df["A"].to_pickle("s1.pkl.bz2")
+   rt = pd.read_pickle("s1.pkl.bz2")
+   rt
 
 .. ipython:: python
    :suppress:
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 2fffc3c39ec26..91e70e942089c 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -306,191 +306,188 @@ def test_pickle_v0_15_2():
 # ---------------------
 # test pickle compression
 # ---------------------
-_compression_to_extension = {
-    None: ".none",
-    'gzip': '.gz',
-    'bz2': '.bz2',
-    'zip': '.zip',
-    'xz': '.xz',
-}
-
 
+@pytest.fixture
 def get_random_path():
     return u'__%s__.pickle' % tm.rands(10)
 
 
-def compress_file(src_path, dest_path, compression):
-    if compression is None:
-        shutil.copyfile(src_path, dest_path)
-        return
-
-    if compression == 'gzip':
-        import gzip
-        f = gzip.open(dest_path, "w")
-    elif compression == 'bz2':
-        import bz2
-        f = bz2.BZ2File(dest_path, "w")
-    elif compression == 'zip':
-        import zipfile
-        zip_file = zipfile.ZipFile(dest_path, "w",
-                                   compression=zipfile.ZIP_DEFLATED)
-        zip_file.write(src_path, os.path.basename(src_path))
-    elif compression == 'xz':
-        lzma = pandas.compat.import_lzma()
-        f = lzma.LZMAFile(dest_path, "w")
-    else:
-        msg = 'Unrecognized compression type: {}'.format(compression)
-        raise ValueError(msg)
-
-    if compression != "zip":
-        f.write(open(src_path, "rb").read())
-        f.close()
+class TestCompression(object):
 
+    _compression_to_extension = {
+        None: ".none",
+        'gzip': '.gz',
+        'bz2': '.bz2',
+        'zip': '.zip',
+        'xz': '.xz',
+    }
 
-def decompress_file(src_path, dest_path, compression):
-    if compression is None:
-        shutil.copyfile(src_path, dest_path)
-        return
+    def compress_file(self, src_path, dest_path, compression):
+        if compression is None:
+            shutil.copyfile(src_path, dest_path)
+            return
 
-    if compression == 'gzip':
-        import gzip
-        f = gzip.open(src_path, "r")
-    elif compression == 'bz2':
-        import bz2
-        f = bz2.BZ2File(src_path, "r")
-    elif compression == 'zip':
-        import zipfile
-        zip_file = zipfile.ZipFile(src_path)
-        zip_names = zip_file.namelist()
-        if len(zip_names) == 1:
-            f = zip_file.open(zip_names.pop())
+        if compression == 'gzip':
+            import gzip
+            f = gzip.open(dest_path, "w")
+        elif compression == 'bz2':
+            import bz2
+            f = bz2.BZ2File(dest_path, "w")
+        elif compression == 'zip':
+            import zipfile
+            zip_file = zipfile.ZipFile(dest_path, "w",
+                                       compression=zipfile.ZIP_DEFLATED)
+            zip_file.write(src_path, os.path.basename(src_path))
+        elif compression == 'xz':
+            lzma = pandas.compat.import_lzma()
+            f = lzma.LZMAFile(dest_path, "w")
         else:
-            raise ValueError('ZIP file {} error. Only one file per ZIP.'
-                             .format(src_path))
-    elif compression == 'xz':
-        lzma = pandas.compat.import_lzma()
-        f = lzma.LZMAFile(src_path, "r")
-    else:
-        msg = 'Unrecognized compression type: {}'.format(compression)
-        raise ValueError(msg)
-
-    open(dest_path, "wb").write(f.read())
-    f.close()
+            msg = 'Unrecognized compression type: {}'.format(compression)
+            raise ValueError(msg)
 
+        if compression != "zip":
+            f.write(open(src_path, "rb").read())
+            f.close()
 
-@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz'])
-def test_write_explicit(compression):
-    # issue 11666
-    if compression == 'xz':
-        tm._skip_if_no_lzma()
+    def decompress_file(self, src_path, dest_path, compression):
+        if compression is None:
+            shutil.copyfile(src_path, dest_path)
+            return
 
-    base = get_random_path()
-    path1 = base + ".compressed"
-    path2 = base + ".raw"
+        if compression == 'gzip':
+            import gzip
+            f = gzip.open(src_path, "r")
+        elif compression == 'bz2':
+            import bz2
+            f = bz2.BZ2File(src_path, "r")
+        elif compression == 'zip':
+            import zipfile
+            zip_file = zipfile.ZipFile(src_path)
+            zip_names = zip_file.namelist()
+            if len(zip_names) == 1:
+                f = zip_file.open(zip_names.pop())
+            else:
+                raise ValueError('ZIP file {} error. Only one file per ZIP.'
+                                 .format(src_path))
+        elif compression == 'xz':
+            lzma = pandas.compat.import_lzma()
+            f = lzma.LZMAFile(src_path, "r")
+        else:
+            msg = 'Unrecognized compression type: {}'.format(compression)
+            raise ValueError(msg)
 
-    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
-        df = tm.makeDataFrame()
+        open(dest_path, "wb").write(f.read())
+        f.close()
 
-        # write to compressed file
-        df.to_pickle(p1, compression=compression)
+    @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz'])
+    def test_write_explicit(self, compression, get_random_path):
+        # issue 11666
+        if compression == 'xz':
+            tm._skip_if_no_lzma()
 
-        # decompress
-        decompress_file(p1, p2, compression=compression)
+        base = get_random_path
+        path1 = base + ".compressed"
+        path2 = base + ".raw"
 
-        # read decompressed file
-        df2 = pd.read_pickle(p2, compression=None)
+        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
+            df = tm.makeDataFrame()
 
-        tm.assert_frame_equal(df, df2)
+            # write to compressed file
+            df.to_pickle(p1, compression=compression)
 
+            # decompress
+            self.decompress_file(p1, p2, compression=compression)
 
-@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z'])
-def test_write_explicit_bad(compression):
-    with tm.assertRaisesRegexp(ValueError,
-                               "Unrecognized compression type"):
-        with tm.ensure_clean(get_random_path()) as path:
-            df = tm.makeDataFrame()
-            df.to_pickle(path, compression=compression)
+            # read decompressed file
+            df2 = pd.read_pickle(p2, compression=None)
 
+            tm.assert_frame_equal(df, df2)
 
-@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress'])
-def test_write_infer(ext):
-    if ext == '.xz':
-        tm._skip_if_no_lzma()
+    @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z'])
+    def test_write_explicit_bad(self, compression, get_random_path):
+        with tm.assertRaisesRegexp(ValueError,
+                                   "Unrecognized compression type"):
+            with tm.ensure_clean(get_random_path) as path:
+                df = tm.makeDataFrame()
+                df.to_pickle(path, compression=compression)
 
-    base = get_random_path()
-    path1 = base + ext
-    path2 = base + ".raw"
-    compression = None
-    for c in _compression_to_extension:
-        if _compression_to_extension[c] == ext:
-            compression = c
-            break
+    @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress'])
+    def test_write_infer(self, ext, get_random_path):
+        if ext == '.xz':
+            tm._skip_if_no_lzma()
 
-    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
-        df = tm.makeDataFrame()
+        base = get_random_path
+        path1 = base + ext
+        path2 = base + ".raw"
+        compression = None
+        for c in self._compression_to_extension:
+            if self._compression_to_extension[c] == ext:
+                compression = c
+                break
 
-        # write to compressed file by inferred compression method
-        df.to_pickle(p1)
-
-        # decompress
-        decompress_file(p1, p2, compression=compression)
+        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
+            df = tm.makeDataFrame()
 
-        # read decompressed file
-        df2 = pd.read_pickle(p2, compression=None)
+            # write to compressed file by inferred compression method
+            df.to_pickle(p1)
 
-        tm.assert_frame_equal(df, df2)
+            # decompress
+            self.decompress_file(p1, p2, compression=compression)
 
+            # read decompressed file
+            df2 = pd.read_pickle(p2, compression=None)
 
-@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"])
-def test_read_explicit(compression):
-    # issue 11666
-    if compression == 'xz':
-        tm._skip_if_no_lzma()
+            tm.assert_frame_equal(df, df2)
 
-    base = get_random_path()
-    path1 = base + ".raw"
-    path2 = base + ".compressed"
+    @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"])
+    def test_read_explicit(self, compression, get_random_path):
+        # issue 11666
+        if compression == 'xz':
+            tm._skip_if_no_lzma()
 
-    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
-        df = tm.makeDataFrame()
+        base = get_random_path
+        path1 = base + ".raw"
+        path2 = base + ".compressed"
 
-        # write to uncompressed file
-        df.to_pickle(p1, compression=None)
+        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
+            df = tm.makeDataFrame()
 
-        # compress
-        compress_file(p1, p2, compression=compression)
+            # write to uncompressed file
+            df.to_pickle(p1, compression=None)
 
-        # read compressed file
-        df2 = pd.read_pickle(p2, compression=compression)
+            # compress
+            self.compress_file(p1, p2, compression=compression)
 
-        tm.assert_frame_equal(df, df2)
+            # read compressed file
+            df2 = pd.read_pickle(p2, compression=compression)
 
+            tm.assert_frame_equal(df, df2)
 
-@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip',
-                                 '.no_compress'])
-def test_read_infer(ext):
-    if ext == '.xz':
-        tm._skip_if_no_lzma()
+    @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip',
+                                     '.no_compress'])
+    def test_read_infer(self, ext, get_random_path):
+        if ext == '.xz':
+            tm._skip_if_no_lzma()
 
-    base = get_random_path()
-    path1 = base + ".raw"
-    path2 = base + ext
-    compression = None
-    for c in _compression_to_extension:
-        if _compression_to_extension[c] == ext:
-            compression = c
-            break
+        base = get_random_path
+        path1 = base + ".raw"
+        path2 = base + ext
+        compression = None
+        for c in self._compression_to_extension:
+            if self._compression_to_extension[c] == ext:
+                compression = c
+                break
 
-    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
-        df = tm.makeDataFrame()
+        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
+            df = tm.makeDataFrame()
 
-        # write to uncompressed file
-        df.to_pickle(p1, compression=None)
+            # write to uncompressed file
+            df.to_pickle(p1, compression=None)
 
-        # compress
-        compress_file(p1, p2, compression=compression)
+            # compress
+            self.compress_file(p1, p2, compression=compression)
 
-        # read compressed file by inferred compression method
-        df2 = pd.read_pickle(p2)
+            # read compressed file by inferred compression method
+            df2 = pd.read_pickle(p2)
 
-        tm.assert_frame_equal(df, df2)
+            tm.assert_frame_equal(df, df2)