From 53593d095e0816614b0503f6899410cfb471655d Mon Sep 17 00:00:00 2001
From: Krzysztof Chomski <krzysztof.chomski@reef.pl>
Date: Wed, 1 Nov 2017 22:39:40 +0100
Subject: [PATCH 1/2] BUG: GH17778 - DataFrame.to_pickle() fails for .zip
 format. GH17778: add 'zip' format to unittests. Added entry in
 doc/source/whatsnew/v0.22.0.txt file to Bug Fixes section.

---
 doc/source/whatsnew/v0.22.0.txt |  1 +
 pandas/io/common.py             | 23 +++++++++++++----------
 pandas/io/pickle.py             | 12 +++++++++++-
 pandas/tests/io/test_pickle.py  |  7 ++++---
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 8afdd1b2e22b3..4211d9913a497 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -90,6 +90,7 @@ Documentation Changes
 Bug Fixes
 ~~~~~~~~~
 
+- Bug in ``DataFrame.to_pickle()`` where pickling to a ``.zip`` file failed (:issue:`17778`)
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 534c1e0671150..f799cab161cd9 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -357,17 +357,20 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
         # ZIP Compression
         elif compression == 'zip':
             import zipfile
-            zip_file = zipfile.ZipFile(path_or_buf)
-            zip_names = zip_file.namelist()
-            if len(zip_names) == 1:
-                f = zip_file.open(zip_names.pop())
-            elif len(zip_names) == 0:
-                raise ValueError('Zero files found in ZIP file {}'
-                                 .format(path_or_buf))
+            if mode == 'wb':
+                f = zipfile.ZipFile(path_or_buf, 'w')
             else:
-                raise ValueError('Multiple files found in ZIP file.'
-                                 ' Only one file per ZIP: {}'
-                                 .format(zip_names))
+                zip_file = zipfile.ZipFile(path_or_buf)
+                zip_names = zip_file.namelist()
+                if len(zip_names) == 1:
+                    f = zip_file.open(zip_names.pop())
+                elif len(zip_names) == 0:
+                    raise ValueError('Zero files found in ZIP file {}'
+                                     .format(path_or_buf))
+                else:
+                    raise ValueError('Multiple files found in ZIP file.'
+                                     ' Only one file per ZIP: {}'
+                                     .format(zip_names))
 
         # XZ Compression
         elif compression == 'xz':
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 143b76575e36b..aab9ffa1cce45 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -42,7 +42,17 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
     if protocol < 0:
         protocol = pkl.HIGHEST_PROTOCOL
     try:
-        pkl.dump(obj, f, protocol=protocol)
+        import zipfile
+        if isinstance(f, zipfile.ZipFile):
+            import os
+            import tempfile
+            tmp_file = tempfile.NamedTemporaryFile(delete=False)
+            pkl.dump(obj, tmp_file, protocol=protocol)
+            tmp_file.close()
+            f.write(tmp_file.name)
+            os.remove(tmp_file.name)
+        else:
+            pkl.dump(obj, f, protocol=protocol)
     finally:
         for _f in fh:
             _f.close()
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 91c1f19f5caab..91b59b2ff3ffb 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -382,7 +382,7 @@ def decompress_file(self, src_path, dest_path, compression):
             fh.write(f.read())
         f.close()
 
-    @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz'])
+    @pytest.mark.parametrize('compression', [None, 'gzip', 'zip', 'bz2', 'xz'])
     def test_write_explicit(self, compression, get_random_path):
         # issue 11666
         if compression == 'xz':
@@ -414,7 +414,8 @@ def test_write_explicit_bad(self, compression, get_random_path):
                 df = tm.makeDataFrame()
                 df.to_pickle(path, compression=compression)
 
-    @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress'])
+    @pytest.mark.parametrize('ext', ['', '.gz', '.zip', '.bz2', '.xz',
+                                     '.no_compress'])
     def test_write_infer(self, ext, get_random_path):
         if ext == '.xz':
             tm._skip_if_no_lzma()
@@ -442,7 +443,7 @@ def test_write_infer(self, ext, get_random_path):
 
             tm.assert_frame_equal(df, df2)
 
-    @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"])
+    @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', 'zip'])
     def test_read_explicit(self, compression, get_random_path):
         # issue 11666
         if compression == 'xz':

From 890337868f7dc357a7c69a1f9c3b35dcc1962043 Mon Sep 17 00:00:00 2001
From: Krzysztof Chomski <krzysztof.chomski@reef.pl>
Date: Tue, 14 Nov 2017 18:02:45 +0100
Subject: [PATCH 2/2] Added 'zip' to possible compression types in `to_pickle`
 docstring. Moved imports to top.

---
 pandas/io/pickle.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index aab9ffa1cce45..c165fe34f4734 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -1,5 +1,9 @@
 """ pickle compat """
 
+import os
+import tempfile
+import zipfile
+
 import numpy as np
 from numpy.lib.format import read_array, write_array
 from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
@@ -16,7 +20,7 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
     obj : any object
     path : string
         File path
-    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
+    compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer'
         a string representing the compression to use in the output file
 
         .. versionadded:: 0.20.0
@@ -42,10 +46,7 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
     if protocol < 0:
         protocol = pkl.HIGHEST_PROTOCOL
     try:
-        import zipfile
         if isinstance(f, zipfile.ZipFile):
-            import os
-            import tempfile
             tmp_file = tempfile.NamedTemporaryFile(delete=False)
             pkl.dump(obj, tmp_file, protocol=protocol)
             tmp_file.close()