From a94710d291a6786c4c9d81c416ec117823bdecc7 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 09:57:25 +0300 Subject: [PATCH 01/10] add pathlib monkeypatch with test --- smart_open/smart_open_lib.py | 10 ++++++++++ smart_open/tests/test_smart_open.py | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index c6ad586e..9e71a146 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -928,3 +928,13 @@ def _encoding_wrapper(fileobj, mode, encoding=None, errors=None): if mode[0] in ('w', 'a') or mode.endswith('+'): fileobj = codecs.getwriter(encoding)(fileobj, **kw) return fileobj + + +def patch_pathlib(): + """Replace `Path.open` with `smart_open.open`""" + pathlib = sys.modules.get("pathlib", None) + + if pathlib: + pathlib.Path.open = open + else: + warnings.warn("Can't patch 'pathlib.Path.open', you should import 'pathlib' first") diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index e3c77323..36a56481 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -13,6 +13,7 @@ import tempfile import os import hashlib +import pathlib import boto3 import mock @@ -24,6 +25,7 @@ import smart_open from smart_open import smart_open_lib from smart_open import webhdfs +from smart_open.smart_open_lib import patch_pathlib logger = logging.getLogger(__name__) @@ -287,6 +289,12 @@ def test_gs_uri_contains_slash(self): self.assertEqual(parsed_uri.bucket_id, "mybucket") self.assertEqual(parsed_uri.blob_id, "mydir/myblob") + def test_pathlib_monkeypath(self): + assert pathlib.Path.open != smart_open.open + patch_pathlib() + assert pathlib.Path.open == smart_open.open + + class SmartOpenHttpTest(unittest.TestCase): """ From bb9a2cb5787a7229013b0d89c7c23c028838ca50 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 10:38:46 +0300 Subject: [PATCH 02/10] add context-manager functionality 
& moar tests --- smart_open/smart_open_lib.py | 21 ++++++++++++++++++--- smart_open/tests/test_smart_open.py | 27 +++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 9e71a146..91837635 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -930,11 +930,26 @@ def _encoding_wrapper(fileobj, mode, encoding=None, errors=None): return fileobj -def patch_pathlib(): +class patch_pathlib(object): """Replace `Path.open` with `smart_open.open`""" + + def __init__(self): + self.old_impl = _patch_pathlib(open) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + _patch_pathlib(self.old_impl) + + +def _patch_pathlib(func): + """Replace `Path.open` with `func`""" pathlib = sys.modules.get("pathlib", None) if pathlib: - pathlib.Path.open = open + old_impl = pathlib.Path.open + pathlib.Path.open = func + return old_impl else: - warnings.warn("Can't patch 'pathlib.Path.open', you should import 'pathlib' first") + raise RuntimeError("Can't patch 'pathlib.Path.open', you should import 'pathlib' first") diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 36a56481..927a18c9 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -25,7 +25,7 @@ import smart_open from smart_open import smart_open_lib from smart_open import webhdfs -from smart_open.smart_open_lib import patch_pathlib +from smart_open.smart_open_lib import patch_pathlib, _patch_pathlib logger = logging.getLogger(__name__) @@ -291,9 +291,32 @@ def test_gs_uri_contains_slash(self): def test_pathlib_monkeypath(self): assert pathlib.Path.open != smart_open.open - patch_pathlib() + + with patch_pathlib(): + assert pathlib.Path.open == smart_open.open + + assert pathlib.Path.open != smart_open.open + + obj = patch_pathlib() assert pathlib.Path.open == smart_open.open + 
_patch_pathlib(obj.old_impl) + assert pathlib.Path.open != smart_open.open + + def test_pathlib_monkeypath_read_gz(self): + path = pathlib.Path(CURR_DIR) / 'test_data' / 'crime-and-punishment.txt.gz' + + # Check that standard implementation can't work with gzip + with path.open("r") as infile: + with self.assertRaises(Exception) as context: + lines = infile.readlines() + + # Check that our implementation works with gzip + obj = patch_pathlib() + with path.open("r") as infile: + lines = infile.readlines() + + _patch_pathlib(obj.old_impl) class SmartOpenHttpTest(unittest.TestCase): From c3ce26e8b6e79bdb660feb7c275f6b118ad67f7d Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 10:56:47 +0300 Subject: [PATCH 03/10] add docs --- README.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.rst b/README.rst index d62b3679..a76c1f4e 100644 --- a/README.rst +++ b/README.rst @@ -382,6 +382,26 @@ If your file object doesn't have one, set the ``.name`` attribute to an appropri Furthermore, that value has to end with a **known** file extension (see the ``register_compressor`` function). Otherwise, the transparent decompression will not occur. +Drop-in replacement of ``pathlib.Path.open`` +-------------------------------------------- + +Now you can natively use ``smart_open.open`` with your ``Path`` objects + +.. code-block:: python + + >> from pathlib import Path + >> from smart_open.smart_open_lib import patch_pathlib + >> + >> patch_pathlib() # replace `Path.open` with `smart_open.open` + >> + >> path = Path("/path/to/my/fize.gz") + >> with path.open("r") as infile: + .. # not possible with standard `Path.open` (because gzipped), + .. # but works perfectly with "patching" + .. for line in infile: + .. 
print(line) + + Comments, bug reports ===================== From 68299302b658ba980c9df3f886964d30b8660aab Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:00:27 +0300 Subject: [PATCH 04/10] upd --- smart_open/smart_open_lib.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 91837635..11c335b0 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -947,9 +947,9 @@ def _patch_pathlib(func): """Replace `Path.open` with `func`""" pathlib = sys.modules.get("pathlib", None) - if pathlib: - old_impl = pathlib.Path.open - pathlib.Path.open = func - return old_impl - else: + if not pathlib: raise RuntimeError("Can't patch 'pathlib.Path.open', you should import 'pathlib' first") + + old_impl = pathlib.Path.open + pathlib.Path.open = func + return old_impl From 8b86ca88a9900adcd5fce5d4f86163d2288991b1 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:07:43 +0300 Subject: [PATCH 05/10] add real file --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index a76c1f4e..4bf4d590 100644 --- a/README.rst +++ b/README.rst @@ -394,10 +394,11 @@ Now you can natively use ``smart_open.open`` with your ``Path`` objects >> >> patch_pathlib() # replace `Path.open` with `smart_open.open` >> - >> path = Path("/path/to/my/fize.gz") + >> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") + >>> >> with path.open("r") as infile: .. # not possible with standard `Path.open` (because gzipped), - .. # but works perfectly with "patching" + .. # but works perfectly with "patched" version by `smart_open` .. for line in infile: .. 
print(line) From 9d733aa66111187152d04608fb8af9e24e28dfd9 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:17:46 +0300 Subject: [PATCH 06/10] fix --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 4bf4d590..f775b14b 100644 --- a/README.rst +++ b/README.rst @@ -395,13 +395,13 @@ Now you can natively use ``smart_open.open`` with your ``Path`` objects >> patch_pathlib() # replace `Path.open` with `smart_open.open` >> >> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") - >>> + >> >> with path.open("r") as infile: .. # not possible with standard `Path.open` (because gzipped), .. # but works perfectly with "patched" version by `smart_open` .. for line in infile: .. print(line) - + .. break Comments, bug reports ===================== From bc1602b23d5ceeeeec0abc147494b94149986924 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:29:07 +0300 Subject: [PATCH 07/10] fix doctest? --- README.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index f775b14b..1227caf0 100644 --- a/README.rst +++ b/README.rst @@ -389,19 +389,19 @@ Now you can natively use ``smart_open.open`` with your ``Path`` objects .. code-block:: python - >> from pathlib import Path - >> from smart_open.smart_open_lib import patch_pathlib - >> - >> patch_pathlib() # replace `Path.open` with `smart_open.open` - >> - >> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") - >> - >> with path.open("r") as infile: - .. # not possible with standard `Path.open` (because gzipped), - .. # but works perfectly with "patched" version by `smart_open` - .. for line in infile: - .. print(line) - .. 
break + >>> from pathlib import Path + >>> from smart_open.smart_open_lib import patch_pathlib + >>> + >>> patch_pathlib() # replace `Path.open` with `smart_open.open` + >>> + >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") + >>> + >>> with path.open("r") as infile: + ... # not possible with standard `Path.open` (because gzipped), + ... # but works perfectly with "patched" version by `smart_open` + ... for line in infile: + ... print(line) + ... break Comments, bug reports ===================== From 5fbbd567c44e5e06ba430722314082fcd09a4978 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:34:03 +0300 Subject: [PATCH 08/10] fix doctest (again?) --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 1227caf0..82fe7988 100644 --- a/README.rst +++ b/README.rst @@ -392,7 +392,7 @@ Now you can natively use ``smart_open.open`` with your ``Path`` objects >>> from pathlib import Path >>> from smart_open.smart_open_lib import patch_pathlib >>> - >>> patch_pathlib() # replace `Path.open` with `smart_open.open` + >>> _ = patch_pathlib() # replace `Path.open` with `smart_open.open` >>> >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") >>> @@ -400,8 +400,9 @@ Now you can natively use ``smart_open.open`` with your ``Path`` objects ... # not possible with standard `Path.open` (because gzipped), ... # but works perfectly with "patched" version by `smart_open` ... for line in infile: - ... print(line) + ... print(repr(line)) ... 
break + 'В начале июля, в чрезвычайно жаркое время, под вечер, один молодой человек вышел из своей каморки, которую нанимал от жильцов в С -- м переулке, на улицу и медленно, как бы в нерешимости, отправился к К -- ну мосту.\n' Comments, bug reports ===================== From 4088123c24fbf9ed457ae21f794dffdbbc2fb370 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:36:54 +0300 Subject: [PATCH 09/10] upd --- README.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 82fe7988..ebf98a54 100644 --- a/README.rst +++ b/README.rst @@ -385,7 +385,8 @@ Otherwise, the transparent decompression will not occur. Drop-in replacement of ``pathlib.Path.open`` -------------------------------------------- -Now you can natively use ``smart_open.open`` with your ``Path`` objects +Now you can natively use ``smart_open.open`` with your ``Path`` objects. +You can't transparently read text from compressed file with original ``Path.open``, but can after ``patch_pathlib``. .. code-block:: python @@ -397,8 +398,6 @@ Now you can natively use ``smart_open.open`` with your ``Path`` objects >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") >>> >>> with path.open("r") as infile: - ... # not possible with standard `Path.open` (because gzipped), - ... # but works perfectly with "patched" version by `smart_open` ... for line in infile: ... print(repr(line)) ... 
break From 988840aa69b56aa9be84619ed5cf0884dce95c37 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sat, 21 Mar 2020 11:48:15 +0300 Subject: [PATCH 10/10] simpler doctest --- README.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index ebf98a54..51a95e03 100644 --- a/README.rst +++ b/README.rst @@ -398,10 +398,8 @@ You can't transparently read text from compressed file with original ``Path.open >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") >>> >>> with path.open("r") as infile: - ... for line in infile: - ... print(repr(line)) - ... break - 'В начале июля, в чрезвычайно жаркое время, под вечер, один молодой человек вышел из своей каморки, которую нанимал от жильцов в С -- м переулке, на улицу и медленно, как бы в нерешимости, отправился к К -- ну мосту.\n' + ... print(infile.readline()[:41]) + В начале июля, в чрезвычайно жаркое время Comments, bug reports =====================