Skip to content

Commit

Permalink
Merge pull request #436 from menshikh-iv/mokeypath4pathlib
Browse files Browse the repository at this point in the history
Add pathlib monkeypatch with replacement of `pathlib.Path.open`
  • Loading branch information
mpenkov committed Mar 21, 2020
2 parents 68c2d7f + 988840a commit 90d4f72
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
19 changes: 19 additions & 0 deletions README.rst
Expand Up @@ -382,6 +382,25 @@ If your file object doesn't have one, set the ``.name`` attribute to an appropri
Furthermore, that value has to end with a **known** file extension (see the ``register_compressor`` function).
Otherwise, the transparent decompression will not occur.

Drop-in replacement of ``pathlib.Path.open``
--------------------------------------------

After patching, ``Path.open`` is backed by ``smart_open.open``, so you can use it natively with your ``Path`` objects.
The original ``Path.open`` cannot transparently read text from a compressed file; after calling ``patch_pathlib``, it can.

.. code-block:: python

    >>> from pathlib import Path
    >>> from smart_open.smart_open_lib import patch_pathlib
    >>>
    >>> _ = patch_pathlib()  # replace `Path.open` with `smart_open.open`
    >>>
    >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz")
    >>>
    >>> with path.open("r") as infile:
    ...     print(infile.readline()[:41])
    В начале июля, в чрезвычайно жаркое время

Comments, bug reports
=====================

Expand Down
25 changes: 25 additions & 0 deletions smart_open/smart_open_lib.py
Expand Up @@ -928,3 +928,28 @@ def _encoding_wrapper(fileobj, mode, encoding=None, errors=None):
if mode[0] in ('w', 'a') or mode.endswith('+'):
fileobj = codecs.getwriter(encoding)(fileobj, **kw)
return fileobj


class patch_pathlib(object):
    """Monkeypatch `pathlib.Path.open` so that it points at `smart_open.open`.

    The replacement is installed as soon as the object is constructed, so a
    bare ``patch_pathlib()`` call is enough to apply it.  The implementation
    that was in place beforehand is kept in ``old_impl``; when the object is
    used as a context manager, that implementation is put back on exit.
    """

    def __init__(self):
        # Patching happens eagerly here (not in __enter__).
        previous = _patch_pathlib(open)
        self.old_impl = previous

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Reinstate whatever `Path.open` was before this object was created.
        _patch_pathlib(self.old_impl)


def _patch_pathlib(func):
    """Install `func` as `pathlib.Path.open` and return the implementation it replaced.

    The `pathlib` module is looked up in `sys.modules` rather than imported,
    so it must already have been imported by the caller; otherwise a
    `RuntimeError` is raised and nothing is patched.
    """
    module = sys.modules.get("pathlib")

    if module:
        path_cls = module.Path
        previous = path_cls.open
        path_cls.open = func
        return previous

    raise RuntimeError("Can't patch 'pathlib.Path.open', you should import 'pathlib' first")
31 changes: 31 additions & 0 deletions smart_open/tests/test_smart_open.py
Expand Up @@ -13,6 +13,7 @@
import tempfile
import os
import hashlib
import pathlib

import boto3
import mock
Expand All @@ -24,6 +25,7 @@
import smart_open
from smart_open import smart_open_lib
from smart_open import webhdfs
from smart_open.smart_open_lib import patch_pathlib, _patch_pathlib

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -287,6 +289,35 @@ def test_gs_uri_contains_slash(self):
self.assertEqual(parsed_uri.bucket_id, "mybucket")
self.assertEqual(parsed_uri.blob_id, "mydir/myblob")

def test_pathlib_monkeypath(self):
    """Check that `patch_pathlib` swaps `Path.open` for `smart_open.open` and can be undone."""
    self.assertNotEqual(pathlib.Path.open, smart_open.open)

    # Used as a context manager, the patch is reverted when the block exits.
    with patch_pathlib():
        self.assertEqual(pathlib.Path.open, smart_open.open)

    self.assertNotEqual(pathlib.Path.open, smart_open.open)

    # Used as a plain call, the patch stays in place until explicitly reverted.
    obj = patch_pathlib()
    try:
        self.assertEqual(pathlib.Path.open, smart_open.open)
    finally:
        # Always restore, so a failing assertion cannot leave `Path.open`
        # patched for the tests that run afterwards.
        _patch_pathlib(obj.old_impl)

    self.assertNotEqual(pathlib.Path.open, smart_open.open)

def test_pathlib_monkeypath_read_gz(self):
    """Check that a gzipped text file becomes readable via `Path.open` once patched."""
    path = pathlib.Path(CURR_DIR) / 'test_data' / 'crime-and-punishment.txt.gz'

    # Check that the standard implementation can't work with gzip: it tries
    # to decode the raw compressed bytes as text.
    with path.open("r") as infile:
        with self.assertRaises(UnicodeDecodeError):
            infile.readlines()

    # Check that our implementation works with gzip.  The context manager
    # restores the original `Path.open` even if reading fails, so the patch
    # cannot leak into other tests.
    with patch_pathlib():
        with path.open("r") as infile:
            lines = infile.readlines()

    self.assertTrue(lines)


class SmartOpenHttpTest(unittest.TestCase):
"""
Expand Down

0 comments on commit 90d4f72

Please sign in to comment.