Skip to content

Commit

Permalink
Change download_as_string to download_as_bytes in gcs submodule (#571)
Browse files Browse the repository at this point in the history
* Update to use download_as_bytes instead of download_as_string

* Update CHANGELOG.md

Co-authored-by: Michael Penkov <m@penkov.dev>
  • Loading branch information
alexandreyc and mpenkov committed Dec 30, 2020
1 parent 36327f5 commit 3927ff5
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Unreleased

- Refactor s3 submodule to minimize resource usage (PR [#569](https://github.com/RaRe-Technologies/smart_open/pull/569), [@mpenkov](https://github.com/mpenkov))
- Change download_as_string to download_as_bytes in gcs submodule (PR [#571](https://github.com/RaRe-Technologies/smart_open/pull/571), [@alexandreyc](https://github.com/alexandreyc))

# 4.0.1, 27 Nov 2020

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -329,12 +329,12 @@ GCS Credentials
---------------
``smart_open`` uses the ``google-cloud-storage`` library to talk to GCS.
``google-cloud-storage`` uses the ``google-cloud`` package under the hood to handle authentication.
There are several `options <https://google-cloud-python.readthedocs.io/en/0.32.0/core/auth.html>`__ to provide
There are several `options <https://googleapis.dev/python/google-api-core/latest/auth.html>`__ to provide
credentials.
By default, ``smart_open`` will defer to ``google-cloud-storage`` and let it take care of the credentials.

To override this behavior, pass a ``google.cloud.storage.Client`` object as a transport parameter to the ``open`` function.
You can `customize the credentials <https://google-cloud-python.readthedocs.io/en/0.32.0/core/client.html>`__
You can `customize the credentials <https://googleapis.dev/python/storage/latest/client.html>`__
when constructing the client. ``smart_open`` will then use the client when talking to GCS. To follow along with
the example below, `refer to Google's guide <https://cloud.google.com/storage/docs/reference/libraries#setting_up_authentication>`__
to setting up GCS authentication with a service account.
Expand Down
4 changes: 2 additions & 2 deletions smart_open/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,10 @@ def _download_blob_chunk(self, size):
#
binary = b''
elif size == -1:
binary = self._blob.download_as_string(start=start)
binary = self._blob.download_as_bytes(start=start)
else:
end = position + size
binary = self._blob.download_as_string(start=start, end=end)
binary = self._blob.download_as_bytes(start=start, end=end)
return binary


Expand Down
20 changes: 10 additions & 10 deletions smart_open/tests/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ def delete(self):
self._bucket.delete_blob(self)
self._exists = False

def download_as_string(self, start=0, end=None):
# mimics Google's API by returning bytes, despite the method name
# https://google-cloud-python.readthedocs.io/en/0.32.0/storage/blobs.html#google.cloud.storage.blob.Blob.download_as_string
def download_as_bytes(self, start=0, end=None):
# mimics Google's API by returning bytes
# https://googleapis.dev/python/storage/latest/blobs.html#google.cloud.storage.blob.Blob.download_as_bytes
if end is None:
end = self.__contents.tell()
self.__contents.seek(start)
Expand All @@ -170,7 +170,7 @@ def exists(self, client=None):

def upload_from_string(self, data):
# mimics Google's API by accepting bytes or str, despite the method name
# https://google-cloud-python.readthedocs.io/en/0.32.0/storage/blobs.html#google.cloud.storage.blob.Blob.upload_from_string
# https://googleapis.dev/python/storage/latest/blobs.html#google.cloud.storage.blob.Blob.upload_from_string
if isinstance(data, str):
data = bytes(data, 'utf8')
self.__contents = io.BytesIO(data)
Expand Down Expand Up @@ -214,10 +214,10 @@ def test_upload_download(self):
blob = FakeBlob('fake-blob', self.bucket)
contents = b'test'
blob.upload_from_string(contents)
self.assertEqual(blob.download_as_string(), b'test')
self.assertEqual(blob.download_as_string(start=2), b'st')
self.assertEqual(blob.download_as_string(end=2), b'te')
self.assertEqual(blob.download_as_string(start=2, end=3), b's')
self.assertEqual(blob.download_as_bytes(), b'test')
self.assertEqual(blob.download_as_bytes(start=2), b'st')
self.assertEqual(blob.download_as_bytes(end=2), b'te')
self.assertEqual(blob.download_as_bytes(start=2, end=3), b's')

def test_size(self):
blob = FakeBlob('fake-blob', self.bucket)
Expand Down Expand Up @@ -372,7 +372,7 @@ def test_unfinished_put_does_not_write_to_blob(self):
response = self.session.put(self.upload_url, data, headers=headers)
self.assertIn(response.status_code, smart_open.gcs._UPLOAD_INCOMPLETE_STATUS_CODES)
self.session._blob_with_url(self.upload_url, self.client)
blob_contents = self.blob.download_as_string()
blob_contents = self.blob.download_as_bytes()
self.assertEqual(blob_contents, b'')

def test_finished_put_writes_to_blob(self):
Expand All @@ -384,7 +384,7 @@ def test_finished_put_writes_to_blob(self):
response = self.session.put(self.upload_url, data, headers=headers)
self.assertEqual(response.status_code, 200)
self.session._blob_with_url(self.upload_url, self.client)
blob_contents = self.blob.download_as_string()
blob_contents = self.blob.download_as_bytes()
data.seek(0)
self.assertEqual(blob_contents, data.read())

Expand Down

0 comments on commit 3927ff5

Please sign in to comment.