Skip to content

Commit

Permalink
Updated file read to not fail early with a boto empty file exception (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
MSeal committed Jun 6, 2021
1 parent 05f224b commit d30bcf2
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
6 changes: 6 additions & 0 deletions papermill/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,12 @@ def cat(
elif size != obj.content_length:
raise AwsError('key size unexpectedly changed while reading')

# For an empty file, the range start 0 (first-byte-pos) equals the length of the
# object, so the range is "unsatisfiable" and botocore correctly raises an
# exception. We'd rather just return empty file contents here.
if size == 0:
break

r = obj.get(Range="bytes={}-".format(bytes_read))

try:
Expand Down
11 changes: 10 additions & 1 deletion papermill/tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def test_s3_defaults():
test_bucket_name = 'test-pm-bucket'
test_string = 'Hello'
test_file_path = 'notebooks/s3/s3_in/s3-simple_notebook.ipynb'
test_empty_file_path = 'notebooks/s3/s3_in/s3-empty.ipynb'

with open(os.path.join(local_dir, test_file_path)) as f:
test_nb_content = f.read()
Expand All @@ -169,10 +170,12 @@ def s3_client():
client = boto3.client('s3')
client.create_bucket(Bucket=test_bucket_name)
client.put_object(Bucket=test_bucket_name, Key=test_file_path, Body=test_nb_content)
client.put_object(Bucket=test_bucket_name, Key=test_empty_file_path, Body='')
yield S3()
try:
client.delete_object(Bucket=test_bucket_name, Key=test_file_path)
client.delete_object(Bucket=test_bucket_name, Key=test_file_path + '.txt')
client.delete_object(Bucket=test_bucket_name, Key=test_empty_file_path)
except Exception:
pass
mock_s3.stop()
Expand All @@ -184,6 +187,12 @@ def test_s3_read(s3_client):
assert data == test_clean_nb_content


def test_s3_read_empty(s3_client):
    """Check that reading the empty S3 test object produces an empty string."""
    # Build the s3:// URL for the zero-length key.
    empty_object_url = "s3://{}/{}".format(test_bucket_name, test_empty_file_path)
    # Drain the generator returned by read() into a single value.
    contents = read_from_gen(s3_client.read(empty_object_url))
    assert contents == ''


def test_s3_write(s3_client):
s3_path = "s3://{}/{}.txt".format(test_bucket_name, test_file_path)
s3_client.cp_string(test_string, s3_path)
Expand All @@ -205,5 +214,5 @@ def test_s3_listdir(s3_client):
s3_dir = "s3://{}/{}".format(test_bucket_name, dir_name)
s3_path = "s3://{}/{}".format(test_bucket_name, test_file_path)
dir_listings = s3_client.listdir(s3_dir)
assert len(dir_listings) == 1
assert len(dir_listings) == 2
assert s3_path in dir_listings

0 comments on commit d30bcf2

Please sign in to comment.