diff --git a/smart_open/tests/test_s3.py b/smart_open/tests/test_s3.py
index 5376a70b..60cdf0a4 100644
--- a/smart_open/tests/test_s3.py
+++ b/smart_open/tests/test_s3.py
@@ -18,7 +18,13 @@ import smart_open
 import smart_open.s3
 
-BUCKET_NAME = 'test-smartopen-{}'.format(uuid.uuid4().hex)  # generate random bucket (avoid race-condition in CI)
+# To reduce spurious errors due to S3's eventually-consistent behavior
+# we create this bucket once before running these tests and then
+# remove it when we're done. The bucket has a random name so that we
+# can run multiple instances of this suite in parallel and not have
+# them conflict with one another. Travis, for example, runs the Python
+# 2.7, 3.6, and 3.7 suites concurrently.
+BUCKET_NAME = 'test-smartopen-{}'.format(uuid.uuid4().hex)
 KEY_NAME = 'test-key'
 WRITE_KEY_NAME = 'test-write-key'
 DISABLE_MOCKS = os.environ.get('SO_DISABLE_MOCKS') == "1"
 
@@ -34,29 +40,37 @@ def maybe_mock_s3(func):
         return moto.mock_s3(func)
 
 
-def cleanup_bucket(s3, delete_bucket=False):
-    for bucket in s3.buckets.all():
-        if bucket.name == BUCKET_NAME:
-            for key in bucket.objects.all():
-                key.delete()
+@maybe_mock_s3
+def setUpModule():
+    '''Called once by unittest when initializing this module. Sets up the
+    test S3 bucket.
 
-            if delete_bucket:
-                bucket.delete()
-                return False
-            return True
-    return False
+    '''
+    boto3.resource('s3').create_bucket(Bucket=BUCKET_NAME)
 
 
-def create_bucket_and_key(
-        bucket_name=BUCKET_NAME, key_name=KEY_NAME, contents=None,
-        num_attempts=12, sleep_time=5):
-    # fake (or not) connection, bucket and key
-    logger.debug('%r', locals())
+@maybe_mock_s3
+def tearDownModule():
+    '''Called once by unittest when tearing down this module. Empties and
+    removes the test S3 bucket.
+
+    '''
     s3 = boto3.resource('s3')
-    bucket_exist = cleanup_bucket(s3)
+    try:
+        cleanup_bucket()
+        s3.Bucket(BUCKET_NAME).delete()
+    except s3.meta.client.exceptions.NoSuchBucket:
+        pass
+
 
-    if not bucket_exist:
-        mybucket = s3.create_bucket(Bucket=bucket_name)
+def cleanup_bucket():
+    for key in boto3.resource('s3').Bucket(BUCKET_NAME).objects.all():
+        key.delete()
+
+
+def put_to_bucket(contents, num_attempts=12, sleep_time=5):
+    # fake (or not) connection, bucket and key
+    logger.debug('%r', locals())
 
     #
     # In real life, it can take a few seconds for the bucket to become ready.
@@ -65,16 +79,13 @@ def create_bucket_and_key(
     #
     for attempt in range(num_attempts):
         try:
-            mybucket = s3.Bucket(bucket_name)
-            mykey = s3.Object(bucket_name, key_name)
-            if contents is not None:
-                mykey.put(Body=contents)
-            return mybucket, mykey
+            boto3.resource('s3').Object(BUCKET_NAME, KEY_NAME).put(Body=contents)
+            return
         except botocore.exceptions.ClientError as err:
             logger.error('caught %r, retrying', err)
             time.sleep(sleep_time)
 
-    assert False, 'failed to create bucket after %d attempts' % num_attempts
+    assert False, 'failed to create bucket %s after %d attempts' % (BUCKET_NAME, num_attempts)
 
 
 def ignore_resource_warnings():
@@ -98,14 +109,13 @@ def setUp(self):
 
     def tearDown(self):
         smart_open.s3.DEFAULT_MIN_PART_SIZE = self.old_min_part_size
-        s3 = boto3.resource('s3')
-        cleanup_bucket(s3, delete_bucket=True)
+        cleanup_bucket()
 
     def test_iter(self):
         """Are S3 files iterated over correctly?"""
         # a list of strings to test with
         expected = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=expected)
+        put_to_bucket(contents=expected)
 
         # connect to fake s3 and read from the fake key we filled above
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
@@ -115,7 +125,7 @@ def test_iter_context_manager(self):
         # same thing but using a context manager
         expected = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=expected)
+        put_to_bucket(contents=expected)
         with smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME) as fin:
             output = [line.rstrip(b'\n') for line in fin]
             self.assertEqual(output, expected.split(b'\n'))
@@ -123,7 +133,7 @@ def test_read(self):
         """Are S3 files read correctly?"""
         content = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
         logger.debug('content: %r len: %r', content, len(content))
 
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
@@ -134,7 +144,7 @@ def test_seek_beginning(self):
         """Does seeking to the beginning of S3 files work correctly?"""
         content = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
         self.assertEqual(content[:6], fin.read(6))
@@ -149,7 +159,7 @@ def test_seek_start(self):
         """Does seeking from the start of S3 files work correctly?"""
         content = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
         seek = fin.seek(6)
@@ -160,7 +170,7 @@ def test_seek_current(self):
         """Does seeking from the middle of S3 files work correctly?"""
         content = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
         self.assertEqual(fin.read(5), b'hello')
@@ -171,7 +181,7 @@ def test_seek_end(self):
         """Does seeking from the end of S3 files work correctly?"""
         content = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
         seek = fin.seek(-4, whence=smart_open.s3.END)
@@ -180,7 +190,7 @@ def test_detect_eof(self):
         content = u"hello wořld\nhow are you?".encode('utf8')
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
         fin.read()
@@ -195,7 +205,7 @@ def test_read_gzip(self):
         buf.close = lambda: None  # keep buffer open so that we can .getvalue()
         with gzip.GzipFile(fileobj=buf, mode='w') as zipfile:
             zipfile.write(expected)
-        create_bucket_and_key(contents=buf.getvalue())
+        put_to_bucket(contents=buf.getvalue())
 
         #
         # Make sure we're reading things correctly.
@@ -219,7 +229,7 @@ def test_read_gzip(self):
 
     def test_readline(self):
         content = b'englishman\nin\nnew\nyork\n'
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         with smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME) as fin:
             fin.readline()
@@ -234,7 +244,7 @@ def test_readline(self):
 
     def test_readline_tiny_buffer(self):
         content = b'englishman\nin\nnew\nyork\n'
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         with smart_open.s3.BufferedInputBase(BUCKET_NAME, KEY_NAME, buffer_size=8) as fin:
             actual = list(fin)
@@ -244,7 +254,7 @@ def test_readline_tiny_buffer(self):
 
     def test_read0_does_not_return_data(self):
         content = b'englishman\nin\nnew\nyork\n'
-        create_bucket_and_key(contents=content)
+        put_to_bucket(contents=content)
 
         with smart_open.s3.BufferedInputBase(BUCKET_NAME, KEY_NAME) as fin:
             data = fin.read(0)
@@ -262,12 +272,10 @@ def setUp(self):
         ignore_resource_warnings()
 
     def tearDown(self):
-        s3 = boto3.resource('s3')
-        cleanup_bucket(s3, delete_bucket=True)
+        cleanup_bucket()
 
     def test_write_01(self):
         """Does writing into s3 work correctly?"""
-        create_bucket_and_key()
         test_string = u"žluťoučký koníček".encode('utf8')
 
         # write into key
@@ -281,8 +289,6 @@ def test_write_01(self):
 
     def test_write_01a(self):
         """Does s3 write fail on incorrect input?"""
-        create_bucket_and_key()
-
         try:
             with smart_open.s3.BufferedOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fin:
                 fin.write(None)
@@ -293,8 +299,6 @@ def test_write_01a(self):
 
     def test_write_02(self):
         """Does s3 write unicode-utf8 conversion work?"""
-        create_bucket_and_key()
-
         smart_open_write = smart_open.s3.BufferedOutputBase(BUCKET_NAME, WRITE_KEY_NAME)
         smart_open_write.tell()
         logger.info("smart_open_write: %r", smart_open_write)
@@ -304,8 +308,6 @@ def test_write_02(self):
 
     def test_write_03(self):
         """Does s3 multipart chunking work correctly?"""
-        create_bucket_and_key()
-
         # write
         smart_open_write = smart_open.s3.BufferedOutputBase(
             BUCKET_NAME, WRITE_KEY_NAME, min_part_size=10
@@ -328,8 +330,6 @@ def test_write_03(self):
 
     def test_write_04(self):
         """Does writing no data cause key with an empty value to be created?"""
-        _ = create_bucket_and_key()
-
         smart_open_write = smart_open.s3.BufferedOutputBase(BUCKET_NAME, WRITE_KEY_NAME)
         with smart_open_write as fout:  # noqa
             pass
@@ -340,8 +340,6 @@ def test_write_04(self):
         self.assertEqual(output, [])
 
     def test_gzip(self):
-        create_bucket_and_key()
-
         expected = u'а не спеть ли мне песню... о любви'.encode('utf-8')
         with smart_open.s3.BufferedOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
             with gzip.GzipFile(fileobj=fout, mode='w') as zipfile:
                 zipfile.write(expected)
@@ -358,8 +356,6 @@ def test_buffered_writer_wrapper_works(self):
         Ensure that we can wrap a smart_open s3 stream in a BufferedWriter, which
         passes a memoryview object to the underlying stream in python >= 2.7
         """
-
-        create_bucket_and_key()
         expected = u'не думай о секундах свысока'
 
         with smart_open.s3.BufferedOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
@@ -374,7 +370,7 @@ def test_buffered_writer_wrapper_works(self):
 
     def test_binary_iterator(self):
         expected = u"выйду ночью в поле с конём".encode('utf-8').split(b' ')
-        create_bucket_and_key(contents=b"\n".join(expected))
+        put_to_bucket(contents=b"\n".join(expected))
         with smart_open.s3.open(BUCKET_NAME, KEY_NAME, 'rb') as fin:
             actual = [line.rstrip() for line in fin]
         self.assertEqual(expected, actual)
@@ -391,8 +387,6 @@ def test_read_nonexisting_key(self):
                 fin.read()
 
     def test_double_close(self):
-        create_bucket_and_key()
-
         text = u'там за туманами, вечными, пьяными'.encode('utf-8')
         fout = smart_open.s3.open(BUCKET_NAME, 'key', 'wb')
         fout.write(text)
         fout.close()
         fout.close()
@@ -400,8 +394,6 @@ def test_flush_close(self):
-        create_bucket_and_key()
-
         text = u'там за туманами, вечными, пьяными'.encode('utf-8')
         fout = smart_open.s3.open(BUCKET_NAME, 'key', 'wb')
         fout.write(text)
         fout.flush()
         fout.close()
@@ -424,6 +416,9 @@ class IterBucketTest(unittest.TestCase):
     def setUp(self):
         ignore_resource_warnings()
 
+    def tearDown(self):
+        cleanup_bucket()
+
     def test_iter_bucket(self):
         populate_bucket()
         results = list(smart_open.s3.iter_bucket(BUCKET_NAME))
@@ -431,8 +426,7 @@ def test_iter_bucket(self):
 
     def test_accepts_boto3_bucket(self):
         populate_bucket()
-        s3 = boto3.resource('s3')
-        bucket = s3.Bucket(BUCKET_NAME)
+        bucket = boto3.resource('s3').Bucket(BUCKET_NAME)
         results = list(smart_open.s3.iter_bucket(bucket))
         self.assertEqual(len(results), 10)
@@ -462,8 +456,6 @@ def test_list_bucket_long(self):
 
     def test_old(self):
         """Does s3_iter_bucket work correctly?"""
-        create_bucket_and_key()
-
         #
         # Use an old-school boto Bucket class for historical reasons.
         #
@@ -511,6 +503,7 @@ def setUp(self):
 
     def tearDown(self):
         smart_open.s3._MULTIPROCESSING = self.old_flag
+        cleanup_bucket()
 
     def test(self):
         num_keys = 101
@@ -527,16 +520,19 @@ class DownloadKeyTest(unittest.TestCase):
     def setUp(self):
         ignore_resource_warnings()
 
+    def tearDown(self):
+        cleanup_bucket()
+
     def test_happy(self):
         contents = b'hello'
-        create_bucket_and_key(contents=contents)
+        put_to_bucket(contents=contents)
         expected = (KEY_NAME, contents)
         actual = smart_open.s3._download_key(KEY_NAME, bucket_name=BUCKET_NAME)
         self.assertEqual(expected, actual)
 
     def test_intermittent_error(self):
         contents = b'hello'
-        create_bucket_and_key(contents=contents)
+        put_to_bucket(contents=contents)
         expected = (KEY_NAME, contents)
         side_effect = [ARBITRARY_CLIENT_ERROR, ARBITRARY_CLIENT_ERROR, contents]
         with mock.patch('smart_open.s3._download_fileobj', side_effect=side_effect):
@@ -545,7 +541,7 @@ def test_intermittent_error(self):
 
     def test_persistent_error(self):
         contents = b'hello'
-        create_bucket_and_key(contents=contents)
+        put_to_bucket(contents=contents)
         side_effect = [ARBITRARY_CLIENT_ERROR, ARBITRARY_CLIENT_ERROR,
                        ARBITRARY_CLIENT_ERROR, ARBITRARY_CLIENT_ERROR]
         with mock.patch('smart_open.s3._download_fileobj', side_effect=side_effect):
@@ -554,7 +550,7 @@ def test_intermittent_error_retries(self):
         contents = b'hello'
-        create_bucket_and_key(contents=contents)
+        put_to_bucket(contents=contents)
         expected = (KEY_NAME, contents)
         side_effect = [ARBITRARY_CLIENT_ERROR, ARBITRARY_CLIENT_ERROR,
                        ARBITRARY_CLIENT_ERROR, ARBITRARY_CLIENT_ERROR, contents]
@@ -564,7 +560,7 @@ def test_propagates_other_exception(self):
         contents = b'hello'
-        create_bucket_and_key(contents=contents)
+        put_to_bucket(contents=contents)
 
         with mock.patch('smart_open.s3._download_fileobj', side_effect=ValueError):
             self.assertRaises(ValueError, smart_open.s3._download_key,
                               KEY_NAME, bucket_name=BUCKET_NAME)
@@ -575,11 +571,11 @@ class OpenTest(unittest.TestCase):
     def setUp(self):
         ignore_resource_warnings()
 
+    def tearDown(self):
+        cleanup_bucket()
+
     def test_read_never_returns_none(self):
         """read should never return None."""
-        s3 = boto3.resource('s3')
-        s3.create_bucket(Bucket=BUCKET_NAME)
-
         test_string = u"ветер по морю гуляет..."
         with smart_open.s3.open(BUCKET_NAME, KEY_NAME, "wb") as fout:
             fout.write(test_string.encode('utf8'))
@@ -590,20 +586,14 @@ def test_read_never_returns_none(self):
             self.assertEqual(r.read(), b"")
 
 
-def populate_bucket(bucket_name=BUCKET_NAME, num_keys=10):
+def populate_bucket(num_keys=10):
     # fake (or not) connection, bucket and key
     logger.debug('%r', locals())
 
-    s3 = boto3.resource('s3')
-    bucket_exist = cleanup_bucket(s3)
-
-    if not bucket_exist:
-        mybucket = s3.create_bucket(Bucket=bucket_name)
-
-    mybucket = s3.Bucket(bucket_name)
+    s3 = boto3.resource('s3')
     for key_number in range(num_keys):
         key_name = 'key_%d' % key_number
-        s3.Object(bucket_name, key_name).put(Body=str(key_number))
+        s3.Object(BUCKET_NAME, key_name).put(Body=str(key_number))
 
 
 if __name__ == '__main__':
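
Note on the fixture pattern this patch adopts: setUpModule() and tearDownModule() are module-level fixtures that unittest calls exactly once per test module, before the first test runs and after the last one finishes, which is what lets every test class above share a single bucket. A minimal, self-contained sketch of that protocol, independent of smart_open, boto3, and moto (the temporary-file resource is purely illustrative):

    import os
    import tempfile
    import unittest

    _scratch = None  # module-wide resource shared by every test class below


    def setUpModule():
        # unittest calls this exactly once, before the first test in the module.
        global _scratch
        _scratch = tempfile.NamedTemporaryFile(delete=False)


    def tearDownModule():
        # ...and this exactly once, after the last test in the module finishes.
        _scratch.close()
        os.unlink(_scratch.name)


    class FirstTest(unittest.TestCase):
        def test_resource_is_open(self):
            self.assertFalse(_scratch.closed)


    class SecondTest(unittest.TestCase):
        def test_resource_is_shared(self):
            # Same object as in FirstTest: the module fixture ran only once.
            self.assertFalse(_scratch.closed)


    if __name__ == '__main__':
        unittest.main()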
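
The retry loop kept in put_to_bucket() is the bounded-retry answer to the eventual consistency mentioned in the patch: a freshly created bucket can briefly reject PUTs with a ClientError, so the helper retries a fixed number of times with a fixed sleep before failing loudly. A generic sketch of that shape (the retry() helper and its signature are illustrative, not part of the patch):

    import logging
    import time

    import botocore.exceptions

    logger = logging.getLogger(__name__)


    def retry(operation, num_attempts=12, sleep_time=5,
              retriable=(botocore.exceptions.ClientError,)):
        # Run operation() up to num_attempts times, sleeping between tries;
        # return on the first success, fail loudly once the budget is spent.
        for attempt in range(num_attempts):
            try:
                return operation()
            except retriable as err:
                logger.error('attempt %d caught %r, retrying', attempt, err)
                time.sleep(sleep_time)
        raise AssertionError('operation failed after %d attempts' % num_attempts)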