Skip to content

Commit

Permalink
Add integration testing + benchmark with real S3. Partial fix #151, #156
Browse files Browse the repository at this point in the history
 (#158)

* add extra_req[test] (for pip)

* basic code that makes it possible to run tests with real S3

* remove py26 outdated stuff

* fix pep

* Enable real s3 testing for 27, 36 with encrypted credentials + fixes for Travis

* fix travisfile

* fix travis[2]

* try to understand what happens with travis

* fix travis[3]

* use a different bucket to avoid a race condition

* generate unique bucket name (prevent race condition) + cleanup S3 after tests

* move api keys to global, add benchmark

* add explicit encoding for file  + run bench in script session + fix path

* fix assert + add distinct buckets for benchmark results

* get tests running under Py2.7

* update integration test documentation

* Avoid race condition during integration tests

Several py2.7/py3.6 tests may be running at the same time, so we want to
make sure they use different keys to avoid a race condition.

* remove quotes to keep travis happy

* add missing semicolon

* fail the build if integration tests fail

* deliberately fail travis build to check .travis.yml

* Revert "fail the build if integration tests fail"

This reverts commit 3061f5b.

* Revert "Revert "fail the build if integration tests fail""

This reverts commit 2dec2b4.

* Revert "deliberately fail travis build to check .travis.yml"

This reverts commit 4bf0e11.

* return getdefaultencoding (mimicking `open`) + fix missing deps for integration-tests + fix benchmark encoding problem
  • Loading branch information
menshikh-iv committed Dec 22, 2017
1 parent fbc82cc commit 6f3a698
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 70 deletions.
54 changes: 46 additions & 8 deletions .travis.yml
@@ -1,14 +1,52 @@
language: python
dist: trusty
sudo: false

python:
- "2.7"
- "3.3"
- "3.4"
- "3.5"
- "3.6"
env:
global:
- secure: "GH+DI7f9QenVtTZFEfn4B8wO5JJK65PkHDg8vY/npdW51y5PPAynTEf/++D0H4tjwqMuXwB5lrbWfYeskf29Xuq5MT5+FixajZFcbhscoSM9CGABeph2s2+Hm4kSIKmjnTlQLgJHVbwypnlU/W4sfMCbqeOmv2fYXCCt0GhtnWc="
- secure: "du3PQYEiDPw55TRzhk+Ocv1Gx1DuusbCSMVSQmccAoyOr7qxDqm+1jh2v13RmajNK7FnlKyC4xSTySVpSl70By2uhZlJT43EpenqIcpQWqUAm3nVr6etszdb1A6TfEGQrxZ8Y2j9KD6QAMNovsMZbl0bcDJDxTeFA4P/yU9UZcI="


matrix:
include:
- python: '2.7'
env:
- SO_DISABLE_MOCKS: "1"
- SO_S3_URL: "s3://smart-open-py27-benchmark"
- SO_S3_RESULT_URL: "s3://smart-open-py27-benchmark-results"

- python: '3.3'

- python: '3.4'

- python: '3.5'

- python: '3.6'
env:
- SO_DISABLE_MOCKS: "1"
- SO_S3_URL: "s3://smart-open-py36-benchmark"
- SO_S3_RESULT_URL: "s3://smart-open-py36-benchmark-results"

install:
- python setup.py install
- pip install .[test]
- pip freeze

script: python -W ignore setup.py test

script:
- python setup.py test
- export SO_S3_URL=$SO_S3_URL/$(python -c 'from uuid import uuid4;print(uuid4())')
- if [[ ${SO_DISABLE_MOCKS} = "1" ]]; then
pip install pytest pytest_benchmark awscli;
set -e;
py.test integration-tests/test_s3.py --benchmark-save=`git rev-parse HEAD`;
set +e;
aws s3 cp .benchmarks/*/*.json ${SO_S3_RESULT_URL};
aws s3 rm --recursive $SO_S3_URL;
fi


cache:
directories:
- "$HOME/.cache/pip"
- "$HOME/.pyenv"
4 changes: 2 additions & 2 deletions integration-tests/README.md
Expand Up @@ -2,11 +2,11 @@ This directory contains integration tests for smart_open.
To run the tests, you need read/write access to an S3 bucket.
Also, you need to install py.test and its benchmarks addon:

pip install pytest pytest_benchmark
pip install -r requirements.txt

Then, to run the tests, run:

SMART_OPEN_S3_URL=s3://bucket/smart_open_test py.test integration-tests/test_s3.py
SO_S3_URL=s3://bucket/smart_open_test py.test integration-tests/test_s3.py

You may use any key name instead of "smart_open_test".
It does not have to be an existing key.
Expand Down
3 changes: 3 additions & 0 deletions integration-tests/requirements.txt
@@ -0,0 +1,3 @@
pytest
pytest_benchmark
awscli
16 changes: 9 additions & 7 deletions integration-tests/test_s3.py
@@ -1,22 +1,24 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import io
import os
import subprocess

import smart_open

_S3_URL = os.environ.get('SMART_OPEN_S3_URL')
assert _S3_URL is not None, 'please set the SMART_OPEN_S3_URL environment variable'
_S3_URL = os.environ.get('SO_S3_URL')
assert _S3_URL is not None, 'please set the SO_S3_URL environment variable'


def initialize_bucket():
subprocess.check_call(['aws', 's3', 'rm', '--recursive', _S3_URL])


def write_read(key, content, write_mode, read_mode):
with smart_open.smart_open(key, write_mode) as fout:
def write_read(key, content, write_mode, read_mode, encoding=None):
with smart_open.smart_open(key, write_mode, encoding=encoding) as fout:
fout.write(content)
with smart_open.smart_open(key, read_mode) as fin:
with smart_open.smart_open(key, read_mode, encoding=encoding) as fin:
actual = fin.read()
return actual

Expand All @@ -26,7 +28,7 @@ def test_s3_readwrite_text(benchmark):

key = _S3_URL + '/sanity.txt'
text = 'с гранатою в кармане, с чекою в руке'
actual = benchmark(write_read, key, text, 'w', 'r')
actual = benchmark(write_read, key, text, 'w', 'r', 'utf-8')
assert actual == text


Expand All @@ -35,7 +37,7 @@ def test_s3_readwrite_text_gzip(benchmark):

key = _S3_URL + '/sanity.txt.gz'
text = 'не чайки здесь запели на знакомом языке'
actual = benchmark(write_read, key, text, 'w', 'r')
actual = benchmark(write_read, key, text, 'w', 'r', 'utf-8')
assert actual == text


Expand Down
18 changes: 10 additions & 8 deletions setup.py
Expand Up @@ -16,6 +16,12 @@ def read(fname):
return io.open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read()


tests_require = [
'mock',
'moto==0.4.31',
'responses',
]

setup(
name='smart_open',
version='1.5.5',
Expand Down Expand Up @@ -44,14 +50,10 @@ def read(fname):
'requests',
'boto3'
],

tests_require=[
'mock',
'moto==0.4.31',
'responses',
'unittest2'
],

tests_require=tests_require,
extras_require={
'test': tests_require,
},

test_suite="smart_open.tests",

Expand Down
6 changes: 3 additions & 3 deletions smart_open/smart_open_lib.py
Expand Up @@ -697,7 +697,7 @@ def __init__(self, url, mode='r', kerberos=False, user=None, password=None):
auth = (user, password)
else:
auth = None

self.response = requests.get(url, auth=auth, stream=True)

if not self.response.ok:
Expand Down Expand Up @@ -747,7 +747,7 @@ def read(self, size=None):
if self._read_iter is None:
self._read_iter = self.response.iter_content(size)
self._read_buffer = next(self._read_iter)

while len(self._read_buffer) < size:
try:
self._read_buffer += next(self._read_iter)
Expand All @@ -760,7 +760,7 @@ def read(self, size=None):
return ''
else:
return retval

# If we got here, it means we have enough data in the buffer
# to return to the caller.
retval = self._read_buffer[:size]
Expand Down

0 comments on commit 6f3a698

Please sign in to comment.