Add integration testing + benchmark with real S3. Partial fix #151, #156

(#158) * add extra_req[test] (for pip) * basic code that makes it possible to run tests with real S3 * remove py26 outdated stuff * fix pep * Enable real s3 testing for 27, 36 with encrypted credentials + fixes for Travis * fix travisfile * fix travis[2] * try to understand what happens with travis * fix travis[3] * use a different bucket to avoid a race condition * generate unique bucket name (prevent race condition) + cleanup S3 after tests * move api keys to global, add benchmark * add explicit encoding for file + run bench in script session + fix path * fix assert + add distinct buckets for benchmark results * get tests running under Py2.7 * update integration test documentation * Avoid race condition during integration tests. Several py2.7/py3.6 tests may be running at the same time, so we want to make sure they use different keys to avoid a race condition. * remove quotes to keep travis happy * add missing semicolon * fail the build if integration tests fail * deliberately fail travis build to check .travis.yml * Revert "fail the build if integration tests fail" This reverts commit 3061f5b. * Revert "Revert "fail the build if integration tests fail"" This reverts commit 2dec2b4. * Revert "deliberately fail travis build to check .travis.yml" This reverts commit 4bf0e11. * return getdefaultencoding (mimicking `open`) + fix missing deps for integration-tests + fix benchmark encoding problem
piskvorky · Dec 22, 2017 · 6f3a698 · 6f3a698
1 parent fbc82cc
commit 6f3a698
Show file tree

Hide file tree

Showing 8 changed files with 148 additions and 70 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,14 +1,52 @@
 language: python
+dist: trusty
+sudo: false
 
-python:
-  - "2.7"
-  - "3.3"
-  - "3.4"
-  - "3.5"
-  - "3.6"
+env:
+  global:
+    - secure: "GH+DI7f9QenVtTZFEfn4B8wO5JJK65PkHDg8vY/npdW51y5PPAynTEf/++D0H4tjwqMuXwB5lrbWfYeskf29Xuq5MT5+FixajZFcbhscoSM9CGABeph2s2+Hm4kSIKmjnTlQLgJHVbwypnlU/W4sfMCbqeOmv2fYXCCt0GhtnWc="
+    - secure: "du3PQYEiDPw55TRzhk+Ocv1Gx1DuusbCSMVSQmccAoyOr7qxDqm+1jh2v13RmajNK7FnlKyC4xSTySVpSl70By2uhZlJT43EpenqIcpQWqUAm3nVr6etszdb1A6TfEGQrxZ8Y2j9KD6QAMNovsMZbl0bcDJDxTeFA4P/yU9UZcI="
+
+
+matrix:
+  include:
+    - python: '2.7'
+      env:
+        - SO_DISABLE_MOCKS: "1"
+        - SO_S3_URL: "s3://smart-open-py27-benchmark"
+        - SO_S3_RESULT_URL: "s3://smart-open-py27-benchmark-results"
+
+    - python: '3.3'
+
+    - python: '3.4'
+
+    - python: '3.5'
+
+    - python: '3.6'
+      env:
+        - SO_DISABLE_MOCKS: "1"
+        - SO_S3_URL: "s3://smart-open-py36-benchmark"
+        - SO_S3_RESULT_URL: "s3://smart-open-py36-benchmark-results"
 
 install:
-  - python setup.py install
+  - pip install .[test]
   - pip freeze
 
-script: python -W ignore setup.py test
+
+script:
+  - python setup.py test
+  - export SO_S3_URL=$SO_S3_URL/$(python -c 'from uuid import uuid4;print(uuid4())')
+  - if [[ ${SO_DISABLE_MOCKS} = "1" ]]; then
+      pip install pytest pytest_benchmark awscli;
+      set -e;
+      py.test integration-tests/test_s3.py --benchmark-save=`git rev-parse HEAD`;
+      set +e;
+      aws s3 cp .benchmarks/*/*.json ${SO_S3_RESULT_URL};
+      aws s3 rm --recursive $SO_S3_URL;
+    fi
+
+
+cache:
+  directories:
+  - "$HOME/.cache/pip"
+  - "$HOME/.pyenv"
diff --git a/integration-tests/README.md b/integration-tests/README.md
@@ -2,11 +2,11 @@ This directory contains integration tests for smart_open.
 To run the tests, you need read/write access to an S3 bucket.
 Also, you need to install py.test and its benchmarks addon:
 
-    pip install pytest pytest_benchmark
+    pip install -r requirements.txt
 
 Then, to run the tests, run:
 
-    SMART_OPEN_S3_URL=s3://bucket/smart_open_test py.test integration-tests/test_s3.py
+    SO_S3_URL=s3://bucket/smart_open_test py.test integration-tests/test_s3.py
 
 You may use any key name instead of "smart_open_test".
 It does not have to be an existing key.

diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt
@@ -0,0 +1,3 @@
+pytest
+pytest_benchmark
+awscli
diff --git a/integration-tests/test_s3.py b/integration-tests/test_s3.py
@@ -1,22 +1,24 @@
+# -*- coding: utf-8 -*-
+
 from __future__ import unicode_literals
 import io
 import os
 import subprocess
 
 import smart_open
 
-_S3_URL = os.environ.get('SMART_OPEN_S3_URL')
-assert _S3_URL is not None, 'please set the SMART_OPEN_S3_URL environment variable'
+_S3_URL = os.environ.get('SO_S3_URL')
+assert _S3_URL is not None, 'please set the SO_S3_URL environment variable'
 
 
 def initialize_bucket():
     subprocess.check_call(['aws', 's3', 'rm', '--recursive', _S3_URL])
 
 
-def write_read(key, content, write_mode, read_mode):
-    with smart_open.smart_open(key, write_mode) as fout:
+def write_read(key, content, write_mode, read_mode, encoding=None):
+    with smart_open.smart_open(key, write_mode, encoding=encoding) as fout:
         fout.write(content)
-    with smart_open.smart_open(key, read_mode) as fin:
+    with smart_open.smart_open(key, read_mode, encoding=encoding) as fin:
         actual = fin.read()
     return actual
 
@@ -26,7 +28,7 @@ def test_s3_readwrite_text(benchmark):
 
     key = _S3_URL + '/sanity.txt'
     text = 'с гранатою в кармане, с чекою в руке'
-    actual = benchmark(write_read, key, text, 'w', 'r')
+    actual = benchmark(write_read, key, text, 'w', 'r', 'utf-8')
     assert actual == text
 
 
@@ -35,7 +37,7 @@ def test_s3_readwrite_text_gzip(benchmark):
 
     key = _S3_URL + '/sanity.txt.gz'
     text = 'не чайки здесь запели на знакомом языке'
-    actual = benchmark(write_read, key, text, 'w', 'r')
+    actual = benchmark(write_read, key, text, 'w', 'r', 'utf-8')
     assert actual == text
 
 

diff --git a/setup.py b/setup.py
@@ -16,6 +16,12 @@ def read(fname):
     return io.open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read()
 
 
+tests_require = [
+    'mock',
+    'moto==0.4.31',
+    'responses',
+]
+
 setup(
     name='smart_open',
     version='1.5.5',
@@ -44,14 +50,10 @@ def read(fname):
         'requests',
         'boto3'
     ],
-
-    tests_require=[
-        'mock',
-        'moto==0.4.31',
-        'responses',
-        'unittest2'
-    ],
-
+    tests_require=tests_require,
+    extras_require={
+        'test': tests_require,
+    },
 
     test_suite="smart_open.tests",
 

diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py
@@ -697,7 +697,7 @@ def __init__(self, url, mode='r', kerberos=False, user=None, password=None):
             auth = (user, password)
         else:
             auth = None
-        
+
         self.response = requests.get(url, auth=auth, stream=True)
 
         if not self.response.ok:
@@ -747,7 +747,7 @@ def read(self, size=None):
             if self._read_iter is None:
                 self._read_iter = self.response.iter_content(size)
                 self._read_buffer = next(self._read_iter)
-            
+
             while len(self._read_buffer) < size:
                 try:
                     self._read_buffer += next(self._read_iter)
@@ -760,7 +760,7 @@ def read(self, size=None):
                         return ''
                     else:
                         return retval
-            
+
             # If we got here, it means we have enough data in the buffer
             # to return to the caller.
             retval = self._read_buffer[:size]