Skip to content

Commit

Permalink
TST: Made s3 related tests mock boto
Browse files Browse the repository at this point in the history
Kept a couple around for testing things like accessing a private bucket as that's hard to mock.

Try the pip counterparts

Some more merge request changes
  • Loading branch information
kirkhansen committed Sep 12, 2017
1 parent 83436af commit c5176d6
Show file tree
Hide file tree
Showing 12 changed files with 102 additions and 67 deletions.
6 changes: 6 additions & 0 deletions appveyor.yml
Expand Up @@ -74,12 +74,18 @@ install:
# create our env
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
- cmd: activate pandas
- cmd: pip install moto
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
- cmd: echo "installing requirements from %REQ%"
- cmd: conda install -n pandas --file=%REQ%
- cmd: conda list -n pandas
- cmd: echo "installing requirements from %REQ% - done"

# add some pip only reqs to the env
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
- cmd: echo "installing requirements from %REQ%"
- cmd: pip install -Ur %REQ%

# build em using the local source checkout in the correct windows env
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'

Expand Down
1 change: 1 addition & 0 deletions ci/install_circle.sh
Expand Up @@ -65,6 +65,7 @@ fi
echo "[create env: ${REQ_BUILD}]"
time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
time conda install -n pandas pytest>=3.1.0 || exit 1
time pip install moto || exit 1

source activate pandas

Expand Down
2 changes: 1 addition & 1 deletion ci/install_travis.sh
Expand Up @@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then
fi

time conda install -n pandas pytest>=3.1.0
time pip install pytest-xdist
time pip install pytest-xdist moto

if [ "$LINT" ]; then
conda install flake8
Expand Down
Empty file added ci/requirements-2.7_WIN.pip
Empty file.
Empty file.
Empty file added ci/requirements-3.6_WIN.pip
Empty file.
1 change: 1 addition & 0 deletions ci/requirements_dev.txt
Expand Up @@ -5,3 +5,4 @@ cython
pytest>=3.1.0
pytest-cov
flake8
moto
Binary file added pandas/tests/io/parser/data/tips.csv.bz2
Binary file not shown.
Binary file added pandas/tests/io/parser/data/tips.csv.gz
Binary file not shown.
100 changes: 63 additions & 37 deletions pandas/tests/io/parser/test_network.py
Expand Up @@ -4,13 +4,20 @@
Tests parsers ability to read and parse non-local files
and hence require a network connection to be read.
"""

import os

import pytest
import moto

import pandas.util.testing as tm
from pandas import DataFrame
from pandas.io.parsers import read_csv, read_table
from pandas.compat import BytesIO


@pytest.fixture(scope='module')
def tips_file():
    """Absolute path to the local ``tips.csv`` test-data file."""
    data_dir = tm.get_data_path()
    return os.path.join(data_dir, 'tips.csv')


@pytest.fixture(scope='module')
Expand All @@ -19,6 +26,40 @@ def salaries_table():
return read_table(path)


@pytest.fixture(scope='module')
def test_s3_resource(tips_file):
    """Module-scoped mocked S3 resource pre-loaded with the tips files.

    Starts a moto S3 mock, creates a public ``pandas-test`` bucket and a
    private ``cant_get_it`` bucket, uploads ``tips.csv`` (plain, .gz,
    .bz2) to each, and yields the boto3 ``s3`` resource.  All S3 traffic
    from the tests is intercepted by moto while the fixture is active.

    Skips if ``s3fs`` or ``boto3`` is not installed.
    """
    pytest.importorskip('s3fs')
    # Resolve boto3 (and skip if missing) *before* starting the mock so
    # an ImportError cannot leave a started mock behind.
    boto3 = pytest.importorskip('boto3')

    # moto.mock_s3() constructs a *new* mock object on every call, so
    # start() and stop() must be invoked on the same saved instance;
    # calling moto.mock_s3().stop() would target a never-started mock
    # and leave the real one active.
    s3_mock = moto.mock_s3()
    s3_mock.start()
    try:
        test_s3_files = [
            ('tips.csv', tips_file),
            ('tips.csv.gz', tips_file + '.gz'),
            ('tips.csv.bz2', tips_file + '.bz2'),
        ]

        # see gh-16135
        conn = boto3.resource("s3", region_name="us-east-1")

        def add_tips_files(bucket_name):
            # Upload each local fixture file under its S3 key.
            for s3_key, file_name in test_s3_files:
                with open(file_name, 'rb') as f:
                    conn.Bucket(bucket_name).put_object(
                        Key=s3_key,
                        Body=f)

        bucket = 'pandas-test'
        conn.create_bucket(Bucket=bucket)
        add_tips_files(bucket)

        # Private bucket, used to exercise the access-denied code path.
        conn.create_bucket(Bucket='cant_get_it', ACL='private')
        add_tips_files('cant_get_it')

        yield conn
    finally:
        # Always tear the mock down, even if bucket setup failed, so the
        # mock does not leak into unrelated tests.
        s3_mock.stop()


@pytest.mark.network
@pytest.mark.parametrize(
"compression,extension",
Expand Down Expand Up @@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode,


class TestS3(object):

def setup_method(self, method):
try:
import s3fs # noqa
except ImportError:
pytest.skip("s3fs not installed")

@tm.network
def test_parse_public_s3_bucket(self):
pytest.importorskip('s3fs')
# more of an integration test due to the not-public contents portion
# can probably mock this though.
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' +
ext, compression=comp)
Expand All @@ -74,26 +111,24 @@ def test_parse_public_s3_bucket(self):
assert not df.empty
tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)

@tm.network
def test_parse_public_s3n_bucket(self):
def test_parse_public_s3n_bucket(self, test_s3_resource):

# Read from AWS s3 as "s3n" URL
df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3a_bucket(self):
def test_parse_public_s3a_bucket(self, test_s3_resource):
# Read from AWS s3 as "s3a" URL
df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3_bucket_nrows(self):
def test_parse_public_s3_bucket_nrows(self, test_s3_resource):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' +
ext, nrows=10, compression=comp)
Expand All @@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3_bucket_chunked(self):
def test_parse_public_s3_bucket_chunked(self, test_s3_resource):
# Read with a chunksize
chunksize = 5
local_tips = read_csv(tm.get_data_path('tips.csv'))
Expand All @@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self):
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)

@tm.network
def test_parse_public_s3_bucket_chunked_python(self):
def test_parse_public_s3_bucket_chunked_python(self, test_s3_resource):
# Read with a chunksize using the Python parser
chunksize = 5
local_tips = read_csv(tm.get_data_path('tips.csv'))
Expand All @@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self):
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)

@tm.network
def test_parse_public_s3_bucket_python(self):
def test_parse_public_s3_bucket_python(self, test_s3_resource):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
compression=comp)
Expand All @@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')), df)

@tm.network
def test_infer_s3_compression(self):
def test_infer_s3_compression(self, test_s3_resource):
for ext in ['', '.gz', '.bz2']:
df = read_csv('s3://pandas-test/tips.csv' + ext,
engine='python', compression='infer')
Expand All @@ -160,8 +191,7 @@ def test_infer_s3_compression(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')), df)

@tm.network
def test_parse_public_s3_bucket_nrows_python(self):
def test_parse_public_s3_bucket_nrows_python(self, test_s3_resource):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
nrows=10, compression=comp)
Expand All @@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_s3_fails(self):
def test_s3_fails(self, test_s3_resource):
with pytest.raises(IOError):
read_csv('s3://nyqpug/asdf.csv')

Expand All @@ -180,21 +209,18 @@ def test_s3_fails(self):
with pytest.raises(IOError):
read_csv('s3://cant_get_it/')

@tm.network
def boto3_client_s3(self):
def test_read_csv_handles_boto_s3_object(self,
test_s3_resource,
tips_file):
# see gh-16135

# boto3 is a dependency of s3fs
import boto3
client = boto3.client("s3")

key = "/tips.csv"
bucket = "pandas-test"
s3_object = client.get_object(Bucket=bucket, Key=key)
s3_object = test_s3_resource.meta.client.get_object(
Bucket='pandas-test',
Key='tips.csv')

result = read_csv(s3_object["Body"])
result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
assert isinstance(result, DataFrame)
assert not result.empty

expected = read_csv(tm.get_data_path('tips.csv'))
expected = read_csv(tips_file)
tm.assert_frame_equal(result, expected)
58 changes: 29 additions & 29 deletions pandas/tests/io/test_excel.py
@@ -1,33 +1,32 @@
# pylint: disable=E1101

from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems
from datetime import datetime, date, time
import sys
import functools
import operator
import os
import sys
import warnings
from datetime import datetime, date, time
from distutils.version import LooseVersion
from functools import partial

import warnings
from warnings import catch_warnings
import operator
import functools
import pytest

from numpy import nan
import numpy as np
import pytest
from numpy import nan
import moto

import pandas as pd
import pandas.util.testing as tm
from pandas import DataFrame, Index, MultiIndex
from pandas.io.formats.excel import ExcelFormatter
from pandas.io.parsers import read_csv
from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems
from pandas.core.config import set_option, get_option
from pandas.io.common import URLError
from pandas.io.excel import (
ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer,
_Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter
)
from pandas.io.common import URLError
from pandas.io.formats.excel import ExcelFormatter
from pandas.io.parsers import read_csv
from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf
from pandas.core.config import set_option, get_option
import pandas.util.testing as tm


def _skip_if_no_xlrd():
Expand Down Expand Up @@ -67,13 +66,6 @@ def _skip_if_no_excelsuite():
_skip_if_no_openpyxl()


def _skip_if_no_s3fs():
try:
import s3fs # noqa
except ImportError:
pytest.skip('s3fs not installed, skipping')


_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
_frame = DataFrame(_seriesd)[:10]
Expand Down Expand Up @@ -605,14 +597,22 @@ def test_read_from_http_url(self):
local_table = self.get_exceldf('test1')
tm.assert_frame_equal(url_table, local_table)

@tm.network(check_before_test=True)
def test_read_from_s3_url(self):
_skip_if_no_s3fs()

url = ('s3://pandas-test/test1' + self.ext)
url_table = read_excel(url)
local_table = self.get_exceldf('test1')
tm.assert_frame_equal(url_table, local_table)
boto3 = pytest.importorskip('boto3')
pytest.importorskip('s3fs')

with moto.mock_s3():
conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket="pandas-test")
file_name = os.path.join(self.dirpath, 'test1' + self.ext)
with open(file_name, 'rb') as f:
conn.Bucket("pandas-test").put_object(Key="test1" + self.ext,
Body=f)

url = ('s3://pandas-test/test1' + self.ext)
url_table = read_excel(url)
local_table = self.get_exceldf('test1')
tm.assert_frame_equal(url_table, local_table)

@pytest.mark.slow
def test_read_from_file_url(self):
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Expand Up @@ -19,6 +19,7 @@ deps =
xlrd
six
sqlalchemy
moto

# cd to anything but the default {toxinidir} which
# contains the pandas subdirectory and confuses
Expand Down

0 comments on commit c5176d6

Please sign in to comment.