Skip to content

Commit

Permalink
TST: Made s3 related tests mock boto
Browse files Browse the repository at this point in the history
Kept a couple around for testing things like accessing a private bucket as that's hard to mock.

Try the pip counterparts

Some more merge request changes
  • Loading branch information
kirkhansen committed Sep 12, 2017
1 parent 83436af commit c5176d6
Show file tree
Hide file tree
Showing 12 changed files with 102 additions and 67 deletions.
6 changes: 6 additions & 0 deletions appveyor.yml
Expand Up @@ -74,12 +74,18 @@ install:
# create our env
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
- cmd: activate pandas
- cmd: pip install moto
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
- cmd: echo "installing requirements from %REQ%"
- cmd: conda install -n pandas --file=%REQ%
- cmd: conda list -n pandas
- cmd: echo "installing requirements from %REQ% - done"

# add some pip only reqs to the env
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
- cmd: echo "installing requirements from %REQ%"
- cmd: pip install -Ur %REQ%

# build em using the local source checkout in the correct windows env
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'

Expand Down
1 change: 1 addition & 0 deletions ci/install_circle.sh
Expand Up @@ -65,6 +65,7 @@ fi
echo "[create env: ${REQ_BUILD}]"
time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
time conda install -n pandas pytest>=3.1.0 || exit 1
time pip install moto || exit 1

source activate pandas

Expand Down
2 changes: 1 addition & 1 deletion ci/install_travis.sh
Expand Up @@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then
fi

time conda install -n pandas pytest>=3.1.0
time pip install pytest-xdist
time pip install pytest-xdist moto

if [ "$LINT" ]; then
conda install flake8
Expand Down
Empty file added ci/requirements-2.7_WIN.pip
Empty file.
Empty file.
Empty file added ci/requirements-3.6_WIN.pip
Empty file.
1 change: 1 addition & 0 deletions ci/requirements_dev.txt
Expand Up @@ -5,3 +5,4 @@ cython
pytest>=3.1.0
pytest-cov
flake8
moto
Binary file added pandas/tests/io/parser/data/tips.csv.bz2
Binary file not shown.
Binary file added pandas/tests/io/parser/data/tips.csv.gz
Binary file not shown.
100 changes: 63 additions & 37 deletions pandas/tests/io/parser/test_network.py
Expand Up @@ -4,13 +4,20 @@
Tests parsers ability to read and parse non-local files
and hence require a network connection to be read.
"""

import os

import pytest
import moto

import pandas.util.testing as tm
from pandas import DataFrame
from pandas.io.parsers import read_csv, read_table
from pandas.compat import BytesIO


@pytest.fixture(scope='module')
def tips_file():
    """Absolute path to the local ``tips.csv`` test-data file."""
    data_dir = tm.get_data_path()
    return os.path.join(data_dir, 'tips.csv')


@pytest.fixture(scope='module')
Expand All @@ -19,6 +26,40 @@ def salaries_table():
return read_table(path)


@pytest.fixture(scope='module')
def test_s3_resource(tips_file):
    """Module-scoped mocked S3 resource pre-loaded with the tips files.

    Starts a moto S3 mock, creates a public ``pandas-test`` bucket and a
    private ``cant_get_it`` bucket, uploads ``tips.csv`` (plain, .gz,
    .bz2) to each, and yields the boto3 ``s3`` resource.  All S3 traffic
    from the tests is intercepted by moto while the fixture is active.

    Skips if ``s3fs`` or ``boto3`` is not installed.
    """
    pytest.importorskip('s3fs')
    # Resolve boto3 (and skip if missing) *before* starting the mock so
    # an ImportError cannot leave a started mock behind.
    boto3 = pytest.importorskip('boto3')

    # moto.mock_s3() constructs a *new* mock object on every call, so
    # start() and stop() must be invoked on the same saved instance;
    # calling moto.mock_s3().stop() would target a never-started mock
    # and leave the real one active.
    s3_mock = moto.mock_s3()
    s3_mock.start()
    try:
        test_s3_files = [
            ('tips.csv', tips_file),
            ('tips.csv.gz', tips_file + '.gz'),
            ('tips.csv.bz2', tips_file + '.bz2'),
        ]

        # see gh-16135
        conn = boto3.resource("s3", region_name="us-east-1")

        def add_tips_files(bucket_name):
            # Upload each local fixture file under its S3 key.
            for s3_key, file_name in test_s3_files:
                with open(file_name, 'rb') as f:
                    conn.Bucket(bucket_name).put_object(
                        Key=s3_key,
                        Body=f)

        bucket = 'pandas-test'
        conn.create_bucket(Bucket=bucket)
        add_tips_files(bucket)

        # Private bucket, used to exercise the access-denied code path.
        conn.create_bucket(Bucket='cant_get_it', ACL='private')
        add_tips_files('cant_get_it')

        yield conn
    finally:
        # Always tear the mock down, even if bucket setup failed, so the
        # mock does not leak into unrelated tests.
        s3_mock.stop()


@pytest.mark.network
@pytest.mark.parametrize(
"compression,extension",
Expand Down Expand Up @@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode,


class TestS3(object):

def setup_method(self, method):
try:
import s3fs # noqa
except ImportError:
pytest.skip("s3fs not installed")

@tm.network
def test_parse_public_s3_bucket(self):
pytest.importorskip('s3fs')
# more of an integration test due to the not-public contents portion
# can probably mock this though.
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' +
ext, compression=comp)
Expand All @@ -74,26 +111,24 @@ def test_parse_public_s3_bucket(self):
assert not df.empty
tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)

@tm.network
def test_parse_public_s3n_bucket(self):
def test_parse_public_s3n_bucket(self, test_s3_resource):

# Read from AWS s3 as "s3n" URL
df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3a_bucket(self):
def test_parse_public_s3a_bucket(self, test_s3_resource):
# Read from AWS s3 as "s3a" URL
df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3_bucket_nrows(self):
def test_parse_public_s3_bucket_nrows(self, test_s3_resource):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' +
ext, nrows=10, compression=comp)
Expand All @@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3_bucket_chunked(self):
def test_parse_public_s3_bucket_chunked(self, test_s3_resource):
# Read with a chunksize
chunksize = 5
local_tips = read_csv(tm.get_data_path('tips.csv'))
Expand All @@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self):
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)

@tm.network
def test_parse_public_s3_bucket_chunked_python(self):
def test_parse_public_s3_bucket_chunked_python(self, test_s3_resource):
# Read with a chunksize using the Python parser
chunksize = 5
local_tips = read_csv(tm.get_data_path('tips.csv'))
Expand All @@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self):
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)

@tm.network
def test_parse_public_s3_bucket_python(self):
def test_parse_public_s3_bucket_python(self, test_s3_resource):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
compression=comp)
Expand All @@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')), df)

@tm.network
def test_infer_s3_compression(self):
def test_infer_s3_compression(self, test_s3_resource):
for ext in ['', '.gz', '.bz2']:
df = read_csv('s3://pandas-test/tips.csv' + ext,
engine='python', compression='infer')
Expand All @@ -160,8 +191,7 @@ def test_infer_s3_compression(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')), df)

@tm.network
def test_parse_public_s3_bucket_nrows_python(self):
def test_parse_public_s3_bucket_nrows_python(self, test_s3_resource):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
nrows=10, compression=comp)
Expand All @@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self):
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_s3_fails(self):
def test_s3_fails(self, test_s3_resource):
with pytest.raises(IOError):
read_csv('s3://nyqpug/asdf.csv')

Expand All @@ -180,21 +209,18 @@ def test_s3_fails(self):
with pytest.raises(IOError):
read_csv('s3://cant_get_it/')

@tm.network
def boto3_client_s3(self):
def test_read_csv_handles_boto_s3_object(self,
test_s3_resource,
tips_file):
# see gh-16135

# boto3 is a dependency of s3fs
import boto3
client = boto3.client("s3")

key = "/tips.csv"
bucket = "pandas-test"
s3_object = client.get_object(Bucket=bucket, Key=key)
s3_object = test_s3_resource.meta.client.get_object(
Bucket='pandas-test',
Key='tips.csv')

result = read_csv(s3_object["Body"])
result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
assert isinstance(result, DataFrame)
assert not result.empty

expected = read_csv(tm.get_data_path('tips.csv'))
expected = read_csv(tips_file)
tm.assert_frame_equal(result, expected)
58 changes: 29 additions & 29 deletions pandas/tests/io/test_excel.py
@@ -1,33 +1,32 @@
# pylint: disable=E1101

from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems
from datetime import datetime, date, time
import sys
import functools
import operator
import os
import sys
import warnings
from datetime import datetime, date, time
from distutils.version import LooseVersion
from functools import partial

import warnings
from warnings import catch_warnings
import operator
import functools
import pytest

from numpy import nan
import numpy as np
import pytest
from numpy import nan
import moto

import pandas as pd
import pandas.util.testing as tm
from pandas import DataFrame, Index, MultiIndex
from pandas.io.formats.excel import ExcelFormatter
from pandas.io.parsers import read_csv
from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems
from pandas.core.config import set_option, get_option
from pandas.io.common import URLError
from pandas.io.excel import (
ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer,
_Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter
)
from pandas.io.common import URLError
from pandas.io.formats.excel import ExcelFormatter
from pandas.io.parsers import read_csv
from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf
from pandas.core.config import set_option, get_option
import pandas.util.testing as tm


def _skip_if_no_xlrd():
Expand Down Expand Up @@ -67,13 +66,6 @@ def _skip_if_no_excelsuite():
_skip_if_no_openpyxl()


def _skip_if_no_s3fs():
try:
import s3fs # noqa
except ImportError:
pytest.skip('s3fs not installed, skipping')


_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
_frame = DataFrame(_seriesd)[:10]
Expand Down Expand Up @@ -605,14 +597,22 @@ def test_read_from_http_url(self):
local_table = self.get_exceldf('test1')
tm.assert_frame_equal(url_table, local_table)

@tm.network(check_before_test=True)
def test_read_from_s3_url(self):
_skip_if_no_s3fs()

url = ('s3://pandas-test/test1' + self.ext)
url_table = read_excel(url)
local_table = self.get_exceldf('test1')
tm.assert_frame_equal(url_table, local_table)
boto3 = pytest.importorskip('boto3')
pytest.importorskip('s3fs')

with moto.mock_s3():
conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket="pandas-test")
file_name = os.path.join(self.dirpath, 'test1' + self.ext)
with open(file_name, 'rb') as f:
conn.Bucket("pandas-test").put_object(Key="test1" + self.ext,
Body=f)

url = ('s3://pandas-test/test1' + self.ext)
url_table = read_excel(url)
local_table = self.get_exceldf('test1')
tm.assert_frame_equal(url_table, local_table)

@pytest.mark.slow
def test_read_from_file_url(self):
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Expand Up @@ -19,6 +19,7 @@ deps =
xlrd
six
sqlalchemy
moto

# cd to anything but the default {toxinidir} which
# contains the pandas subdirectory and confuses
Expand Down

0 comments on commit c5176d6

Please sign in to comment.