Skip to content

Commit

Permalink
Make NumPy, pandas and pyarrow hard deps (#120)
Browse files Browse the repository at this point in the history
Add these three packages as firm dependencies to lower the cognitive cost of maintaining the package, and to make it easier for users to get started immediately after install
  • Loading branch information
randyzwitch committed Nov 20, 2018
1 parent 00ea27b commit 29cd075
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 54 deletions.
2 changes: 1 addition & 1 deletion ci/install-travis.sh
Expand Up @@ -18,7 +18,7 @@ conda update -q conda

echo
echo "[conda build]"
conda install conda-build anaconda-client --yes
conda install conda-build anaconda-client conda-verify --yes

echo
echo "[add channels]"
Expand Down
2 changes: 2 additions & 0 deletions conda-recipes/pymapd/conda_build_config.yaml
@@ -0,0 +1,2 @@
numpy:
- 1.14
6 changes: 4 additions & 2 deletions conda-recipes/pymapd/meta.yaml
Expand Up @@ -20,13 +20,15 @@ requirements:
- setuptools
- setuptools_scm
- cython
- numpy 1.11.*
- numpy>= 1.14
- arrow-cpp =0.10.0
- pyarrow =0.10.0
- pandas
- conda-verify
run:
- python
- setuptools
- numpy >=1.11.*
- numpy>=1.14
- libgdf
- pygdf
- arrow-cpp =0.10.0
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Expand Up @@ -11,5 +11,5 @@ dependencies:
- arrow-cpp=0.10.0
- sqlalchemy
- cython
- numpy
- numpy>=1.14
- pandas
5 changes: 1 addition & 4 deletions pymapd/_parsers.py
Expand Up @@ -2,6 +2,7 @@
Utility methods for parsing data returned from MapD
"""
import datetime
import pyarrow as pa
from collections import namedtuple
from sqlalchemy import text
import mapd.ttypes as T
Expand Down Expand Up @@ -122,8 +123,6 @@ def _load_schema(buf):
-------
schema : pyarrow.Schema
"""
import pyarrow as pa

reader = pa.RecordBatchStreamReader(buf)
return reader.schema

Expand All @@ -142,8 +141,6 @@ def _load_data(buf, schema, tdf=None):
-------
df : pandas.DataFrame
"""
import pyarrow as pa

message = pa.read_message(buf)
rb = pa.read_record_batch(message, schema)
df = rb.to_pandas()
Expand Down
34 changes: 7 additions & 27 deletions pymapd/connection.py
Expand Up @@ -3,6 +3,8 @@
"""
from collections import namedtuple
import base64
import pandas as pd
import pyarrow as pa

import six
from sqlalchemy.engine.url import make_url
Expand All @@ -22,12 +24,6 @@
)
from ._loaders import _build_input_rows

try:
import pyarrow as pa
_HAS_ARROW = True
except ImportError:
_HAS_ARROW = False


ConnectionInfo = namedtuple("ConnectionInfo", ['user', 'password', 'host',
'port', 'dbname', 'protocol'])
Expand Down Expand Up @@ -287,18 +283,13 @@ def select_ipc(self, operation, parameters=None, first_n=-1):
Notes
-----
This method requires pandas and pyarrow to be installed
This method requires pyarrow to be installed
"""
try:
import pyarrow # noqa
except ImportError:
raise ImportError("pyarrow is required for `select_ipc`")

try:
import pandas # noqa
except ImportError:
raise ImportError("pandas is required for `select_ipc`")

from .shm import load_buffer

if parameters is not None:
Expand Down Expand Up @@ -460,10 +451,10 @@ def load_table(self, table_name, data, method='infer',
self.create_table(table_name, data)

if method == 'infer':
if (_is_pandas(data) or _is_arrow(data)) and _HAS_ARROW:
if (isinstance(data, pd.DataFrame) or _is_arrow(data)):
return self.load_table_arrow(table_name, data)

elif _is_pandas(data):
elif (isinstance(data, pd.DataFrame)):
return self.load_table_columnar(table_name, data)

elif method == 'arrow':
Expand Down Expand Up @@ -537,7 +528,7 @@ def load_table_columnar(
"""
from . import _pandas_loaders

if _is_pandas(data):
if isinstance(data, pd.DataFrame):
input_cols = _pandas_loaders.build_input_columnar(
data,
preserve_index=preserve_index,
Expand Down Expand Up @@ -616,20 +607,9 @@ def _repr_mimebundle_(self, include=None, exclude=None):
}


def _is_pandas(data):
try:
import pandas as pd
except ImportError:
return False
else:
return isinstance(data, pd.DataFrame)


def _is_arrow(data):
"""Whether `data` is an arrow `Table` or `RecordBatch`"""
if _HAS_ARROW:
return isinstance(data, pa.Table) or isinstance(data, pa.RecordBatch)
return False
return isinstance(data, pa.Table) or isinstance(data, pa.RecordBatch)


def _check_create(create):
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Expand Up @@ -18,7 +18,8 @@
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()

install_requires = ['six', 'thrift == 0.11.0', 'sqlalchemy']
install_requires = ['six', 'thrift == 0.11.0', 'sqlalchemy', 'numpy', 'pandas',
'pyarrow == 0.10.0']

# Optional Requirements

Expand All @@ -27,8 +28,7 @@
test_requires = ['coverage', 'pytest == 3.3.1', 'pytest-mock']
dev_requires = doc_requires + test_requires
gpu_requires = ['pygdf', 'libgdf']
arrow_requires = ['pyarrow == 0.10.0']
complete_requires = dev_requires + gpu_requires + arrow_requires
complete_requires = dev_requires + gpu_requires

if sys.version_info.major == 2:
test_requires.append("mock")
Expand All @@ -39,7 +39,6 @@
'test': test_requires,
'dev': dev_requires,
'gpu': gpu_requires,
'arrow': arrow_requires,
'complete': complete_requires,
}

Expand Down
15 changes: 0 additions & 15 deletions tests/test_integration.py
Expand Up @@ -202,21 +202,6 @@ def test_select_dates(self, columnar, con, date_table):

class TestOptionalImports(object):

def test_select_ipc_pyarrow(self, con):
with mock.patch.dict('sys.modules', {'pyarrow': None}):
with pytest.raises(ImportError) as m:
con.select_ipc("select * from foo;")

assert m.match("pyarrow is required for `select_ipc`")

def test_select_ipc_pandas(self, con):
pytest.importorskip("pyarrow")
with mock.patch.dict('sys.modules', {'pandas': None}):
with pytest.raises(ImportError) as m:
con.select_ipc("select * from foo;")

assert m.match("pandas is required for `select_ipc`")

def test_select_gpu(self, con):
with mock.patch.dict("sys.modules",
{"pygdf": None, "pygdf.dataframe": None}):
Expand Down

0 comments on commit 29cd075

Please sign in to comment.