Skip to content

Commit

Permalink
Make NumPy, pandas and pyarrow hard deps (#120)
Browse files Browse the repository at this point in the history
Add these three packages as firm dependencies to lower the cognitive cost of maintaining the package, and to make it easier for users to get started immediately after install
  • Loading branch information
randyzwitch committed Nov 20, 2018
1 parent 00ea27b commit 29cd075
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 54 deletions.
2 changes: 1 addition & 1 deletion ci/install-travis.sh
Expand Up @@ -18,7 +18,7 @@ conda update -q conda

echo
echo "[conda build]"
conda install conda-build anaconda-client --yes
conda install conda-build anaconda-client conda-verify --yes

echo
echo "[add channels]"
Expand Down
2 changes: 2 additions & 0 deletions conda-recipes/pymapd/conda_build_config.yaml
@@ -0,0 +1,2 @@
numpy:
- 1.14
6 changes: 4 additions & 2 deletions conda-recipes/pymapd/meta.yaml
Expand Up @@ -20,13 +20,15 @@ requirements:
- setuptools
- setuptools_scm
- cython
- numpy 1.11.*
- numpy>= 1.14
- arrow-cpp =0.10.0
- pyarrow =0.10.0
- pandas
- conda-verify
run:
- python
- setuptools
- numpy >=1.11.*
- numpy>=1.14
- libgdf
- pygdf
- arrow-cpp =0.10.0
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Expand Up @@ -11,5 +11,5 @@ dependencies:
- arrow-cpp=0.10.0
- sqlalchemy
- cython
- numpy
- numpy>=1.14
- pandas
5 changes: 1 addition & 4 deletions pymapd/_parsers.py
Expand Up @@ -2,6 +2,7 @@
Utility methods for parsing data returned from MapD
"""
import datetime
import pyarrow as pa
from collections import namedtuple
from sqlalchemy import text
import mapd.ttypes as T
Expand Down Expand Up @@ -122,8 +123,6 @@ def _load_schema(buf):
-------
schema : pyarrow.Schema
"""
import pyarrow as pa

reader = pa.RecordBatchStreamReader(buf)
return reader.schema

Expand All @@ -142,8 +141,6 @@ def _load_data(buf, schema, tdf=None):
-------
df : pandas.DataFrame
"""
import pyarrow as pa

message = pa.read_message(buf)
rb = pa.read_record_batch(message, schema)
df = rb.to_pandas()
Expand Down
34 changes: 7 additions & 27 deletions pymapd/connection.py
Expand Up @@ -3,6 +3,8 @@
"""
from collections import namedtuple
import base64
import pandas as pd
import pyarrow as pa

import six
from sqlalchemy.engine.url import make_url
Expand All @@ -22,12 +24,6 @@
)
from ._loaders import _build_input_rows

try:
import pyarrow as pa
_HAS_ARROW = True
except ImportError:
_HAS_ARROW = False


ConnectionInfo = namedtuple("ConnectionInfo", ['user', 'password', 'host',
'port', 'dbname', 'protocol'])
Expand Down Expand Up @@ -287,18 +283,13 @@ def select_ipc(self, operation, parameters=None, first_n=-1):
Notes
-----
This method requires pandas and pyarrow to be installed
This method requires pyarrow to be installed
"""
try:
import pyarrow # noqa
except ImportError:
raise ImportError("pyarrow is required for `select_ipc`")

try:
import pandas # noqa
except ImportError:
raise ImportError("pandas is required for `select_ipc`")

from .shm import load_buffer

if parameters is not None:
Expand Down Expand Up @@ -460,10 +451,10 @@ def load_table(self, table_name, data, method='infer',
self.create_table(table_name, data)

if method == 'infer':
if (_is_pandas(data) or _is_arrow(data)) and _HAS_ARROW:
if (isinstance(data, pd.DataFrame) or _is_arrow(data)):
return self.load_table_arrow(table_name, data)

elif _is_pandas(data):
elif (isinstance(data, pd.DataFrame)):
return self.load_table_columnar(table_name, data)

elif method == 'arrow':
Expand Down Expand Up @@ -537,7 +528,7 @@ def load_table_columnar(
"""
from . import _pandas_loaders

if _is_pandas(data):
if isinstance(data, pd.DataFrame):
input_cols = _pandas_loaders.build_input_columnar(
data,
preserve_index=preserve_index,
Expand Down Expand Up @@ -616,20 +607,9 @@ def _repr_mimebundle_(self, include=None, exclude=None):
}


def _is_pandas(data):
try:
import pandas as pd
except ImportError:
return False
else:
return isinstance(data, pd.DataFrame)


def _is_arrow(data):
"""Whether `data` is an arrow `Table` or `RecordBatch`"""
if _HAS_ARROW:
return isinstance(data, pa.Table) or isinstance(data, pa.RecordBatch)
return False
return isinstance(data, pa.Table) or isinstance(data, pa.RecordBatch)


def _check_create(create):
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Expand Up @@ -18,7 +18,8 @@
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()

install_requires = ['six', 'thrift == 0.11.0', 'sqlalchemy']
install_requires = ['six', 'thrift == 0.11.0', 'sqlalchemy', 'numpy', 'pandas',
'pyarrow == 0.10.0']

# Optional Requirements

Expand All @@ -27,8 +28,7 @@
test_requires = ['coverage', 'pytest == 3.3.1', 'pytest-mock']
dev_requires = doc_requires + test_requires
gpu_requires = ['pygdf', 'libgdf']
arrow_requires = ['pyarrow == 0.10.0']
complete_requires = dev_requires + gpu_requires + arrow_requires
complete_requires = dev_requires + gpu_requires

if sys.version_info.major == 2:
test_requires.append("mock")
Expand All @@ -39,7 +39,6 @@
'test': test_requires,
'dev': dev_requires,
'gpu': gpu_requires,
'arrow': arrow_requires,
'complete': complete_requires,
}

Expand Down
15 changes: 0 additions & 15 deletions tests/test_integration.py
Expand Up @@ -202,21 +202,6 @@ def test_select_dates(self, columnar, con, date_table):

class TestOptionalImports(object):

def test_select_ipc_pyarrow(self, con):
with mock.patch.dict('sys.modules', {'pyarrow': None}):
with pytest.raises(ImportError) as m:
con.select_ipc("select * from foo;")

assert m.match("pyarrow is required for `select_ipc`")

def test_select_ipc_pandas(self, con):
pytest.importorskip("pyarrow")
with mock.patch.dict('sys.modules', {'pandas': None}):
with pytest.raises(ImportError) as m:
con.select_ipc("select * from foo;")

assert m.match("pandas is required for `select_ipc`")

def test_select_gpu(self, con):
with mock.patch.dict("sys.modules",
{"pygdf": None, "pygdf.dataframe": None}):
Expand Down

0 comments on commit 29cd075

Please sign in to comment.