Added fix for is_iterable, as Python 3.x adds '__iter__' as an attr to strings
tgsmith61591 · Nov 2, 2016 · 98b29b0 · 98b29b0
1 parent dd7188e
commit 98b29b0
Show file tree

Hide file tree

Showing 8 changed files with 54 additions and 24 deletions.
diff --git a/skutil/h2o/base.py b/skutil/h2o/base.py
@@ -1,11 +1,10 @@
 from __future__ import print_function, division, absolute_import
 import warnings
+import h2o
 import os
-
+from ..utils.fixes import is_iterable
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.externals import six
-
-import h2o
 from h2o.frame import H2OFrame
 
 # in different versions, we get different exceptions
@@ -169,7 +168,7 @@ def validate_x(x):
     """
     if x is not None:
         # validate feature_names
-        if not (hasattr(x, '__iter__') and all([isinstance(i, six.string_types) for i in x])):
+        if not (is_iterable(x) and all([isinstance(i, six.string_types) for i in x])):
             raise TypeError('x must be an iterable of strings. '
                             'Got %s' % str(x))
 

diff --git a/skutil/h2o/metrics.py b/skutil/h2o/metrics.py
@@ -1,20 +1,19 @@
+# -*- coding: utf-8 -*-
 """Metrics for scoring H2O model predictions"""
 # Author: Taylor Smith
 # adapted from sklearn for use with skutil & H2OFrames
 
 from __future__ import absolute_import, division, print_function
-
 import abc
 import warnings
-
 import numpy as np
 from h2o.frame import H2OFrame
 from sklearn.externals import six
-
 from .frame import _check_is_1d_frame, is_integer
 from .transform import H2OLabelEncoder
 from .util import h2o_bincount, h2o_col_to_numpy
 from ..utils import flatten_all
+from ..utils.fixes import is_iterable
 
 __all__ = [
     'h2o_accuracy_score',
@@ -41,7 +40,7 @@ def _get_bool(x):
     x : bool or iterable
         The boolean to extract
     """
-    if hasattr(x, '__iter__'):
+    if is_iterable(x):
         return flatten_all(x)[0]
     return x
 

diff --git a/skutil/h2o/select.py b/skutil/h2o/select.py
@@ -5,6 +5,7 @@
 from h2o.frame import H2OFrame
 from ..feature_selection import filter_collinearity
 from ..utils import is_numeric
+from ..utils.fixes import is_iterable
 from .base import (BaseH2OTransformer, check_frame, _retain_features, _frame_from_x_y)
 
 __all__ = [
@@ -171,7 +172,7 @@ def fit(self, X):
             fn = []
 
         # We validate the features_names is a list or iterable
-        if hasattr(fn, '__iter__'):
+        if is_iterable(fn):
             self.drop_ = [i for i in fn]
         else:
             raise ValueError('expected iterable for feature_names')

diff --git a/skutil/h2o/transform.py b/skutil/h2o/transform.py
@@ -3,6 +3,7 @@
 from ..utils import is_numeric, flatten_all
 from .frame import _check_is_1d_frame
 from .util import h2o_col_to_numpy, _unq_vals_col
+from ..utils.fixes import is_iterable
 from ..preprocessing import ImputerMixin
 from sklearn.externals import six
 import pandas as pd
@@ -22,7 +23,7 @@ def _flatten_one(x):
     a list is. This will determine the proper 
     type for each item in the vec.
     """
-    return x[0] if hasattr(x, '__iter__') else x
+    return x[0] if is_iterable(x) else x
 
 
 class H2OLabelEncoder(BaseH2OTransformer):
@@ -226,7 +227,7 @@ def fit(self, X):
             else:
                 self.fill_val_ = dict(zip(cols, flatten_all([X[c].mean(na_rm=True) for c in cols])))
 
-        elif hasattr(fill, '__iter__'):
+        elif is_iterable(fill):
 
             # if fill is a dictionary
             if isinstance(fill, dict):

diff --git a/skutil/metrics/_act.py b/skutil/metrics/_act.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 from h2o.frame import H2OFrame
+from ..utils.fixes import is_iterable
 import pandas as pd
 import numpy as np
 import warnings
@@ -29,7 +30,7 @@ def _single_as_numpy(x):
                     raise ValueError('must be 1d column')
                 _1d = x[x.columns[0]].as_data_frame(use_pandas=True)
                 return _1d[_1d.columns[0]].values
-            elif hasattr(x, '__iter__'):
+            elif is_iterable(x):
                 return np.asarray(x)
             else:
                 raise TypeError('cannot create numpy array out of type=%s' % type(x))

diff --git a/skutil/preprocessing/impute.py b/skutil/preprocessing/impute.py
@@ -1,5 +1,6 @@
-from __future__ import division, print_function, absolute_import
+# -*- coding: utf-8 -*-
 
+from __future__ import division, print_function, absolute_import
 import numpy as np
 import pandas as pd
 from sklearn.base import BaseEstimator, TransformerMixin, is_classifier
@@ -9,6 +10,7 @@
 from abc import ABCMeta
 from skutil.base import SelectiveMixin, BaseSkutil
 from ..utils import is_entirely_numeric, get_numeric, validate_is_pd, is_numeric
+from ..utils.fixes import is_iterable
 
 __all__ = [
     'BaggedImputer',
@@ -194,7 +196,7 @@ def fit(self, X, y=None):
                 self.modes_ = dict(zip(cols, X[cols].apply(lambda x: np.nanmean(x.values))))
 
         # if the fill is an iterable, we have to get a bit more stringent on our validation
-        elif hasattr(fill, '__iter__'):
+        elif is_iterable(fill):
 
             # if fill is a dictionary
             if isinstance(fill, dict):

diff --git a/skutil/utils/fixes.py b/skutil/utils/fixes.py
@@ -24,7 +24,8 @@
     '_CVScoreTuple',
     '_grid_detail',
     '_SK17GridSearchCV',
-    '_SK17RandomizedSearchCV'
+    '_SK17RandomizedSearchCV',
+    'is_iterable'
 ]
 
 VERSION_MAJOR = sys.version_info.major
@@ -85,6 +86,32 @@ def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
             for train, test in cv)
 
 
+def is_iterable(x):
+    """Test whether ``x`` is a non-string iterable.
+
+    Python 3.x adds the ``__iter__`` attribute to ``str`` and
+    ``bytes``, so ``hasattr(x, '__iter__')`` alone no longer
+    rejects strings the way it did on Python 2.
+
+    Parameters
+    ----------
+
+    x : object
+        The object or primitive to test.
+
+    Returns
+    -------
+
+    bool
+        False for text or byte strings; otherwise whether
+        ``x`` defines ``__iter__``.
+    """
+    # binary_type is ``str`` on py2 and ``bytes`` on py3; without it
+    # py3 byte strings would slip through as "iterables".
+    str_types = six.string_types + (six.binary_type,)
+    return not isinstance(x, str_types) and hasattr(x, '__iter__')
+
+
 def _is_integer(x):
     """Determine whether some object ``x`` is an
     integer type (int, long, etc). This is part of the 
@@ -293,7 +320,7 @@ def _as_numpy(y):
         return np.copy(y)
     elif hasattr(y, 'as_matrix'):
         return y.as_matrix()
-    elif hasattr(y, '__iter__'):
+    elif is_iterable(y):
         return np.asarray([i for i in y])
     raise TypeError('cannot convert type %s to numpy ndarray' % type(y))
 

diff --git a/skutil/utils/util.py b/skutil/utils/util.py
@@ -9,7 +9,7 @@
 from sklearn.externals import six
 from sklearn.metrics import confusion_matrix as cm
 from ..base import suppress_warnings
-from .fixes import _grid_detail, _is_integer
+from .fixes import _grid_detail, _is_integer, is_iterable
 
 try:
     # this causes a UserWarning to be thrown by matplotlib... should we squelch this?
@@ -117,9 +117,9 @@ def _exp_single(x):
 
 
 def _vectorize(fun, x):
-    if hasattr(x, '__iter__'):
+    if is_iterable(x):
         return np.array([fun(p) for p in x])
-    raise ValueError('Type %s does not have attr __iter__' % type(x))
+    raise ValueError('Type %s is not iterable' % type(x))
 
 
 def exp(x):
@@ -182,7 +182,7 @@ def _val_cols(cols):
         return cols
 
     # try to make cols a list
-    if not hasattr(cols, '__iter__'):
+    if not is_iterable(cols):
         if isinstance(cols, six.string_types):
             return [cols]
         else:
@@ -351,11 +351,11 @@ def flatten_all_generator(container):
         >>> flatten_all(a)
         [3,4,'1','a',1,1,2] # yields a generator for this iterable
     """
-    if not hasattr(container, '__iter__'):
+    if not is_iterable(container):
         yield container
     else:
         for i in container:
-            if hasattr(i, '__iter__'):
+            if is_iterable(i):
                 for j in flatten_all_generator(i):
                     yield j
             else:
@@ -446,7 +446,7 @@ def _check(X, cols):
         is_df = isinstance(X, pd.DataFrame)
 
         # we do want to make sure the X at least is "array-like"
-        if not hasattr(X, '__iter__'):
+        if not is_iterable(X):
             raise TypeError('X (type=%s) cannot be cast to DataFrame' % type(X))
 
         # case 1, we have names but the X is not a frame
@@ -470,7 +470,7 @@ def _check(X, cols):
         # case 4, we have neither a frame nor cols (maybe JUST a np.array?)
         else:
             # we'll do two tests here... either that it's a np ndarray or a list of lists
-            if isinstance(X, np.ndarray) or (hasattr(X, '__iter__') and all(isinstance(elem, list) for elem in X)):
+            if isinstance(X, np.ndarray) or (is_iterable(X) and all(isinstance(elem, list) for elem in X)):
                 return pd.DataFrame.from_records(data=X, columns=_def_headers(X)), None
 
             # bail out: