Skip to content

Commit

Permalink
Added fix for is_iterable, as Python 3.x adds '__iter__' as an attr to strings
Browse files Browse the repository at this point in the history
  • Loading branch information
tgsmith61591 committed Nov 2, 2016
1 parent dd7188e commit 98b29b0
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 24 deletions.
7 changes: 3 additions & 4 deletions skutil/h2o/base.py
@@ -1,11 +1,10 @@
from __future__ import print_function, division, absolute_import
import warnings
import h2o
import os

from ..utils.fixes import is_iterable
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.externals import six

import h2o
from h2o.frame import H2OFrame

# in different versions, we get different exceptions
Expand Down Expand Up @@ -169,7 +168,7 @@ def validate_x(x):
"""
if x is not None:
# validate feature_names
if not (hasattr(x, '__iter__') and all([isinstance(i, six.string_types) for i in x])):
if not (is_iterable(x) and all([isinstance(i, six.string_types) for i in x])):
raise TypeError('x must be an iterable of strings. '
'Got %s' % str(x))

Expand Down
7 changes: 3 additions & 4 deletions skutil/h2o/metrics.py
@@ -1,20 +1,19 @@
# -*- coding: utf-8 -*-
"""Metrics for scoring H2O model predictions"""
# Author: Taylor Smith
# adapted from sklearn for use with skutil & H2OFrames

from __future__ import absolute_import, division, print_function

import abc
import warnings

import numpy as np
from h2o.frame import H2OFrame
from sklearn.externals import six

from .frame import _check_is_1d_frame, is_integer
from .transform import H2OLabelEncoder
from .util import h2o_bincount, h2o_col_to_numpy
from ..utils import flatten_all
from ..utils.fixes import is_iterable

__all__ = [
'h2o_accuracy_score',
Expand All @@ -41,7 +40,7 @@ def _get_bool(x):
x : bool or iterable
The boolean to extract
"""
if hasattr(x, '__iter__'):
if is_iterable(x):
return flatten_all(x)[0]
return x

Expand Down
3 changes: 2 additions & 1 deletion skutil/h2o/select.py
Expand Up @@ -5,6 +5,7 @@
from h2o.frame import H2OFrame
from ..feature_selection import filter_collinearity
from ..utils import is_numeric
from ..utils.fixes import is_iterable
from .base import (BaseH2OTransformer, check_frame, _retain_features, _frame_from_x_y)

__all__ = [
Expand Down Expand Up @@ -171,7 +172,7 @@ def fit(self, X):
fn = []

# We validate the features_names is a list or iterable
if hasattr(fn, '__iter__'):
if is_iterable(fn):
self.drop_ = [i for i in fn]
else:
raise ValueError('expected iterable for feature_names')
Expand Down
5 changes: 3 additions & 2 deletions skutil/h2o/transform.py
Expand Up @@ -3,6 +3,7 @@
from ..utils import is_numeric, flatten_all
from .frame import _check_is_1d_frame
from .util import h2o_col_to_numpy, _unq_vals_col
from ..utils.fixes import is_iterable
from ..preprocessing import ImputerMixin
from sklearn.externals import six
import pandas as pd
Expand All @@ -22,7 +23,7 @@ def _flatten_one(x):
a list is. This will determine the proper
type for each item in the vec.
"""
return x[0] if hasattr(x, '__iter__') else x
return x[0] if is_iterable(x) else x


class H2OLabelEncoder(BaseH2OTransformer):
Expand Down Expand Up @@ -226,7 +227,7 @@ def fit(self, X):
else:
self.fill_val_ = dict(zip(cols, flatten_all([X[c].mean(na_rm=True) for c in cols])))

elif hasattr(fill, '__iter__'):
elif is_iterable(fill):

# if fill is a dictionary
if isinstance(fill, dict):
Expand Down
3 changes: 2 additions & 1 deletion skutil/metrics/_act.py
@@ -1,5 +1,6 @@
from __future__ import division, absolute_import, print_function
from h2o.frame import H2OFrame
from ..utils.fixes import is_iterable
import pandas as pd
import numpy as np
import warnings
Expand Down Expand Up @@ -29,7 +30,7 @@ def _single_as_numpy(x):
raise ValueError('must be 1d column')
_1d = x[x.columns[0]].as_data_frame(use_pandas=True)
return _1d[_1d.columns[0]].values
elif hasattr(x, '__iter__'):
elif is_iterable(x):
return np.asarray(x)
else:
raise TypeError('cannot create numpy array out of type=%s' % type(x))
Expand Down
6 changes: 4 additions & 2 deletions skutil/preprocessing/impute.py
@@ -1,5 +1,6 @@
from __future__ import division, print_function, absolute_import
# -*- coding: utf-8 -*-

from __future__ import division, print_function, absolute_import
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin, is_classifier
Expand All @@ -9,6 +10,7 @@
from abc import ABCMeta
from skutil.base import SelectiveMixin, BaseSkutil
from ..utils import is_entirely_numeric, get_numeric, validate_is_pd, is_numeric
from ..utils.fixes import is_iterable

__all__ = [
'BaggedImputer',
Expand Down Expand Up @@ -194,7 +196,7 @@ def fit(self, X, y=None):
self.modes_ = dict(zip(cols, X[cols].apply(lambda x: np.nanmean(x.values))))

# if the fill is an iterable, we have to get a bit more stringent on our validation
elif hasattr(fill, '__iter__'):
elif is_iterable(fill):

# if fill is a dictionary
if isinstance(fill, dict):
Expand Down
31 changes: 29 additions & 2 deletions skutil/utils/fixes.py
Expand Up @@ -24,7 +24,8 @@
'_CVScoreTuple',
'_grid_detail',
'_SK17GridSearchCV',
'_SK17RandomizedSearchCV'
'_SK17RandomizedSearchCV',
'is_iterable'
]

VERSION_MAJOR = sys.version_info.major
Expand Down Expand Up @@ -85,6 +86,32 @@ def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
for train, test in cv)


def is_iterable(x):
    """Determine whether ``x`` is a non-string iterable.

    Python 3.x adds the ``__iter__`` attribute to strings, so the
    previous test for an iterable, ``hasattr(x, '__iter__')``, reports
    strings as iterables there. This function rejects text and byte
    strings explicitly before falling back to the ``__iter__`` check.

    Parameters
    ----------
    x : object
        The object or primitive to test whether
        or not is an iterable.

    Returns
    -------
    bool
        False if ``x`` is a text or byte string; otherwise
        True if ``x`` has an ``__iter__`` attribute.
    """
    # ``six.string_types`` is only ``str`` on Python 3, so ``bytes`` has to
    # be excluded separately. On Python 2 ``six.binary_type`` is ``str``,
    # which ``six.string_types`` already covers, so nothing changes there.
    if isinstance(x, six.string_types + (six.binary_type,)):
        return False
    return hasattr(x, '__iter__')


def _is_integer(x):
"""Determine whether some object ``x`` is an
integer type (int, long, etc). This is part of the
Expand Down Expand Up @@ -293,7 +320,7 @@ def _as_numpy(y):
return np.copy(y)
elif hasattr(y, 'as_matrix'):
return y.as_matrix()
elif hasattr(y, '__iter__'):
elif is_iterable(y):
return np.asarray([i for i in y])
raise TypeError('cannot convert type %s to numpy ndarray' % type(y))

Expand Down
16 changes: 8 additions & 8 deletions skutil/utils/util.py
Expand Up @@ -9,7 +9,7 @@
from sklearn.externals import six
from sklearn.metrics import confusion_matrix as cm
from ..base import suppress_warnings
from .fixes import _grid_detail, _is_integer
from .fixes import _grid_detail, _is_integer, is_iterable

try:
# this causes a UserWarning to be thrown by matplotlib... should we squelch this?
Expand Down Expand Up @@ -117,9 +117,9 @@ def _exp_single(x):


def _vectorize(fun, x):
if hasattr(x, '__iter__'):
if is_iterable(x):
return np.array([fun(p) for p in x])
raise ValueError('Type %s does not have attr __iter__' % type(x))
raise ValueError('Type %s is not iterable' % type(x))


def exp(x):
Expand Down Expand Up @@ -182,7 +182,7 @@ def _val_cols(cols):
return cols

# try to make cols a list
if not hasattr(cols, '__iter__'):
if not is_iterable(cols):
if isinstance(cols, six.string_types):
return [cols]
else:
Expand Down Expand Up @@ -351,11 +351,11 @@ def flatten_all_generator(container):
>>> flatten_all(a)
[3,4,'1','a',1,1,2] # yields a generator for this iterable
"""
if not hasattr(container, '__iter__'):
if not is_iterable(container):
yield container
else:
for i in container:
if hasattr(i, '__iter__'):
if is_iterable(i):
for j in flatten_all_generator(i):
yield j
else:
Expand Down Expand Up @@ -446,7 +446,7 @@ def _check(X, cols):
is_df = isinstance(X, pd.DataFrame)

# we do want to make sure the X at least is "array-like"
if not hasattr(X, '__iter__'):
if not is_iterable(X):
raise TypeError('X (type=%s) cannot be cast to DataFrame' % type(X))

# case 1, we have names but the X is not a frame
Expand All @@ -470,7 +470,7 @@ def _check(X, cols):
# case 4, we have neither a frame nor cols (maybe JUST a np.array?)
else:
# we'll do two tests here... either that it's a np ndarray or a list of lists
if isinstance(X, np.ndarray) or (hasattr(X, '__iter__') and all(isinstance(elem, list) for elem in X)):
if isinstance(X, np.ndarray) or (is_iterable(X) and all(isinstance(elem, list) for elem in X)):
return pd.DataFrame.from_records(data=X, columns=_def_headers(X)), None

# bail out:
Expand Down

0 comments on commit 98b29b0

Please sign in to comment.