Merge branch 'working'

tgsmith61591 · Sep 22, 2016 · c3f9130 · c3f9130
2 parents aa139e5 + d8bbdf7
commit c3f9130
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 24 deletions.
diff --git a/skutil/preprocessing/tests/test_transform.py b/skutil/preprocessing/tests/test_transform.py
@@ -224,20 +224,7 @@ def test_strange_input():
 	SelectivePCA(cols=[]).fit_transform(x)
 
 	# test bad input
-	failed = False
-	try:
-		print(validate_is_pd("bad", None))
-	except ValueError as v:
-		failed = True
-	assert failed
-
-	# test bad cols input
-	failed = False
-	try:
-		print(validate_is_pd(x, 'blah'))
-	except ValueError as v:
-		failed = True
-	assert failed
+	assert_fails(validate_is_pd, TypeError, "bad", None)
 
 
 def test_selective_scale():

diff --git a/skutil/preprocessing/transform.py b/skutil/preprocessing/transform.py
@@ -8,6 +8,7 @@
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted
 from sklearn.externals.joblib import Parallel, delayed
+from sklearn.externals import six
 from scipy.stats import boxcox
 from scipy import optimize
 from .encode import _get_unseen
@@ -66,7 +67,7 @@ def __init__(self, cols=None, fun=None, **kwargs):
         self.fun = fun
         self.kwargs = kwargs
 
-    def fit(self, X, y = None):
+    def fit(self, X, y=None):
         """Validate the args
         
         Parameters
@@ -76,10 +77,6 @@ def fit(self, X, y = None):
         
         y : Passthrough for Pipeline compatibility
         """
-        # this function is a bit strange, because we can accept a single col:
-        if isinstance(self.cols, str):
-            self.cols = [self.cols]
-
         # Check this second in this case
         X, self.cols = validate_is_pd(X, self.cols)
 
@@ -106,10 +103,11 @@ def transform(self, X, y = None):
         
         y : Passthrough for Pipeline compatibility
         """
-        X, _ = validate_is_pd(X, self.cols)
+        X, cols = validate_is_pd(X, self.cols)
+        cols = cols if not cols is None else X.columns
 
         # apply the function
-        X[self.cols or X.columns] = X[self.cols or X.columns].apply(lambda x: self.fun(x, **self.kwargs))
+        X[cols] = X[cols].apply(lambda x: self.fun(x, **self.kwargs))
         return X
 
 

diff --git a/skutil/utils/tests/test_util.py b/skutil/utils/tests/test_util.py
@@ -135,7 +135,8 @@ def test_validate_on_non_df():
 	x = iris.data
 	validate_is_pd(x, None)
 
-	assert_fails(validate_is_pd, ValueError, 'asdf', 'asdf')
+	# a plain str is not a valid X: expect a TypeError (cannot be cast to a DataFrame), not a ValueError
+	assert_fails(validate_is_pd, TypeError, 'asdf', 'asdf')
 
 	# try on list of list and no cols
 	x = [[1,2,3],[4,5,6],[7,8,9]]

diff --git a/skutil/utils/util.py b/skutil/utils/util.py
@@ -130,7 +130,10 @@ def _val_cols(cols):
 
     # try to make cols a list
     if not hasattr(cols, '__iter__'):
-        raise ValueError('cols must be an iterable sequence')
+        if isinstance(cols, six.string_types):
+            return [cols]
+        else:
+            raise ValueError('cols must be an iterable sequence')
     return [c for c in cols] # make it a list implicitly, make no guarantees about elements
 
 def _def_headers(X):
@@ -311,9 +314,13 @@ def _check(X, cols):
         if cols is not None and len(cols) == 0:
             cols = None
 
-        # avoid multiple isinstances
+        # avoid multiple isinstance checks
         is_df = isinstance(X, pd.DataFrame)
 
+        # X must at least be iterable before we try to cast it to a DataFrame
+        if not hasattr(X, '__iter__'):
+            raise TypeError('X (type=%s) cannot be cast to DataFrame' % type(X))
+
         # case 1, we have names but the X is not a frame
         if not is_df and cols is not None:
             # this is tough, because they only pass cols if it's a subset