Skip to content

Commit

Permalink
Merge branch 'working'
Browse files Browse the repository at this point in the history
  • Loading branch information
tgsmith61591 committed Sep 22, 2016
2 parents aa139e5 + d8bbdf7 commit c3f9130
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 24 deletions.
15 changes: 1 addition & 14 deletions skutil/preprocessing/tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,20 +224,7 @@ def test_strange_input():
SelectivePCA(cols=[]).fit_transform(x)

# test bad input
failed = False
try:
print(validate_is_pd("bad", None))
except ValueError as v:
failed = True
assert failed

# test bad cols input
failed = False
try:
print(validate_is_pd(x, 'blah'))
except ValueError as v:
failed = True
assert failed
assert_fails(validate_is_pd, TypeError, "bad", None)


def test_selective_scale():
Expand Down
12 changes: 5 additions & 7 deletions skutil/preprocessing/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.externals.joblib import Parallel, delayed
from sklearn.externals import six
from scipy.stats import boxcox
from scipy import optimize
from .encode import _get_unseen
Expand Down Expand Up @@ -66,7 +67,7 @@ def __init__(self, cols=None, fun=None, **kwargs):
self.fun = fun
self.kwargs = kwargs

def fit(self, X, y = None):
def fit(self, X, y=None):
"""Validate the args
Parameters
Expand All @@ -76,10 +77,6 @@ def fit(self, X, y = None):
y : Passthrough for Pipeline compatibility
"""
# this function is a bit strange, because we can accept a single col:
if isinstance(self.cols, str):
self.cols = [self.cols]

# Check this second in this case
X, self.cols = validate_is_pd(X, self.cols)

Expand All @@ -106,10 +103,11 @@ def transform(self, X, y = None):
y : Passthrough for Pipeline compatibility
"""
X, _ = validate_is_pd(X, self.cols)
X, cols = validate_is_pd(X, self.cols)
cols = cols if not cols is None else X.columns

# apply the function
X[self.cols or X.columns] = X[self.cols or X.columns].apply(lambda x: self.fun(x, **self.kwargs))
X[cols] = X[cols].apply(lambda x: self.fun(x, **self.kwargs))
return X


Expand Down
3 changes: 2 additions & 1 deletion skutil/utils/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ def test_validate_on_non_df():
x = iris.data
validate_is_pd(x, None)

assert_fails(validate_is_pd, ValueError, 'asdf', 'asdf')
# validate_is_pd will try to create a DataFrame out of the string, which raises a TypeError
assert_fails(validate_is_pd, TypeError, 'asdf', 'asdf')

# try on list of list and no cols
x = [[1,2,3],[4,5,6],[7,8,9]]
Expand Down
11 changes: 9 additions & 2 deletions skutil/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,10 @@ def _val_cols(cols):

# try to make cols a list
if not hasattr(cols, '__iter__'):
raise ValueError('cols must be an iterable sequence')
if isinstance(cols, six.string_types):
return [cols]
else:
raise ValueError('cols must be an iterable sequence')
return [c for c in cols] # make it a list implicitly, make no guarantees about elements

def _def_headers(X):
Expand Down Expand Up @@ -311,9 +314,13 @@ def _check(X, cols):
if cols is not None and len(cols) == 0:
cols = None

# avoid multiple isinstances
# avoid multiple isinstance checks
is_df = isinstance(X, pd.DataFrame)

# make sure that X is at least "array-like" before trying to cast it to a DataFrame
if not hasattr(X, '__iter__'):
raise TypeError('X (type=%s) cannot be cast to DataFrame' % type(X))

# case 1, we have names but the X is not a frame
if not is_df and cols is not None:
# this is tough, because they only pass cols if it's a subset
Expand Down

0 comments on commit c3f9130

Please sign in to comment.