
Commit

Merge branch 'python3.5-taylor' into python3.5
tgsmith61591 committed Nov 29, 2016
2 parents 2d6cdf6 + a5a5e8f commit 7acf30e
Showing 23 changed files with 3,771 additions and 3,331 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -24,3 +24,6 @@ gh-pages/

# pycharm
.idea/

# model directory
*.mdl
16 changes: 8 additions & 8 deletions .travis.yml
@@ -27,14 +27,14 @@ env:
WITH_MATPLOTLIB="true" WITH_SEABORN="true" COVERAGE="true"
- DISTRIB="conda" PYTHON_VERSION="2.7" SCIKIT_LEARN_VERSION="0.18"
WITH_MATPLOTLIB="false" WITH_SEABORN="false" COVERAGE="true"
- DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.17.1"
WITH_MATPLOTLIB="true" WITH_SEABORN="true" COVERAGE="true"
- DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.17.1"
WITH_MATPLOTLIB="false" WITH_SEABORN="false" COVERAGE="true"
- DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.18"
WITH_MATPLOTLIB="true" WITH_SEABORN="true" COVERAGE="true"
- DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.18"
WITH_MATPLOTLIB="false" WITH_SEABORN="false" COVERAGE="true"
# - DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.17.1"
# WITH_MATPLOTLIB="true" WITH_SEABORN="true" COVERAGE="true"
# - DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.17.1"
# WITH_MATPLOTLIB="false" WITH_SEABORN="false" COVERAGE="true"
# - DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.18"
# WITH_MATPLOTLIB="true" WITH_SEABORN="true" COVERAGE="true"
# - DISTRIB="conda" PYTHON_VERSION="3.5" SCIKIT_LEARN_VERSION="0.18"
# WITH_MATPLOTLIB="false" WITH_SEABORN="false" COVERAGE="true"

matrix:
allow_failures:
35 changes: 35 additions & 0 deletions skutil/base.py
@@ -4,10 +4,12 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.externals import six
from abc import ABCMeta
import re
import warnings

__all__ = [
'overrides',
'since',
'suppress_warnings',
'BaseSkutil',
'SelectiveMixin'
@@ -46,6 +48,8 @@ def overrides(interface_class):
Traceback (most recent call last):
AssertionError: A.b must override a super method!
.. versionadded:: 0.1.0
"""

def overrider(method):
@@ -56,6 +60,34 @@ def overrider(method):
return overrider


def since(version):
"""A decorator that annotates a function to append the version
of skutil in which the function was added. This decorator is an adaptation of PySpark's.
Examples
--------
>>> @since('0.1.5')
... def some_fun():
... '''Some docstring'''
... return None
...
>>>
>>> some_fun.__doc__ # doctest: +SKIP
'Some docstring\n\n.. versionadded:: 0.1.5'
.. versionadded:: 0.1.5
"""
indent_p = re.compile(r'\n( +)')
def deco(f):
indents = indent_p.findall(f.__doc__)
indent = ' ' * (min(len(m) for m in indents) if indents else 0)
f.__doc__ = f.__doc__.rstrip() + "\n\n%s.. versionadded:: %s" % (indent, version)
return f
return deco
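
For illustration only (not part of this commit), the new decorator appends a .. versionadded:: directive at the minimum indentation found in the docstring body, so Sphinx renders it inside the same block. Assuming the skutil.base.since added above:

from skutil.base import since

@since('0.1.5')
def example():
    """Summary line.

    Indented detail paragraph.
    """
    return None

# The directive is appended after the docstring is right-stripped,
# aligned with the body's 4-space indentation:
print(example.__doc__)
# Summary line.
#
#     Indented detail paragraph.
#
#     .. versionadded:: 0.1.5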


def suppress_warnings(func):
"""Decorator that forces a method to suppress
all warnings it may raise. This should be used with caution,
@@ -87,6 +119,9 @@ def suppress_warnings(func):
>>>
>>> fun_that_warns()
1
.. versionadded:: 0.1.0
"""

def suppressor(*args, **kwargs):
17 changes: 10 additions & 7 deletions skutil/decomposition/decompose.py
@@ -11,7 +11,7 @@
from skutil.base import *
from skutil.base import overrides
from ..utils import *
from ..utils.fixes import _cols_if_none
from ..utils.fixes import _cols_if_none, _as_numpy

__all__ = [
'SelectivePCA',
@@ -187,7 +187,7 @@ def fit(self, X, y=None):
# fails thru if names don't exist:
self.pca_ = PCA(
n_components=self.n_components,
whiten=self.whiten).fit(X[cols])
whiten=self.whiten).fit(X[cols].as_matrix())

return self

@@ -216,7 +216,7 @@ def transform(self, X):
cols = _cols_if_none(X, self.cols)

other_nms = [nm for nm in X.columns if nm not in cols]
transform = self.pca_.transform(X[cols])
transform = self.pca_.transform(X[cols].as_matrix())

# do weighting if necessary
if self.weight:
@@ -281,7 +281,7 @@ def score(self, X, y=None):
X, _ = validate_is_pd(X, self.cols)
cols = X.columns if not self.cols else self.cols

ll = self.pca_.score(X[cols], y)
ll = self.pca_.score(X[cols].as_matrix(), _as_numpy(y))
return ll


@@ -377,7 +377,7 @@ def fit(self, X, y=None):
self.svd_ = TruncatedSVD(
n_components=self.n_components,
algorithm=self.algorithm,
n_iter=self.n_iter).fit(X[cols])
n_iter=self.n_iter).fit(X[cols].as_matrix())

return self

@@ -406,9 +406,12 @@ def transform(self, X):
cols = _cols_if_none(X, self.cols)

other_nms = [nm for nm in X.columns if nm not in cols]
transform = self.svd_.transform(X[cols])
transform = self.svd_.transform(X[cols].as_matrix())
left = pd.DataFrame.from_records(data=transform,
columns=[('Concept%i' % (i + 1)) for i in range(transform.shape[1])])
columns=[
('Concept%i' % (i + 1))
for i in range(transform.shape[1])
])

# concat if needed
x = pd.concat([left, X[other_nms]], axis=1) if other_nms else left
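
For context, an illustrative sketch (not repository code): the recurring change in decompose.py is to hand scikit-learn raw numpy arrays rather than pandas objects. DataFrame.as_matrix() returns the underlying ndarray (equivalent to .values, which later superseded it), and the _as_numpy helper imported from ..utils.fixes is applied to y, presumably for the same reason; passing plain arrays presumably keeps fit/transform/score behaviour uniform across the sklearn 0.17.1 and 0.18 builds exercised in .travis.yml. A minimal standalone equivalent:

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

X = pd.DataFrame(np.random.rand(10, 3), columns=['a', 'b', 'c'])
cols = ['a', 'b']

# Fit and transform on the raw ndarray; same data as X[cols].values,
# but no DataFrame-specific handling inside sklearn comes into play.
pca = PCA(n_components=1).fit(X[cols].as_matrix())
scores = pca.transform(X[cols].as_matrix())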
4 changes: 2 additions & 2 deletions skutil/feature_selection/base.py
@@ -80,14 +80,14 @@ def transform(self, X):
# check on state of X and cols
X, _ = validate_is_pd(X, self.cols)

if self.drop_ is None:
if not self.drop_: # empty or None
return X if self.as_df else X.as_matrix()
else:
# what if we don't want to throw this key error for a non-existent
# column that we hope to drop anyways? We need to at least inform the
# user...
drops = [x for x in self.drop_ if x in X.columns]
if not len(drops) == len(self.drop_):
if len(drops) != len(self.drop_):
warnings.warn('one or more features to drop not contained '
'in input data feature names', UserWarning)
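
A quick illustration of the changed guard (hypothetical snippet, not from the repository): the filterers now store an empty list rather than None when nothing is dropped, and the expression not self.drop_ is true for either representation, so the early return keeps working under both conventions:

import pandas as pd

def drop_columns(X, drop):
    # Mirrors the updated check: `not drop` is true for the old None
    # sentinel and for the new empty-list convention alike.
    if not drop:
        return X
    return X.drop([c for c in drop if c in X.columns], axis=1)

X = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
assert drop_columns(X, None).equals(X)
assert drop_columns(X, []).equals(X)
assert list(drop_columns(X, ['b']).columns) == ['a']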

16 changes: 3 additions & 13 deletions skutil/feature_selection/select.py
@@ -134,7 +134,7 @@ def fit(self, X, y=None):
# assess sparsity
self.sparsity_ = X[cols].apply(lambda x: x.isnull().sum() / x.shape[0]).values # numpy array
mask = self.sparsity_ > thresh # numpy boolean array
self.drop_ = X.columns[mask].tolist() if mask.sum() > 0 else None
self.drop_ = X.columns[mask].tolist()
return self


@@ -289,7 +289,7 @@ def transform(self, X):
X, _ = validate_is_pd(X, self.cols) # copy X
cols = X.columns if self.cols is None else self.cols

retained = X[cols] # if cols is None, returns all
retained = X[cols] # if not cols, returns all
return retained if self.as_df else retained.as_matrix()


@@ -521,10 +521,7 @@ def fit(self, X, y=None):
c = X[cols].corr(method=self.method).apply(lambda x: np.abs(x))

# get drops list
d, mac, crz = filter_collinearity(c, self.threshold)
self.drop_ = d if d else None
self.mean_abs_correlations_ = mac if mac else None
self.correlations_ = crz if crz else None
self.drop_, self.mean_abs_correlations_, self.correlations_ = filter_collinearity(c, self.threshold)

return self

@@ -702,11 +699,4 @@ def fit(self, X, y=None):
self.drop_ = np.asarray(cols)[drop_mask].tolist()
self.var_ = dict(zip(self.drop_, matrix[drop_mask, 0].tolist())) # just retain the variances

# I don't like making this None; it opens up bugs in pd.drop,
# but it was the precedent the API set from early on, so don't
# want to change it without a warning. TODO: in future versions,
# don't do this...
if not self.drop_:
self.drop_ = None

return self
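
For context, a hedged sketch (not repository code): with the None coercion removed, drop_ is always a list, which avoids the pd.drop pitfalls the deleted comment alludes to; dropping an empty list is simply a no-op:

import pandas as pd

X = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 1, 1]})

drop_ = []                        # new convention when nothing is filtered
retained = X.drop(drop_, axis=1)  # dropping an empty list is a no-op
assert retained.equals(X)
# A None sentinel, by contrast, forces every caller to special-case it
# before reaching pandas.DataFrame.drop.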
2 changes: 1 addition & 1 deletion skutil/feature_selection/tests/test_select.py
@@ -232,7 +232,7 @@ def test_multi_collinearity():

def test_nzv_filterer():
transformer = NearZeroVarianceFilterer().fit(X)
assert transformer.drop_ is None
assert not transformer.drop_

y = X.copy()
y['zeros'] = np.zeros(150)
4 changes: 4 additions & 0 deletions skutil/h2o/balance.py
@@ -144,6 +144,8 @@ class H2OOversamplingClassBalancer(_BaseH2OBalancer):
2 50
Name: A, dtype: int64
.. versionadded:: 0.1.0
"""

def __init__(self, target_feature, ratio=BalancerMixin._def_ratio, shuffle=True):
@@ -241,6 +243,8 @@ class (1) is represented at a ratio of 0.5.
2 10
Name: A, dtype: int64
.. versionadded:: 0.1.0
"""

_min_version = '3.8.2.9'
12 changes: 9 additions & 3 deletions skutil/h2o/base.py
@@ -3,9 +3,12 @@
import h2o
import os
from ..utils.fixes import is_iterable
from ..base import since
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.externals import six
from h2o.frame import H2OFrame
from pkg_resources import parse_version
from ..utils import is_numeric

# in different versions, we get different exceptions
try:
@@ -18,9 +21,6 @@
except ImportError as e:
H2OConnectionError = EnvironmentError

from pkg_resources import parse_version
from ..utils import is_numeric

try:
import cPickle as pickle
except ImportError as e:
@@ -350,6 +350,9 @@ class BaseH2OFunctionWrapper(BaseEstimator):
max_version : str or float, optional (default=None)
The maximum version of h2o that is compatible with the transformer
.. versionadded:: 0.1.0
"""

def __init__(self, target_feature=None, min_version='any', max_version=None):
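
As an aside (hypothetical snippet, not part of the commit), the min_version/max_version gate documented above can be expressed with pkg_resources.parse_version, which skutil/h2o/base.py imports in the hunk above:

from pkg_resources import parse_version

min_version, max_version = '3.8.2.9', None   # e.g. the _min_version seen in balance.py
h2o_version = '3.10.0.3'                     # hypothetical installed h2o build

# parse_version gives numeric-aware ordering, so '3.10.x' sorts above '3.8.x'.
ok = parse_version(h2o_version) >= parse_version(min_version)
if max_version is not None:
    ok = ok and parse_version(h2o_version) <= parse_version(max_version)
print(ok)  # True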
@@ -527,6 +530,9 @@ class BaseH2OTransformer(BaseH2OFunctionWrapper, TransformerMixin):
max_version : str or float, optional (default=None)
The maximum version of h2o that is compatible with the transformer
.. versionadded:: 0.1.0
"""

def __init__(self, feature_names=None, target_feature=None, exclude_features=None,
