Commit
test GridSearchCV and RandomizedSearchCV with sample_weight and MockClassifier
ndawe committed Apr 22, 2014
1 parent addccdf commit c5867a7
Showing 1 changed file with 21 additions and 39 deletions.
sklearn/tests/test_grid_search.py: 21 additions, 39 deletions
@@ -15,7 +15,6 @@
 import scipy.sparse as sp

 from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_not_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import assert_false, assert_true
@@ -35,7 +34,6 @@
 from sklearn.svm import LinearSVC, SVC
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import AdaBoostClassifier
 from sklearn.cluster import KMeans, SpectralClustering
 from sklearn.metrics import f1_score
 from sklearn.metrics import make_scorer
@@ -52,8 +50,13 @@ class MockClassifier(object):
     def __init__(self, foo_param=0):
         self.foo_param = foo_param

-    def fit(self, X, Y):
+    def fit(self, X, Y, sample_weight=None):
         assert_true(len(X) == len(Y))
+        if sample_weight is not None:
+            assert_true(len(sample_weight) == len(X),
+                        'MockClassifier sample_weight.shape[0]'
+                        ' is {0}, should be {1}'.format(len(sample_weight),
+                                                        len(X)))
         return self

     def predict(self, T):
@@ -63,7 +66,12 @@ def predict(self, T):
     decision_function = predict
     transform = predict

-    def score(self, X=None, Y=None):
+    def score(self, X=None, Y=None, sample_weight=None):
+        if X is not None and sample_weight is not None:
+            assert_true(len(sample_weight) == len(X),
+                        'MockClassifier sample_weight.shape[0]'
+                        ' is {0}, should be {1}'.format(len(sample_weight),
+                                                        len(X)))
         if self.foo_param > 1:
             score = 1.
         else:
@@ -117,6 +125,7 @@ def score(self):

 X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
 y = np.array([1, 1, 2, 2])
+sample_weight = np.array([1, 2, 3, 4])


 def test_parameter_grid():
@@ -673,39 +682,12 @@ def test_grid_search_allows_nans():


 def test_grid_search_with_sample_weights():
-    X, y = make_classification(random_state=0)
-    int_weights = np.random.randint(low=1, high=10, size=y.shape)
-
-    est_parameters = {
-        "base_estimator__max_depth": [1, 2, 3],
-        "n_estimators": [1, 5, 10]}
-
-    def best_est(X, y, sample_weight=None):
-        cv = KFold(y.shape[0], n_folds=2, random_state=0)
-        est = AdaBoostClassifier(DecisionTreeClassifier(), random_state=0)
-        grid_search = GridSearchCV(est, est_parameters, cv=cv)
+    """Test grid searching with sample weights"""
+    est_parameters = {"foo_param": [1, 2, 3]}
+    cv = KFold(y.shape[0], n_folds=2, random_state=0)
+    for search_cls in (GridSearchCV, RandomizedSearchCV):
+        grid_search = search_cls(MockClassifier(), est_parameters, cv=cv)
         grid_search.fit(X, y, sample_weight=sample_weight)
-        return grid_search.best_score_, grid_search.best_params_
-
-    unweighted = best_est(X, y)
-
-    assert_equal(
-        unweighted,
-        best_est(X, y, sample_weight=np.ones(shape=y.shape)),
-        msg="sample_weight=None is not equivalent to sample_weight=ones")
-
-    weighted = best_est(X, y, sample_weight=int_weights)
-    assert_not_equal(
-        unweighted, weighted,
-        msg="Unweighted and weighted best classifiers are unexpectedly equal")
-
-    for scaling in [2, 0.3]:
-        assert_almost_equal(
-            weighted[0],
-            best_est(X, y, sample_weight=int_weights * scaling)[0],
-            err_msg="sample_weight is not invariant under scaling")
-
-    assert_equal(
-        weighted,
-        best_est(X, y, sample_weight=int_weights.tolist()),
-        msg="sample_weight is not invariant to list vs array")
+    # check that sample_weight can be a list
+    grid_search = GridSearchCV(MockClassifier(), est_parameters, cv=cv)
+    grid_search.fit(X, y, sample_weight=sample_weight.tolist())
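For readers outside the test suite, here is a minimal, self-contained sketch of the pattern this test exercises. It is not part of the commit: it assumes the sample_weight-forwarding fit signature added on this branch (released scikit-learn does not accept sample_weight directly in GridSearchCV.fit), the 2014-era module layout (sklearn.grid_search, sklearn.cross_validation), and a hypothetical WeightAwareMock standing in for the MockClassifier defined in the test module.

# Sketch of the pattern exercised by test_grid_search_with_sample_weights.
# Assumes this branch's fit(X, y, sample_weight=...) support in the search
# estimators and the pre-0.18 module layout; WeightAwareMock is a hypothetical
# stand-in for the test module's MockClassifier.
import numpy as np
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from sklearn.cross_validation import KFold


class WeightAwareMock(object):
    """Trimmed-down mock estimator: checks that weights match X in length."""

    def __init__(self, foo_param=0):
        self.foo_param = foo_param

    def fit(self, X, y, sample_weight=None):
        if sample_weight is not None:
            assert len(sample_weight) == len(X)
        return self

    def score(self, X=None, y=None, sample_weight=None):
        # deterministic score so the search has something to rank
        return 1. if self.foo_param > 1 else 0.

    def get_params(self, deep=False):
        return {'foo_param': self.foo_param}

    def set_params(self, **params):
        self.foo_param = params['foo_param']
        return self


X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
sample_weight = np.array([1, 2, 3, 4])

cv = KFold(y.shape[0], n_folds=2)
for search_cls in (GridSearchCV, RandomizedSearchCV):
    search = search_cls(WeightAwareMock(), {"foo_param": [1, 2, 3]}, cv=cv)
    # sample_weight is expected to be sliced per CV fold and forwarded to
    # the mock's fit() and score() on this branch
    search.fit(X, y, sample_weight=sample_weight)
    print(search.best_params_)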
