Commit
test GridSearchCV and RandomizedSearchCV with sample_weight and MockClassifier
ndawe committed Apr 22, 2014
1 parent addccdf commit c5867a7
Showing 1 changed file with 21 additions and 39 deletions.
sklearn/tests/test_grid_search.py: 21 additions, 39 deletions
@@ -15,7 +15,6 @@
 import scipy.sparse as sp

 from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_not_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import assert_false, assert_true
@@ -35,7 +34,6 @@
 from sklearn.svm import LinearSVC, SVC
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import AdaBoostClassifier
 from sklearn.cluster import KMeans, SpectralClustering
 from sklearn.metrics import f1_score
 from sklearn.metrics import make_scorer
@@ -52,8 +50,13 @@ class MockClassifier(object):
     def __init__(self, foo_param=0):
         self.foo_param = foo_param

-    def fit(self, X, Y):
+    def fit(self, X, Y, sample_weight=None):
         assert_true(len(X) == len(Y))
+        if sample_weight is not None:
+            assert_true(len(sample_weight) == len(X),
+                        'MockClassifier sample_weight.shape[0]'
+                        ' is {0}, should be {1}'.format(len(sample_weight),
+                                                        len(X)))
         return self

     def predict(self, T):
@@ -63,7 +66,12 @@ def predict(self, T):
     decision_function = predict
     transform = predict

-    def score(self, X=None, Y=None):
+    def score(self, X=None, Y=None, sample_weight=None):
+        if X is not None and sample_weight is not None:
+            assert_true(len(sample_weight) == len(X),
+                        'MockClassifier sample_weight.shape[0]'
+                        ' is {0}, should be {1}'.format(len(sample_weight),
+                                                        len(X)))
         if self.foo_param > 1:
             score = 1.
         else:
@@ -117,6 +125,7 @@ def score(self):

 X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
 y = np.array([1, 1, 2, 2])
+sample_weight = np.array([1, 2, 3, 4])


 def test_parameter_grid():
@@ -673,39 +682,12 @@ def test_grid_search_allows_nans():


 def test_grid_search_with_sample_weights():
-    X, y = make_classification(random_state=0)
-    int_weights = np.random.randint(low=1, high=10, size=y.shape)
-
-    est_parameters = {
-        "base_estimator__max_depth": [1, 2, 3],
-        "n_estimators": [1, 5, 10]}
-
-    def best_est(X, y, sample_weight=None):
-        cv = KFold(y.shape[0], n_folds=2, random_state=0)
-        est = AdaBoostClassifier(DecisionTreeClassifier(), random_state=0)
-        grid_search = GridSearchCV(est, est_parameters, cv=cv)
+    """Test grid searching with sample weights"""
+    est_parameters = {"foo_param": [1, 2, 3]}
+    cv = KFold(y.shape[0], n_folds=2, random_state=0)
+    for search_cls in (GridSearchCV, RandomizedSearchCV):
+        grid_search = search_cls(MockClassifier(), est_parameters, cv=cv)
         grid_search.fit(X, y, sample_weight=sample_weight)
-        return grid_search.best_score_, grid_search.best_params_
-
-    unweighted = best_est(X, y)
-
-    assert_equal(
-        unweighted,
-        best_est(X, y, sample_weight=np.ones(shape=y.shape)),
-        msg="sample_weight=None is not equivalent to sample_weight=ones")
-
-    weighted = best_est(X, y, sample_weight=int_weights)
-    assert_not_equal(
-        unweighted, weighted,
-        msg="Unweighted and weighted best classifiers are unexpectedly equal")
-
-    for scaling in [2, 0.3]:
-        assert_almost_equal(
-            weighted[0],
-            best_est(X, y, sample_weight=int_weights * scaling)[0],
-            err_msg="sample_weight is not invariant under scaling")
-
-    assert_equal(
-        weighted,
-        best_est(X, y, sample_weight=int_weights.tolist()),
-        msg="sample_weight is not invariant to list vs array")
+    # check that sample_weight can be a list
+    grid_search = GridSearchCV(MockClassifier(), est_parameters, cv=cv)
+    grid_search.fit(X, y, sample_weight=sample_weight.tolist())
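For readers outside the test suite, here is a minimal, self-contained sketch of the pattern this test exercises. It is not part of the commit: it assumes the sample_weight-forwarding fit signature added on this branch (released scikit-learn does not accept sample_weight directly in GridSearchCV.fit), the 2014-era module layout (sklearn.grid_search, sklearn.cross_validation), and a hypothetical WeightAwareMock standing in for the MockClassifier defined in the test module.

# Sketch of the pattern exercised by test_grid_search_with_sample_weights.
# Assumes this branch's fit(X, y, sample_weight=...) support in the search
# estimators and the pre-0.18 module layout; WeightAwareMock is a hypothetical
# stand-in for the test module's MockClassifier.
import numpy as np
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from sklearn.cross_validation import KFold


class WeightAwareMock(object):
    """Trimmed-down mock estimator: checks that weights match X in length."""

    def __init__(self, foo_param=0):
        self.foo_param = foo_param

    def fit(self, X, y, sample_weight=None):
        if sample_weight is not None:
            assert len(sample_weight) == len(X)
        return self

    def score(self, X=None, y=None, sample_weight=None):
        # deterministic score so the search has something to rank
        return 1. if self.foo_param > 1 else 0.

    def get_params(self, deep=False):
        return {'foo_param': self.foo_param}

    def set_params(self, **params):
        self.foo_param = params['foo_param']
        return self


X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
sample_weight = np.array([1, 2, 3, 4])

cv = KFold(y.shape[0], n_folds=2)
for search_cls in (GridSearchCV, RandomizedSearchCV):
    search = search_cls(WeightAwareMock(), {"foo_param": [1, 2, 3]}, cv=cv)
    # sample_weight is expected to be sliced per CV fold and forwarded to
    # the mock's fit() and score() on this branch
    search.fit(X, y, sample_weight=sample_weight)
    print(search.best_params_)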
