Updated h2o example, updated h2o metrics and removed some logic from h2opipeline
tgsmith61591 · Sep 25, 2016 · b6a0df9 · b6a0df9
1 parent 444839f
commit b6a0df9
Show file tree

Hide file tree

Showing 4 changed files with 270 additions and 177 deletions.
diff --git a/doc/examples/h2o/h2o_example.ipynb b/doc/examples/h2o/h2o_example.ipynb
diff --git a/skutil/h2o/metrics.py b/skutil/h2o/metrics.py
@@ -71,6 +71,6 @@ def h2o_accuracy_score(y_actual, y_predict, sample_weight=None):
     score : float
     """
     _check_targets(y_actual, y_predict)
-    return _weighted_sum(y_actual==y_predict, sample_weight)
+    return _weighted_sum(y_actual==y_predict, sample_weight) / y_actual.shape[0]
 
 
diff --git a/skutil/h2o/pipeline.py b/skutil/h2o/pipeline.py
@@ -136,15 +136,15 @@ def fit(self, frame):
         x, y = validate_x_y(frame, self.feature_names, self.target_feature)
         self.target_feature = y # reset to the cleaned one, if necessary...
 
+        # ===== This shouldn't be in Pipeline's control, should be in the Estimators'
         # First, if there are any columns in the frame that are not in x, y drop them
         # we need to reappend y to make sure it doesn't get dropped out by the
         # frame_from_x_y method
-        xy = [p for p in x]
-        if y is not None:
-            xy.append(y)
-
+        #xy = [p for p in x]
+        #if y is not None:
+        #    xy.append(y)
         # retain only XY
-        frame = frame[xy]
+        #frame = frame[xy]
 
         # get the fit
         Xt, self.training_cols_ = self._pre_transform(frame)

diff --git a/skutil/h2o/select.py b/skutil/h2o/select.py
@@ -118,14 +118,18 @@ class H2OFeatureDropper(BaseH2OFeatureSelector):
         The name of the target feature (is excluded from the fit)
     """
 
-    def __init__(self, feature_names, target_feature=None):
+    def __init__(self, feature_names=None, target_feature=None):
         super(H2OFeatureDropper, self).__init__(feature_names=feature_names,
                                                 target_feature=target_feature)
 
     def fit(self, X, y=None):
+        fn = self.feature_names
+        if fn is None:
+            fn = []
+
         # We validate the features_names is a list or iterable
-        if hasattr(self.feature_names, '__iter__'):
-            self.drop_ = [i for i in self.feature_names]
+        if hasattr(fn, '__iter__'):
+            self.drop_ = [i for i in fn]
         else:
             raise ValueError('expected iterable for feature_names')