work in progress on regression

mljar · Jun 6, 2019 · f82ca32
1 parent d808a73
commit f82ca32
Show file tree

Hide file tree

Showing 6 changed files with 52 additions and 10 deletions.
diff --git a/supervised/callbacks/early_stopping.py b/supervised/callbacks/early_stopping.py
@@ -46,9 +46,13 @@ def on_learner_train_start(self, logs):
     def on_framework_train_end(self, logs):
         # aggregate predictions from all learners
         # it has two columns: 'prediction', 'target'
+        print("early stopping on framework train end")
+
+        print(self.best_y_predicted.values())
 
         self.best_y_oof = pd.concat(list(self.best_y_predicted.values()))
         self.best_y_oof.sort_index(inplace=True)
+
         if "prediction" in self.best_y_oof:
             self.final_loss = self.metric(
                 self.best_y_oof["target"], self.best_y_oof["prediction"]

diff --git a/supervised/iterative_learner_framework.py b/supervised/iterative_learner_framework.py
@@ -34,11 +34,25 @@ def get_train_time(self):
         return self.train_time
 
     def predictions(self, learner, train_data, validation_data):
+
+        y_train_true = train_data.get("y")
+        y_train_predicted = learner.predict(train_data.get("X"))
+        y_validation_true = validation_data.get("y")
+        y_validation_predicted = learner.predict(validation_data.get("X"))
+
+        if self.preprocessings[-1]._scale_y is not None:
+            y_train_true = self.preprocessings[-1].inverse_scale_target(y_train_true)
+            y_train_predicted = self.preprocessings[-1].inverse_scale_target(y_train_predicted)
+            y_validation_true = self.preprocessings[-1].inverse_scale_target(y_validation_true)
+            y_validation_predicted = self.preprocessings[-1].inverse_scale_target(y_validation_predicted)
+
+        print(y_validation_predicted)
+
         return {
-            "y_train_true": train_data.get("y"),
-            "y_train_predicted": learner.predict(train_data.get("X")),
-            "y_validation_true": validation_data.get("y"),
-            "y_validation_predicted": learner.predict(validation_data.get("X")),
+            "y_train_true": y_train_true,
+            "y_train_predicted": y_train_predicted,
+            "y_validation_true": y_validation_true,
+            "y_validation_predicted": y_validation_predicted,
             "validation_index": validation_data.get("X").index,
         }
 
@@ -52,6 +66,7 @@ def train(self, data):
         for train_data, validation_data in self.validation.split():
             # the proprocessing is done at every validation step
             self.preprocessings += [PreprocessingStep(self.preprocessing_params)]
+
             train_data, _ = self.preprocessings[-1].run(train_data)
             validation_data = self.preprocessings[-1].transform(validation_data)
 
@@ -72,6 +87,7 @@ def train(self, data):
                 if learner.stop_training:
                     break
                 learner.update({"step": i})
+            print("model training end")
             # end of learner iters loop
             self.callbacks.on_learner_train_end()
         # end of validation loop

diff --git a/supervised/metric.py b/supervised/metric.py
@@ -55,6 +55,7 @@ def __init__(self, params):
             raise MetricException("Unknown metric {0}".format(self.name))
 
     def __call__(self, y_true, y_predicted):
+
         return self.metric(y_true, y_predicted)
 
     def improvement(self, previous, current):

diff --git a/supervised/preprocessing/preprocessing_scale.py b/supervised/preprocessing/preprocessing_scale.py
@@ -19,36 +19,43 @@ def __init__(self, columns=[], scale_method = SCALE_NORMAL):
 
 
     def fit(self, X):
+
         if len(self.columns):
             for c in self.columns:
                 X[c] = X[c].astype(float)
 
             if self.scale_method == self.SCALE_NORMAL:
                 self.scale.fit(X[self.columns])
             elif self.scale_method == self.SCALE_LOG_AND_NORMAL:
-                self.scale.fit(np.log(X[self.columns] - np.min(X[self.columns]) + 1))
+                self.X_min_values = np.min(X[self.columns])
+                self.scale.fit(np.log(X[self.columns] - self.X_min_values + 1))
+
 
     def transform(self, X):
+
         if len(self.columns):
             X.loc[:, self.columns] = X.loc[:, self.columns].astype(float)
             if self.scale_method == self.SCALE_NORMAL:
                 X.loc[:, self.columns] = self.scale.transform(X[self.columns])
             elif self.scale_method == self.SCALE_LOG_AND_NORMAL:
 
-                self.X_min_values = np.min(X[self.columns])
+
                 X[self.columns] = np.log(X[self.columns] - self.X_min_values + 1)
                 X.loc[:, self.columns] = self.scale.transform(X[self.columns])
         return X
 
     def inverse_transform(self, X):
 
         if len(self.columns):
-            pass
+
             if self.scale_method == self.SCALE_NORMAL:
                 X.loc[:, self.columns] = self.scale.inverse_transform(X[self.columns])
             elif self.scale_method == self.SCALE_LOG_AND_NORMAL:
+
                 X[self.columns] = self.scale.inverse_transform(X[self.columns])
-                X.loc[:, self.columns] = np.exp(X[self.columns] + self.X_min_values - 1)
+                X[self.columns] = np.exp(X[self.columns])
+
+                X.loc[:, self.columns] += self.X_min_values - 1
         return X
 
     def to_json(self):

diff --git a/supervised/preprocessing/preprocessing_step.py b/supervised/preprocessing/preprocessing_step.py
@@ -252,12 +252,20 @@ def transform(self, validation_data=None):
 
         return {"X": X_validation, "y": y_validation}
 
+    def inverse_scale_target(self, y):
+        y = pd.DataFrame({"target": y})
+        y = self._scale_y.inverse_transform(y)
+        y = y["target"]
+        return y
+
     def reverse_transform_target(self, y):
 
         # target_preprocessing = self._params.get("target_preprocessing")
         # assume for now that all tasks are binary classification
         # if there is no target preprocessing, assume that there is 0 and 1 target
 
+        print("reverse_transform_target !!!")
+
         pos_label, neg_label = "1", "0"
         if self._categorical_y is not None:
             if len(y.shape) == 1:
@@ -295,6 +303,12 @@ def reverse_transform_target(self, y):
                         data=y, columns=["p_{}".format(i) for i in range(y.shape[1])]
                     )
 
+        if "ml_task" in self._params and self._params["ml_task"] == REGRESSION:
+            print("Apply reverse_transform_target (0)")
+            if self._scale_y is not None:
+                print("Apply reverse_transform_target")
+
+
         # regression
         # TODO: reverse transform for regression will be applied here
         return pd.DataFrame({"prediction": y})

diff --git a/tests/tests_preprocessing/test_preprocessing_scale.py b/tests/tests_preprocessing/test_preprocessing_scale.py
@@ -13,9 +13,9 @@ class PreprocessingScaleTest(unittest.TestCase):
     def test_fit_log_and_normal(self):
         # training data
         d = {
-            "col1": [1, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
+            "col1": [12, 13, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
             "col2": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30.0],
-            "col3": [1, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
+            "col3": [12, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
         }
         df = pd.DataFrame(data=d)