Skip to content

Commit

Permalink
work in progress on regression
Browse files Browse the repository at this point in the history
  • Loading branch information
pplonski committed Jun 6, 2019
1 parent d808a73 commit f82ca32
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 10 deletions.
4 changes: 4 additions & 0 deletions supervised/callbacks/early_stopping.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@ def on_learner_train_start(self, logs):
def on_framework_train_end(self, logs):
# aggregate predictions from all learners
# it has two columns: 'prediction', 'target'
print("early stopping on framework train end")

print(self.best_y_predicted.values())

self.best_y_oof = pd.concat(list(self.best_y_predicted.values()))
self.best_y_oof.sort_index(inplace=True)

if "prediction" in self.best_y_oof:
self.final_loss = self.metric(
self.best_y_oof["target"], self.best_y_oof["prediction"]
Expand Down
24 changes: 20 additions & 4 deletions supervised/iterative_learner_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,25 @@ def get_train_time(self):
return self.train_time

def predictions(self, learner, train_data, validation_data):
    """Collect true and predicted targets for the train and validation folds.

    Each fold is predicted exactly once. When the most recent preprocessing
    step has a target scaler (its ``_scale_y`` is not None), all four target
    collections are mapped back through ``inverse_scale_target`` before
    being returned.

    Args:
        learner: Object exposing ``predict(X)``.
        train_data: Mapping with "X" and "y" entries for the training fold.
        validation_data: Mapping with "X" and "y" entries for the validation
            fold; its "X" must expose ``.index``.

    Returns:
        dict with the keys "y_train_true", "y_train_predicted",
        "y_validation_true", "y_validation_predicted" and "validation_index".
    """
    y_train_true = train_data.get("y")
    y_train_predicted = learner.predict(train_data.get("X"))
    y_validation_true = validation_data.get("y")
    y_validation_predicted = learner.predict(validation_data.get("X"))

    # The preprocessing appended last belongs to the fold being evaluated.
    preprocessing = self.preprocessings[-1]
    if preprocessing._scale_y is not None:
        y_train_true = preprocessing.inverse_scale_target(y_train_true)
        y_train_predicted = preprocessing.inverse_scale_target(y_train_predicted)
        y_validation_true = preprocessing.inverse_scale_target(y_validation_true)
        y_validation_predicted = preprocessing.inverse_scale_target(
            y_validation_predicted
        )

    # NOTE(review): work-in-progress debug output; kept so stdout is unchanged.
    print(y_validation_predicted)

    # Every key appears once: a repeated key in a dict literal is silently
    # overwritten, and re-running learner.predict() just to fill a discarded
    # entry doubles the prediction cost.
    return {
        "y_train_true": y_train_true,
        "y_train_predicted": y_train_predicted,
        "y_validation_true": y_validation_true,
        "y_validation_predicted": y_validation_predicted,
        "validation_index": validation_data.get("X").index,
    }

Expand All @@ -52,6 +66,7 @@ def train(self, data):
for train_data, validation_data in self.validation.split():
# the preprocessing is done at every validation step
self.preprocessings += [PreprocessingStep(self.preprocessing_params)]

train_data, _ = self.preprocessings[-1].run(train_data)
validation_data = self.preprocessings[-1].transform(validation_data)

Expand All @@ -72,6 +87,7 @@ def train(self, data):
if learner.stop_training:
break
learner.update({"step": i})
print("model training end")
# end of learner iters loop
self.callbacks.on_learner_train_end()
# end of validation loop
Expand Down
1 change: 1 addition & 0 deletions supervised/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(self, params):
raise MetricException("Unknown metric {0}".format(self.name))

# Score predictions: forwards y_true and y_predicted to self.metric and
# returns whatever that callable returns, unchanged.
def __call__(self, y_true, y_predicted):

return self.metric(y_true, y_predicted)

def improvement(self, previous, current):
Expand Down
15 changes: 11 additions & 4 deletions supervised/preprocessing/preprocessing_scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,43 @@ def __init__(self, columns=[], scale_method = SCALE_NORMAL):


def fit(self, X):
    """Fit the scaler on the configured columns of ``X``.

    The configured columns of ``X`` are cast to float in place. With
    ``SCALE_NORMAL`` the scaler is fitted on the raw columns. With
    ``SCALE_LOG_AND_NORMAL`` the per-column minimum is stored in
    ``self.X_min_values`` and the scaler is fitted on
    ``log(X - min + 1)``.

    Args:
        X: DataFrame containing every column listed in ``self.columns``.
    """
    if not len(self.columns):
        return

    for c in self.columns:
        X[c] = X[c].astype(float)

    if self.scale_method == self.SCALE_NORMAL:
        self.scale.fit(X[self.columns])
    elif self.scale_method == self.SCALE_LOG_AND_NORMAL:
        # DataFrame.min() is always column-wise. np.min(df) forwards
        # axis=None, which pandas >= 2.0 reduces over both axes to a single
        # scalar, silently sharing one shift between all columns.
        self.X_min_values = X[self.columns].min()
        # Fit once, on the shifted log values (shift makes the minimum 1).
        self.scale.fit(np.log(X[self.columns] - self.X_min_values + 1))


def transform(self, X):
    """Scale the configured columns of ``X`` in place and return ``X``.

    With ``SCALE_NORMAL`` the columns are passed through the fitted scaler.
    With ``SCALE_LOG_AND_NORMAL`` they are first mapped to
    ``log(X - min + 1)`` using the minimum stored in ``self.X_min_values``,
    then scaled.

    The stored minimum is reused rather than recomputed from ``X`` so that
    every dataset is shifted identically and ``inverse_transform`` undoes
    exactly what was applied.

    Args:
        X: DataFrame containing every column listed in ``self.columns``.

    Returns:
        The same ``X`` object with the configured columns replaced.
    """
    if not len(self.columns):
        return X

    # Whole-column assignment (as in fit) replaces the dtype instead of
    # trying to squeeze floats back into an integer column.
    X[self.columns] = X[self.columns].astype(float)

    if self.scale_method == self.SCALE_NORMAL:
        X[self.columns] = self.scale.transform(X[self.columns])
    elif self.scale_method == self.SCALE_LOG_AND_NORMAL:
        shift = getattr(self, "X_min_values", None)
        if shift is None:
            # Not fitted through fit() (e.g. restored without the minimum —
            # TODO confirm whether that can happen): derive it from X once
            # and remember it for inverse_transform.
            shift = X[self.columns].min()
            self.X_min_values = shift
        # NOTE(review): values at or below (fitted minimum - 1) yield NaN or
        # -inf here; confirm whether such inputs need clipping.
        X[self.columns] = np.log(X[self.columns] - shift + 1)
        X[self.columns] = self.scale.transform(X[self.columns])
    return X

def inverse_transform(self, X):
    """Undo ``transform`` on the configured columns of ``X`` and return ``X``.

    With ``SCALE_NORMAL`` the columns are passed through the scaler's
    ``inverse_transform``. With ``SCALE_LOG_AND_NORMAL`` the scaler is
    inverted first and the log shift is then undone: the forward mapping is
    ``y = log(x - min + 1)``, so the inverse is ``x = exp(y) + min - 1``.

    Args:
        X: DataFrame containing every column listed in ``self.columns``.

    Returns:
        The same ``X`` object with the configured columns restored.
    """
    # Nothing to restore; also avoids handing an empty frame to the scaler.
    if not len(self.columns):
        return X

    if self.scale_method == self.SCALE_NORMAL:
        X[self.columns] = self.scale.inverse_transform(X[self.columns])
    elif self.scale_method == self.SCALE_LOG_AND_NORMAL:
        X[self.columns] = self.scale.inverse_transform(X[self.columns])
        # exp() is applied exactly once and the offset is added outside the
        # exponent; exp(y + min - 1) is not the inverse of log(x - min + 1).
        X[self.columns] = np.exp(X[self.columns]) + (self.X_min_values - 1)
    return X

def to_json(self):
Expand Down
14 changes: 14 additions & 0 deletions supervised/preprocessing/preprocessing_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,20 @@ def transform(self, validation_data=None):

return {"X": X_validation, "y": y_validation}

def inverse_scale_target(self, y):
    """Map target values back through the target scaler.

    ``y`` is wrapped in a one-column frame named "target", handed to
    ``self._scale_y.inverse_transform``, and the "target" column of the
    result is returned.
    """
    target_frame = pd.DataFrame({"target": y})
    restored = self._scale_y.inverse_transform(target_frame)
    return restored["target"]

def reverse_transform_target(self, y):

# target_preprocessing = self._params.get("target_preprocessing")
# assume for now that all tasks are binary classification
# if there is no target preprocessing, assume that there is 0 and 1 target

print("reverse_transform_target !!!")

pos_label, neg_label = "1", "0"
if self._categorical_y is not None:
if len(y.shape) == 1:
Expand Down Expand Up @@ -295,6 +303,12 @@ def reverse_transform_target(self, y):
data=y, columns=["p_{}".format(i) for i in range(y.shape[1])]
)

if "ml_task" in self._params and self._params["ml_task"] == REGRESSION:
print("Apply reverse_transform_target (0)")
if self._scale_y is not None:
print("Apply reverse_transform_target")


# regression
# TODO: reverse transform for regression will be applied here
return pd.DataFrame({"prediction": y})
Expand Down
4 changes: 2 additions & 2 deletions tests/tests_preprocessing/test_preprocessing_scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ class PreprocessingScaleTest(unittest.TestCase):
def test_fit_log_and_normal(self):
# training data
d = {
"col1": [1, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
"col1": [12, 13, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
"col2": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30.0],
"col3": [1, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
"col3": [12, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
}
df = pd.DataFrame(data=d)

Expand Down

0 comments on commit f82ca32

Please sign in to comment.