From 38910b6876ef43669573c6af3eb6004e66c82ad7 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sat, 13 Jun 2020 14:23:00 -0400 Subject: [PATCH 01/34] feat: enabled early stopping and add early stopping unit test --- ludwig/models/model.py | 61 ++++++------- .../test_model_training_options.py | 91 +++++++++++++++++++ 2 files changed, 121 insertions(+), 31 deletions(-) create mode 100644 tests/integration_tests/test_model_training_options.py diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 0badf2131a9..299562b301f 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -651,36 +651,35 @@ def train( ) ) - # if should_validate: - # should_break = self.check_progress_on_validation( - # progress_tracker, - # validation_field, - # validation_metric, - # session, - # model_weights_path, - # model_hyperparameters_path, - # reduce_learning_rate_on_plateau, - # reduce_learning_rate_on_plateau_patience, - # reduce_learning_rate_on_plateau_rate, - # increase_batch_size_on_plateau_patience, - # increase_batch_size_on_plateau, - # increase_batch_size_on_plateau_max, - # increase_batch_size_on_plateau_rate, - # early_stop, - # skip_save_model - # ) - # if should_break: - # break - # else: - # # there's no validation, so we save the model at each iteration - # if is_on_master(): - # if not skip_save_model: - # self.save_weights(session, model_weights_path) - # self.save_hyperparameters( - # self.hyperparameters, - # model_hyperparameters_path - # ) - # + if should_validate: + should_break = self.check_progress_on_validation( + progress_tracker, + validation_field, + validation_metric, + model_weights_path, + model_hyperparameters_path, + reduce_learning_rate_on_plateau, + reduce_learning_rate_on_plateau_patience, + reduce_learning_rate_on_plateau_rate, + increase_batch_size_on_plateau_patience, + increase_batch_size_on_plateau, + increase_batch_size_on_plateau_max, + increase_batch_size_on_plateau_rate, + early_stop, + skip_save_model + ) + if should_break: + break + # else: + # # there's no validation, so we save the model at each iteration + # if is_on_master(): + # if not skip_save_model: + # self.save_weights(session, model_weights_path) + # self.save_hyperparameters( + # self.hyperparameters, + # model_hyperparameters_path + # ) + # # # ========== Save training progress ========== # if is_on_master(): # if not skip_save_progress: @@ -932,7 +931,7 @@ def check_progress_on_validation( progress_tracker, validation_field, validation_metric, - session, model_weights_path, + model_weights_path, model_hyperparameters_path, reduce_learning_rate_on_plateau, reduce_learning_rate_on_plateau_patience, diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py new file mode 100644 index 00000000000..583065800f0 --- /dev/null +++ b/tests/integration_tests/test_model_training_options.py @@ -0,0 +1,91 @@ +import os.path +import json + +import pandas as pd +import numpy as np + +import pytest + +from ludwig.experiment import full_experiment + +@pytest.fixture +def train_df(): + # function generates simple training data tha guarantee convergence + # within 30 epochs for suitable model definition + NUMBER_OBSERVATIONS = 200 + + # generate training data + np.random.seed(42) + x = np.array(range(NUMBER_OBSERVATIONS)).reshape(-1, 1) + y = 2*x + 1 + np.random.normal(size=x.shape[0]).reshape(-1, 1) + + train_df = pd.DataFrame(np.concatenate((x, y), axis=1), columns=['x', 'y']) + + return train_df + 
+@pytest.mark.parametrize('early_stop', [3, 5]) +def test_early_stopping(early_stop, train_df, tmp_path): + # model definition guarantee convergence in under 30 epochs + input_features = [ + {'name': 'x', 'type': 'numerical'}, + ] + output_features = [ + {'name': 'y', 'type': 'numerical', 'loss': {'type': 'mean_squared_error'}, + 'num_fc_layers': 5, 'fc_size': 64} + ] + model_definition = { + 'input_features': input_features, + 'output_features': output_features, + 'combiner': { + 'type': 'concat' + }, + 'training': { + 'epochs': 100, + 'early_stop': early_stop, + 'batch_size': 16 + } + } + + # create sub-directory to store results + results_dir = tmp_path / 'results' + results_dir.mkdir() + + # specify model training options + kwargs = { + 'output_directory':results_dir, + 'model_definition': model_definition, + 'skip_save_processed_input': True, + 'skip_save_progress': True, + 'skip_save_unprocessed_output': True, + 'skip_save_model': True, + 'skip_save_log': True + + } + + # run experiment + exp_dir_name = full_experiment(data_df=train_df, **kwargs) + + # test existence of required files + train_stats_fp = os.path.join(exp_dir_name, 'training_statistics.json') + metadata_fp = os.path.join(exp_dir_name, 'description.json') + assert os.path.isfile(train_stats_fp) + assert os.path.isfile(metadata_fp) + + # retrieve results so we can validate early stopping + with open(train_stats_fp,'r') as f: + train_stats = json.load(f) + with open(metadata_fp, 'r') as f: + metadata = json.load(f) + + # get early stopping value + early_stop_value = metadata['model_definition']['training']['early_stop'] + + # retrieve validation losses + vald_losses = np.array(train_stats['validation']['combined']['loss']) + last_epoch = vald_losses.shape[0] + best_epoch = np.argmin(vald_losses) + + # confirm early stopping + assert (last_epoch - best_epoch - 1) == early_stop_value + + From 6b100587fdca8963bb8f9baa1ed9c5a25d15d0cb Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sun, 14 Jun 2020 22:59:42 -0400 Subject: [PATCH 02/34] feat: enable model save and restore functions with unit test --- ludwig/models/model.py | 65 ++++++------ ludwig/predict.py | 2 +- .../test_model_training_options.py | 100 ++++++++++++++---- 3 files changed, 116 insertions(+), 51 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 299562b301f..c26b833b416 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -670,32 +670,32 @@ def train( ) if should_break: break - # else: - # # there's no validation, so we save the model at each iteration - # if is_on_master(): - # if not skip_save_model: - # self.save_weights(session, model_weights_path) - # self.save_hyperparameters( - # self.hyperparameters, - # model_hyperparameters_path - # ) - # - # # ========== Save training progress ========== - # if is_on_master(): - # if not skip_save_progress: - # self.save_weights(session, model_weights_progress_path) - # progress_tracker.save( - # os.path.join( - # save_path, - # TRAINING_PROGRESS_FILE_NAME - # ) - # ) - # if skip_save_model: - # self.save_hyperparameters( - # self.hyperparameters, - # model_hyperparameters_path - # ) - # + else: + # there's no validation, so we save the model at each iteration + if is_on_master(): + if not skip_save_model: + self.save_weights(model_weights_path) + self.save_hyperparameters( + self.hyperparameters, + model_hyperparameters_path + ) + + # ========== Save training progress ========== + if is_on_master(): + if not skip_save_progress: + 
self.save_weights(model_weights_progress_path) + progress_tracker.save( + os.path.join( + save_path, + TRAINING_PROGRESS_FILE_NAME + ) + ) + if skip_save_model: + self.save_hyperparameters( + self.hyperparameters, + model_hyperparameters_path + ) + # if is_on_master(): # contrib_command("train_epoch_end", progress_tracker) # logger.info('') @@ -956,7 +956,7 @@ def check_progress_on_validation( validation_field][validation_metric][-1] if is_on_master(): if not skip_save_model: - self.save_weights(session, model_weights_path) + self.save_weights(model_weights_path) self.save_hyperparameters( self.hyperparameters, model_hyperparameters_path @@ -1110,10 +1110,10 @@ def collect_weights( # return collected_tensors pass - def save_weights(self, session, save_path): + def save_weights(self, save_path): # todo tf2: reintroduce functionality - # self.weights_save_path = self.saver.save(session, save_path) - pass + #self.weights_save_path = self.saver.save(save_path) + self.ecd.save_weights(save_path) def save_hyperparameters(self, hyperparameters, save_path): # removing pretrained embeddings paths from hyperparameters @@ -1158,10 +1158,11 @@ def save_savedmodel(self, save_path): # builder.save() pass - def restore(self, session, weights_path): + def restore(self,weights_path): # todo tf2: reintroduce this functionality # self.saver.restore(session, weights_path) - pass + self.ecd.load_weights(weights_path) + @staticmethod def load(load_path, use_horovod=False): diff --git a/ludwig/predict.py b/ludwig/predict.py index 99eb6a1f064..8a33e1d784a 100644 --- a/ludwig/predict.py +++ b/ludwig/predict.py @@ -99,7 +99,7 @@ def full_predict( gpu_fraction, debug ) - model.close_session() + # model.close_session() # todo tf2 code clean -up if is_on_master(): # setup directories and file names diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 583065800f0..df5e6e05544 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -1,38 +1,57 @@ import os.path import json +from collections import namedtuple import pandas as pd import numpy as np +from sklearn.model_selection import train_test_split +from sklearn.metrics import mean_squared_error import pytest from ludwig.experiment import full_experiment +from ludwig.predict import full_predict + +GeneratedData = namedtuple('GeneratedData', + 'train_df validation_df test_df') + +def get_feature_definitions(): + input_features = [ + {'name': 'x', 'type': 'numerical'}, + ] + output_features = [ + {'name': 'y', 'type': 'numerical', 'loss': {'type': 'mean_squared_error'}, + 'num_fc_layers': 5, 'fc_size': 64} + ] + + return input_features, output_features + @pytest.fixture -def train_df(): - # function generates simple training data tha guarantee convergence +def generated_data(): + # function generates simple training data that guarantee convergence # within 30 epochs for suitable model definition - NUMBER_OBSERVATIONS = 200 + NUMBER_OBSERVATIONS = 500 - # generate training data - np.random.seed(42) + # generate data + np.random.seed(43) x = np.array(range(NUMBER_OBSERVATIONS)).reshape(-1, 1) y = 2*x + 1 + np.random.normal(size=x.shape[0]).reshape(-1, 1) + raw_df = pd.DataFrame(np.concatenate((x, y), axis=1), columns=['x', 'y']) + + # create training data + train, valid_test = train_test_split(raw_df, train_size=0.7) - train_df = pd.DataFrame(np.concatenate((x, y), axis=1), columns=['x', 'y']) + # create validation and 
test data + validation, test = train_test_split(valid_test, train_size=0.5) - return train_df + return GeneratedData(train, validation, test) @pytest.mark.parametrize('early_stop', [3, 5]) -def test_early_stopping(early_stop, train_df, tmp_path): - # model definition guarantee convergence in under 30 epochs - input_features = [ - {'name': 'x', 'type': 'numerical'}, - ] - output_features = [ - {'name': 'y', 'type': 'numerical', 'loss': {'type': 'mean_squared_error'}, - 'num_fc_layers': 5, 'fc_size': 64} - ] +def test_early_stopping(early_stop, generated_data, tmp_path): + + input_features, output_features = get_feature_definitions() + model_definition = { 'input_features': input_features, 'output_features': output_features, @@ -40,7 +59,7 @@ def test_early_stopping(early_stop, train_df, tmp_path): 'type': 'concat' }, 'training': { - 'epochs': 100, + 'epochs': 30, 'early_stop': early_stop, 'batch_size': 16 } @@ -63,7 +82,12 @@ def test_early_stopping(early_stop, train_df, tmp_path): } # run experiment - exp_dir_name = full_experiment(data_df=train_df, **kwargs) + exp_dir_name = full_experiment( + data_train_df=generated_data.train_df, + data_validation_df=generated_data.validation_df, + data_test_df=generated_data.test_df, + **kwargs + ) # test existence of required files train_stats_fp = os.path.join(exp_dir_name, 'training_statistics.json') @@ -89,3 +113,43 @@ def test_early_stopping(early_stop, train_df, tmp_path): assert (last_epoch - best_epoch - 1) == early_stop_value +def test_model_save_resume(generated_data, tmp_path): + + input_features, output_features = get_feature_definitions() + model_definition = { + 'input_features': input_features, + 'output_features': output_features, + 'combiner': {'type': 'concat', 'fc_size': 14}, + 'training': {'epochs': 30, 'early_stop': 5} + } + + # create sub-directory to store results + results_dir = tmp_path / 'results' + results_dir.mkdir() + + exp_dir_name = full_experiment( + model_definition, + data_train_df=generated_data.train_df, + data_validation_df=generated_data.validation_df, + data_test_df=generated_data.test_df, + output_directory=results_dir + ) + + full_experiment( + model_definition, + data_train_df=generated_data.train_df, + model_resume_path=exp_dir_name + ) + + test_fp = os.path.join(tmp_path, 'data_to_predict.csv') + generated_data.test_df.to_csv( + test_fp, + index=False + ) + + full_predict(os.path.join(exp_dir_name, 'model'), data_csv=test_fp) + + y_pred = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) + + mse = mean_squared_error(y_pred, generated_data.test_df['y']) + pass \ No newline at end of file From 9d6627cfe276035a7936e785c6eea2c03422cd03 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Mon, 15 Jun 2020 06:30:39 -0400 Subject: [PATCH 03/34] refactor: eliminate warning pycharm warning message --- tests/integration_tests/test_model_training_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index df5e6e05544..a33de9b9d04 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -141,7 +141,7 @@ def test_model_save_resume(generated_data, tmp_path): model_resume_path=exp_dir_name ) - test_fp = os.path.join(tmp_path, 'data_to_predict.csv') + test_fp = os.path.join(str(tmp_path), 'data_to_predict.csv') generated_data.test_df.to_csv( test_fp, index=False From a70d0e5512067aeb98b1776e4dd3073e40a68913 Mon Sep 
17 00:00:00 2001 From: Jim Thompson Date: Mon, 15 Jun 2020 21:26:44 -0400 Subject: [PATCH 04/34] feat: add test for saving progress weights and final model --- .../test_model_training_options.py | 77 +++++++++++++++---- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index a33de9b9d04..06a93bba821 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -69,24 +69,18 @@ def test_early_stopping(early_stop, generated_data, tmp_path): results_dir = tmp_path / 'results' results_dir.mkdir() - # specify model training options - kwargs = { - 'output_directory':results_dir, - 'model_definition': model_definition, - 'skip_save_processed_input': True, - 'skip_save_progress': True, - 'skip_save_unprocessed_output': True, - 'skip_save_model': True, - 'skip_save_log': True - - } - # run experiment exp_dir_name = full_experiment( data_train_df=generated_data.train_df, data_validation_df=generated_data.validation_df, data_test_df=generated_data.test_df, - **kwargs + output_directory=str(results_dir), + model_definition=model_definition, + skip_save_processed_input=True, + skip_save_progress=True, + skip_save_unprocessed_output=True, + skip_save_model=True, + skip_save_log=True ) # test existence of required files @@ -112,7 +106,63 @@ def test_early_stopping(early_stop, generated_data, tmp_path): # confirm early stopping assert (last_epoch - best_epoch - 1) == early_stop_value +@pytest.mark.parametrize('skip_save_progress', [False, True]) +@pytest.mark.parametrize('skip_save_model', [False, True]) +def test_model_progress_save( + skip_save_progress, + skip_save_model, + generated_data, + tmp_path +): + + input_features, output_features = get_feature_definitions() + + model_definition = { + 'input_features': input_features, + 'output_features': output_features, + 'combiner': {'type': 'concat', 'fc_size': 14}, + 'training': {'epochs': 10} + } + + # create sub-directory to store results + results_dir = tmp_path / 'results' + results_dir.mkdir() + + # run experiment + exp_dir_name = full_experiment( + data_train_df=generated_data.train_df, + data_validation_df=generated_data.validation_df, + data_test_df=generated_data.test_df, + output_directory=str(results_dir), + model_definition=model_definition, + skip_save_processed_input=True, + skip_save_progress=skip_save_progress, + skip_save_unprocessed_output=True, + skip_save_model=skip_save_model, + skip_save_log=True + ) + #========== Check for required result data sets ============= + if skip_save_model: + assert not os.path.isfile( + os.path.join(exp_dir_name, 'model', 'model_weights.index') + ) + else: + assert os.path.isfile( + os.path.join(exp_dir_name, 'model', 'model_weights.index') + ) + + if skip_save_progress: + assert not os.path.isfile( + os.path.join(exp_dir_name, 'model', 'model_weights_progress.index') + ) + else: + assert os.path.isfile( + os.path.join(exp_dir_name, 'model', 'model_weights_progress.index') + ) + + +# work-in-progress def test_model_save_resume(generated_data, tmp_path): input_features, output_features = get_feature_definitions() @@ -152,4 +202,3 @@ def test_model_save_resume(generated_data, tmp_path): y_pred = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) mse = mean_squared_error(y_pred, generated_data.test_df['y']) - pass \ No newline at end of file From e45a155d97ae5f5df327a4c3842d4975bf8256d3 Mon Sep 17 00:00:00 2001 
From: Jim Thompson Date: Mon, 15 Jun 2020 23:03:29 -0400 Subject: [PATCH 05/34] feat: update restoring function --- ludwig/models/model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index c26b833b416..d4917bb54f1 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -379,8 +379,8 @@ def train( # ====== Setup session ======= # todo tf2: reintroduce restoring weights - # if self.weights_save_path: - # self.restore(session, self.weights_save_path) + if self.weights_save_path: + self.restore(self.weights_save_path) # todo tf2: reintroduce tensorboard logging # train_writer = None @@ -1298,7 +1298,6 @@ def initialize_batcher( def resume_session( self, - session, save_path, model_weights_path, model_weights_progress_path From 5a33a452d97e06b73a46ef36172d1add679e63d2 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Tue, 16 Jun 2020 20:16:45 -0400 Subject: [PATCH 06/34] test: change assertion test for model save/resume --- .../test_model_training_options.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 06a93bba821..e34ee39edbc 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -1,6 +1,7 @@ import os.path import json from collections import namedtuple +import shutil import pandas as pd import numpy as np @@ -10,7 +11,7 @@ import pytest from ludwig.experiment import full_experiment -from ludwig.predict import full_predict +from ludwig.predict import full_predict, predict GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') @@ -185,20 +186,17 @@ def test_model_save_resume(generated_data, tmp_path): output_directory=results_dir ) + y_pred1 = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) + full_experiment( model_definition, data_train_df=generated_data.train_df, + data_validation_df=generated_data.validation_df, + data_test_df=generated_data.test_df, model_resume_path=exp_dir_name ) - test_fp = os.path.join(str(tmp_path), 'data_to_predict.csv') - generated_data.test_df.to_csv( - test_fp, - index=False - ) - - full_predict(os.path.join(exp_dir_name, 'model'), data_csv=test_fp) + y_pred2 = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) - y_pred = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) + assert np.all(np.isclose(y_pred1, y_pred2)) - mse = mean_squared_error(y_pred, generated_data.test_df['y']) From 91bc289ceee24916533e81e56b5497413b4a95fe Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Tue, 16 Jun 2020 21:29:15 -0400 Subject: [PATCH 07/34] refactor: re-enable resume_session() and restore() methods --- ludwig/models/model.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index d4917bb54f1..8b06812a47e 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -406,13 +406,12 @@ def train( model_weights_path ) # todo tf3: reintroduce session resume - # if is_on_master(): - # self.resume_session( - # session, - # save_path, - # model_weights_path, - # model_weights_progress_path - # ) + if is_on_master(): + self.resume_session( + save_path, + model_weights_path, + model_weights_progress_path + ) else: ( train_metrics, @@ -1158,7 +1157,7 @@ def save_savedmodel(self, save_path): # builder.save() pass - def 
restore(self,weights_path): + def restore(self, weights_path): # todo tf2: reintroduce this functionality # self.saver.restore(session, weights_path) self.ecd.load_weights(weights_path) @@ -1308,9 +1307,9 @@ def resume_session( if pattern.match(file_path): num_matching_files += 1 if num_matching_files == 3: - self.restore(session, model_weights_progress_path) + self.restore(model_weights_progress_path) else: - self.restore(session, model_weights_path) + self.restore(model_weights_path) def reduce_learning_rate( self, From acb8bed03b3603a65a157b17315629e4b0a932eb Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Thu, 18 Jun 2020 22:05:53 -0400 Subject: [PATCH 08/34] fix: ValueError when saving model to disk --- ludwig/features/base_feature.py | 6 +++--- ludwig/models/model.py | 4 ++-- tests/integration_tests/test_model_training_options.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ludwig/features/base_feature.py b/ludwig/features/base_feature.py index 598fcc45202..ae735246c03 100644 --- a/ludwig/features/base_feature.py +++ b/ludwig/features/base_feature.py @@ -168,11 +168,12 @@ def call( ): # account for output feature target if isinstance(inputs, tuple): - inputs, target = inputs + local_inputs, target = inputs else: + local_inputs = inputs target = None - combiner_outputs, other_output_hidden = inputs + combiner_outputs, other_output_hidden = local_inputs # extract the combined hidden layer combiner_output = combiner_outputs['combiner_output'] @@ -389,6 +390,5 @@ def prepare_decoder_inputs( training=training, mask=mask ) - other_output_features[self.feature_name] = feature_hidden return feature_hidden diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 8b06812a47e..11d5e3f2716 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -1112,7 +1112,7 @@ def collect_weights( def save_weights(self, save_path): # todo tf2: reintroduce functionality #self.weights_save_path = self.saver.save(save_path) - self.ecd.save_weights(save_path) + self.ecd.save(save_path) def save_hyperparameters(self, hyperparameters, save_path): # removing pretrained embeddings paths from hyperparameters @@ -1160,7 +1160,7 @@ def save_savedmodel(self, save_path): def restore(self, weights_path): # todo tf2: reintroduce this functionality # self.saver.restore(session, weights_path) - self.ecd.load_weights(weights_path) + tf.keras.models.load_model(weights_path) @staticmethod diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index e34ee39edbc..db0b4082b2c 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -171,7 +171,7 @@ def test_model_save_resume(generated_data, tmp_path): 'input_features': input_features, 'output_features': output_features, 'combiner': {'type': 'concat', 'fc_size': 14}, - 'training': {'epochs': 30, 'early_stop': 5} + 'training': {'epochs': 30, 'early_stop': 3} } # create sub-directory to store results From 026495988e6b4add4aa25a24877e06e3ae5926e9 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Fri, 19 Jun 2020 22:35:45 -0400 Subject: [PATCH 09/34] fix: resolve error when restoring saved model --- ludwig/models/modules/metric_modules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ludwig/models/modules/metric_modules.py b/ludwig/models/modules/metric_modules.py index 912c23665d5..7dd757eeeda 100644 --- a/ludwig/models/modules/metric_modules.py +++ 
b/ludwig/models/modules/metric_modules.py @@ -43,7 +43,7 @@ class R2Score(tf.keras.metrics.Metric): # todo tf2 - convert to tensors? - def __init__(self, name='r2_score'): + def __init__(self, name='r2_score', **kwargs): super(R2Score, self).__init__(name=name) self._reset_states() @@ -81,7 +81,7 @@ class ErrorScore(tf.keras.metrics.Metric): # todo tf2 - convert to tensors? - def __init__(self, name='error_score'): + def __init__(self, name='error_score', **kwargs): super(ErrorScore, self).__init__(name=name) self._reset_states() From 037e3b7c30e9b44ac57f9a5ffdb9cccc9694825b Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Fri, 19 Jun 2020 22:38:38 -0400 Subject: [PATCH 10/34] fix: adapt test to directory for TF2 saved model file storage refactor: misc changes to some tests --- .../test_model_training_options.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index db0b4082b2c..0c52b08627b 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -28,7 +28,7 @@ def get_feature_definitions(): return input_features, output_features -@pytest.fixture +@pytest.fixture(scope='module') def generated_data(): # function generates simple training data that guarantee convergence # within 30 epochs for suitable model definition @@ -121,7 +121,7 @@ def test_model_progress_save( model_definition = { 'input_features': input_features, 'output_features': output_features, - 'combiner': {'type': 'concat', 'fc_size': 14}, + 'combiner': {'type': 'concat'}, 'training': {'epochs': 10} } @@ -141,25 +141,25 @@ def test_model_progress_save( skip_save_unprocessed_output=True, skip_save_model=skip_save_model, skip_save_log=True - ) + )˚ #========== Check for required result data sets ============= if skip_save_model: - assert not os.path.isfile( - os.path.join(exp_dir_name, 'model', 'model_weights.index') + assert not os.path.isdir( + os.path.join(exp_dir_name, 'model', 'model_weights') ) else: - assert os.path.isfile( - os.path.join(exp_dir_name, 'model', 'model_weights.index') + assert os.path.isdir( + os.path.join(exp_dir_name, 'model', 'model_weights') ) if skip_save_progress: - assert not os.path.isfile( - os.path.join(exp_dir_name, 'model', 'model_weights_progress.index') + assert not os.path.isdir( + os.path.join(exp_dir_name, 'model', 'model_weights_progress') ) else: - assert os.path.isfile( - os.path.join(exp_dir_name, 'model', 'model_weights_progress.index') + assert os.path.isdir( + os.path.join(exp_dir_name, 'model', 'model_weights_progress') ) @@ -170,8 +170,8 @@ def test_model_save_resume(generated_data, tmp_path): model_definition = { 'input_features': input_features, 'output_features': output_features, - 'combiner': {'type': 'concat', 'fc_size': 14}, - 'training': {'epochs': 30, 'early_stop': 3} + 'combiner': {'type': 'concat'}, + 'training': {'epochs': 30, 'early_stop': 3, 'batch_size': 16} } # create sub-directory to store results From 1444c6931a8afcf7fe768d1894b1ef0401201fd2 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Fri, 19 Jun 2020 22:44:41 -0400 Subject: [PATCH 11/34] fix: syntax error --- tests/integration_tests/test_model_training_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 0c52b08627b..60b24a05495 100644 --- 
a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -141,7 +141,7 @@ def test_model_progress_save( skip_save_unprocessed_output=True, skip_save_model=skip_save_model, skip_save_log=True - )˚ + ) #========== Check for required result data sets ============= if skip_save_model: From ddc715790a25e2b971f2bcba30d243e3e5a7c0e2 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Fri, 19 Jun 2020 22:47:05 -0400 Subject: [PATCH 12/34] fix: TEMPORARY CODE FOR DEBUGGING PURPOSES - NEED TO BE REPLACED --- ludwig/models/model.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 11d5e3f2716..9af919f330d 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -56,6 +56,9 @@ from ludwig.utils.misc import set_random_seed from ludwig.utils.misc import sum_dicts +from ludwig.features.numerical_feature import MSEMetric #todo tf2 testing code +from ludwig.models.modules.metric_modules import ErrorScore, R2Score + logger = logging.getLogger(__name__) tf.config.experimental_run_functions_eagerly(True) @@ -1158,9 +1161,15 @@ def save_savedmodel(self, save_path): pass def restore(self, weights_path): - # todo tf2: reintroduce this functionality - # self.saver.restore(session, weights_path) - tf.keras.models.load_model(weights_path) + # todo tf2: clean up debugging code + tf.keras.models.load_model( + weights_path, + custom_objects={ + 'MSEMetric': MSEMetric, + 'ErrorScore': ErrorScore, + 'R2Score': R2Score + } + ) @staticmethod From 48a9f41d4262b404ba6216fa2aed578f033d28f3 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sat, 20 Jun 2020 12:15:54 -0400 Subject: [PATCH 13/34] feat: for model save/restore support dictionary of custom objects --- ludwig/models/model.py | 49 +++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 9af919f330d..b298c4c8ee7 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -24,15 +24,18 @@ import copy import logging import os +import os.path +import pickle import re import signal import sys import threading +import tempfile import time from collections import OrderedDict import tensorflow as tf -# import tensorflow as tf2 # todo: tf2 port + from tabulate import tabulate from tqdm import tqdm @@ -56,9 +59,6 @@ from ludwig.utils.misc import set_random_seed from ludwig.utils.misc import sum_dicts -from ludwig.features.numerical_feature import MSEMetric #todo tf2 testing code -from ludwig.models.modules.metric_modules import ErrorScore, R2Score - logger = logging.getLogger(__name__) tf.config.experimental_run_functions_eagerly(True) @@ -1113,8 +1113,29 @@ def collect_weights( pass def save_weights(self, save_path): - # todo tf2: reintroduce functionality - #self.weights_save_path = self.saver.save(save_path) + # collect all custom metrics used in output features + custom_objects = {} + for of_name, of in self.ecd.output_features.items(): + for mfn_name, mfn_obj in of.metric_functions.items(): + # if module name starts with 'ludwig' this is a custom metric + if mfn_obj.__class__.__module__[:6] == 'ludwig': + custom_objects.update( + {mfn_obj.__class__.__name__: mfn_obj.__class__} + ) + # create pickle of the custom object and save pickled + # file in the saved model 'asset' directory + with tempfile.TemporaryDirectory() as tmpdirname: + custom_objects_fp = os.path.join(tmpdirname, "ludwig_custom_objects.pkl") 
+ with open(custom_objects_fp, "wb") as f: + pickle.dump(custom_objects, f) + # add pickle file to trackable object + trackable_obj = tf.train.Checkpoint() + custom_objects_asset = tf.saved_model.Asset(custom_objects_fp) + trackable_obj.custom_objects_asset = custom_objects_asset + # save trackable object in the saved model asset directory + tf.saved_model.save(trackable_obj, save_path) + + # save model self.ecd.save(save_path) def save_hyperparameters(self, hyperparameters, save_path): @@ -1161,14 +1182,18 @@ def save_savedmodel(self, save_path): pass def restore(self, weights_path): - # todo tf2: clean up debugging code + # retrieve custom objects + customs_objects_fp = os.path.join( + weights_path, + 'assets', + 'ludwig_custom_objects.pkl' + ) + with open(customs_objects_fp, 'rb') as f: + custom_objects = pickle.load(f) + tf.keras.models.load_model( weights_path, - custom_objects={ - 'MSEMetric': MSEMetric, - 'ErrorScore': ErrorScore, - 'R2Score': R2Score - } + custom_objects=custom_objects ) From 0e5d1e34b0aa10a3032ef9cee7f1de2816fe166e Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sun, 21 Jun 2020 21:37:06 -0400 Subject: [PATCH 14/34] test: VERSION USED FOR DEBUGGING --- .../test_model_training_options.py | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 60b24a05495..8de8a1ef5c6 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -10,8 +10,10 @@ import pytest +import tensorflow as tf from ludwig.experiment import full_experiment -from ludwig.predict import full_predict, predict +from ludwig.api import LudwigModel + GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') @@ -122,7 +124,7 @@ def test_model_progress_save( 'input_features': input_features, 'output_features': output_features, 'combiner': {'type': 'concat'}, - 'training': {'epochs': 10} + 'training': {'epochs': 5} } # create sub-directory to store results @@ -171,7 +173,7 @@ def test_model_save_resume(generated_data, tmp_path): 'input_features': input_features, 'output_features': output_features, 'combiner': {'type': 'concat'}, - 'training': {'epochs': 30, 'early_stop': 3, 'batch_size': 16} + 'training': {'epochs': 10, 'early_stop': 0, 'batch_size': 16} } # create sub-directory to store results @@ -183,11 +185,13 @@ def test_model_save_resume(generated_data, tmp_path): data_train_df=generated_data.train_df, data_validation_df=generated_data.validation_df, data_test_df=generated_data.test_df, - output_directory=results_dir + output_directory='results' #results_dir ) y_pred1 = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) + model_definition['training']['epochs'] = 20 + full_experiment( model_definition, data_train_df=generated_data.train_df, @@ -200,3 +204,41 @@ def test_model_save_resume(generated_data, tmp_path): assert np.all(np.isclose(y_pred1, y_pred2)) +# work-in-progress +# def test_model_save_resume(generated_data, tmp_path): +# +# input_features, output_features = get_feature_definitions() +# model_definition = { +# 'input_features': input_features, +# 'output_features': output_features, +# 'combiner': {'type': 'concat'}, +# 'training': {'epochs': 3, 'batch_size': 16} +# } +# +# # create sub-directory to store results +# results_dir = tmp_path / 'results' +# results_dir.mkdir() +# +# # perform inital model training +# ludwig_model = 
LudwigModel(model_definition) +# train_stats = ludwig_model.train( +# data_train_df=generated_data.train_df, +# data_validation_df=generated_data.validation_df, +# data_test_df=generated_data.test_df, +# output_directory='results' #results_dir +# ) +# +# # load saved model definition +# ludwig_model2 = LudwigModel.load( +# os.path.join(ludwig_model.exp_dir_name, 'model') +# ) +# +# for _, i_feature in ludwig_model2.model.ecd.input_features.items(): +# i_feature.encoder_obj(None, training=False) +# +# ludwig_model2.model.ecd.combiner({'x': {'encoder_output': [None]}}, training=False) +# +# for _, o_feature in ludwig_model2.model.ecd.output_features.items(): +# o_feature.decoder_obj(None, training=False) +# +# pass From fccd4b11c2d073ec6508a79abc4eb2f2eb037f70 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sat, 27 Jun 2020 11:35:14 -0400 Subject: [PATCH 15/34] refactor: change from savedmodel to save_weights approach --- ludwig/models/model.py | 41 +++++------------------------------------ 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index b298c4c8ee7..8fe63a2b0ce 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -452,6 +452,7 @@ def train( # ================ Training Loop ================ while progress_tracker.epoch < self.epochs: + print(">>>> progress tracker epoch", progress_tracker.epoch) # epoch init start_time = time.time() if is_on_master(): @@ -1113,30 +1114,8 @@ def collect_weights( pass def save_weights(self, save_path): - # collect all custom metrics used in output features - custom_objects = {} - for of_name, of in self.ecd.output_features.items(): - for mfn_name, mfn_obj in of.metric_functions.items(): - # if module name starts with 'ludwig' this is a custom metric - if mfn_obj.__class__.__module__[:6] == 'ludwig': - custom_objects.update( - {mfn_obj.__class__.__name__: mfn_obj.__class__} - ) - # create pickle of the custom object and save pickled - # file in the saved model 'asset' directory - with tempfile.TemporaryDirectory() as tmpdirname: - custom_objects_fp = os.path.join(tmpdirname, "ludwig_custom_objects.pkl") - with open(custom_objects_fp, "wb") as f: - pickle.dump(custom_objects, f) - # add pickle file to trackable object - trackable_obj = tf.train.Checkpoint() - custom_objects_asset = tf.saved_model.Asset(custom_objects_fp) - trackable_obj.custom_objects_asset = custom_objects_asset - # save trackable object in the saved model asset directory - tf.saved_model.save(trackable_obj, save_path) - # save model - self.ecd.save(save_path) + self.ecd.save_weights(save_path) def save_hyperparameters(self, hyperparameters, save_path): # removing pretrained embeddings paths from hyperparameters @@ -1182,19 +1161,9 @@ def save_savedmodel(self, save_path): pass def restore(self, weights_path): - # retrieve custom objects - customs_objects_fp = os.path.join( - weights_path, - 'assets', - 'ludwig_custom_objects.pkl' - ) - with open(customs_objects_fp, 'rb') as f: - custom_objects = pickle.load(f) - - tf.keras.models.load_model( - weights_path, - custom_objects=custom_objects - ) + self.ecd.load_weights(weights_path) + weights = self.ecd.get_weights() + pass @staticmethod From 157caa3723d6c91d0df50fbfa1c17415ff7ffcc1 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sat, 27 Jun 2020 13:33:37 -0400 Subject: [PATCH 16/34] refactor: remove hack for initializing weights --- ludwig/models/model.py | 3 --- tests/integration_tests/test_model_training_options.py | 4 ++-- 2 files changed, 2 insertions(+), 5 
deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 8fe63a2b0ce..8ee2734200d 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -1162,9 +1162,6 @@ def save_savedmodel(self, save_path): def restore(self, weights_path): self.ecd.load_weights(weights_path) - weights = self.ecd.get_weights() - pass - @staticmethod def load(load_path, use_horovod=False): diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 8de8a1ef5c6..a37a22ec66b 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -173,7 +173,7 @@ def test_model_save_resume(generated_data, tmp_path): 'input_features': input_features, 'output_features': output_features, 'combiner': {'type': 'concat'}, - 'training': {'epochs': 10, 'early_stop': 0, 'batch_size': 16} + 'training': {'epochs': 5, 'early_stop': 0, 'batch_size': 16} } # create sub-directory to store results @@ -190,7 +190,7 @@ def test_model_save_resume(generated_data, tmp_path): y_pred1 = np.load(os.path.join(exp_dir_name, 'y_predictions.npy')) - model_definition['training']['epochs'] = 20 + model_definition['training']['epochs'] = 15 full_experiment( model_definition, From c0d28f263a051c6def673f7148eed1accc79273e Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sat, 27 Jun 2020 21:34:30 -0400 Subject: [PATCH 17/34] fix: reporting metrics in wrong order when resuming model training --- ludwig/models/model.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index 8ee2734200d..c1f00b42d0d 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -759,7 +759,12 @@ def append_metrics(self, dataset_name, results, metrics_log, tables): for output_feature in self.ecd.output_features: scores = [dataset_name] - for metric in metrics_log[output_feature]: + # collect metric names based on output features metrics to + # ensure consistent order of reporting metrics + metric_names = self.ecd.output_features[output_feature]\ + .metric_functions.keys() + + for metric in metric_names: score = results[output_feature][metric] metrics_log[output_feature][metric].append(score) scores.append(score) From 799406d97eae1ca02c50e657c797ad619b924712 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sun, 28 Jun 2020 00:50:09 -0400 Subject: [PATCH 18/34] feat: initial working LudwigModel.predict() method with TF2 --- ludwig/models/model.py | 62 ++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index c1f00b42d0d..e75a3002c5e 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -163,8 +163,8 @@ def evaluation_step(self, model, inputs, targets): return predictions @tf.function - def predict_step(self, model, x): - return model(x, training=False) + def predict_step(self, model, inputs): + return model.predictions(inputs, output_features=None) # def initialize_session(self, gpus=None, gpu_fraction=1): # if self.session is None: @@ -809,15 +809,24 @@ def batch_evaluation( # create array for predictors # todo: tf2 need to handle case of single predictor, e.g., image inputs = {i_feat['name']: batch[i_feat['name']] for i_feat in self.hyperparameters['input_features']} - targets = {o_feat['name']: batch[o_feat['name']] for o_feat in self.hyperparameters['output_features']} - ( - preds - ) = self.evaluation_step( - self.ecd, - 
inputs, - targets - ) + if only_predictions: + ( + preds + ) = self.predict_step( + self.ecd, + inputs + ) + else: + targets = {o_feat['name']: batch[o_feat['name']] for o_feat in self.hyperparameters['output_features']} + + ( + preds + ) = self.evaluation_step( + self.ecd, + inputs, + targets + ) # accumulate predictions from batch for each output feature for of_name, of_preds in preds.items(): @@ -846,9 +855,12 @@ def batch_evaluation( for pred_name, pred_value_list in of_predictions.items(): predictions[of_name][pred_name] = tf.concat(pred_value_list, axis=0) - metrics = self.ecd.get_metrics() - self.ecd.reset_metrics() - return metrics, predictions + if only_predictions: + return predictions + else: + metrics = self.ecd.get_metrics() + self.ecd.reset_metrics() + return metrics, predictions def evaluation( self, @@ -1035,14 +1047,22 @@ def predict( **kwargs ): # predict - eval_metrics, eval_predictions = self.batch_evaluation( - dataset, - batch_size, - collect_predictions=True, - only_predictions=not evaluate_performance - ) - - return eval_metrics, eval_predictions + if evaluate_performance: + eval_metrics, eval_predictions = self.batch_evaluation( + dataset, + batch_size, + collect_predictions=True, + only_predictions=not evaluate_performance + ) + return eval_metrics, eval_predictions + else: + eval_predictions = self.batch_evaluation( + dataset, + batch_size, + collect_predictions=True, + only_predictions=not evaluate_performance + ) + return eval_predictions def collect_activations( self, From d4035fc10ad08744a1da80cadfd81d9ddf4518c0 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sun, 28 Jun 2020 09:26:18 -0400 Subject: [PATCH 19/34] feat: allow specification of optimizer --- tests/integration_tests/test_model_training_options.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index a37a22ec66b..c4961010433 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -173,7 +173,12 @@ def test_model_save_resume(generated_data, tmp_path): 'input_features': input_features, 'output_features': output_features, 'combiner': {'type': 'concat'}, - 'training': {'epochs': 5, 'early_stop': 0, 'batch_size': 16} + 'training': { + 'epochs': 5, + 'early_stop': 0, + 'batch_size': 16, + 'optimizer': {'type': 'adam'} + } } # create sub-directory to store results From 0fdeed63a55199712112362be661d4acf640c0d3 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Sun, 28 Jun 2020 10:09:05 -0400 Subject: [PATCH 20/34] fix: restoration of saved model weights --- ludwig/models/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ludwig/models/model.py b/ludwig/models/model.py index e75a3002c5e..b5d062d403b 100644 --- a/ludwig/models/model.py +++ b/ludwig/models/model.py @@ -1200,6 +1200,7 @@ def load(load_path, use_horovod=False): load_path, MODEL_WEIGHTS_FILE_NAME ) + model.restore(model.weights_save_path) return model def set_epochs_to_1_or_quit(self, signum, frame): From 182eea28676c83f029e8eedf8060ccd5db259785 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Mon, 29 Jun 2020 18:06:46 -0700 Subject: [PATCH 21/34] Added save and reload test using APIs --- .../test_model_training_options.py | 74 ++++++++++++++++--- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 
c4961010433..4a61b742ff3 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -1,26 +1,23 @@ -import os.path import json +import os.path from collections import namedtuple -import shutil -import pandas as pd import numpy as np -from sklearn.model_selection import train_test_split -from sklearn.metrics import mean_squared_error - +import pandas as pd import pytest +from pandas.util.testing import assert_frame_equal +from sklearn.model_selection import train_test_split -import tensorflow as tf -from ludwig.experiment import full_experiment from ludwig.api import LudwigModel - +from ludwig.experiment import full_experiment GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') + def get_feature_definitions(): input_features = [ - {'name': 'x', 'type': 'numerical'}, + {'name': 'x', 'type': 'numerical'}, ] output_features = [ {'name': 'y', 'type': 'numerical', 'loss': {'type': 'mean_squared_error'}, @@ -174,8 +171,8 @@ def test_model_save_resume(generated_data, tmp_path): 'output_features': output_features, 'combiner': {'type': 'concat'}, 'training': { - 'epochs': 5, - 'early_stop': 0, + 'epochs': 7, + 'early_stop': 1000, 'batch_size': 16, 'optimizer': {'type': 'adam'} } @@ -247,3 +244,56 @@ def test_model_save_resume(generated_data, tmp_path): # o_feature.decoder_obj(None, training=False) # # pass + + +def test_model_save_reload_API(generated_data, tmp_path): + input_features, output_features = get_feature_definitions() + model_definition = { + 'input_features': input_features, + 'output_features': output_features, + 'combiner': {'type': 'concat'}, + 'training': {'epochs': 3, 'batch_size': 16} + } + + # create sub-directory to store results + results_dir = tmp_path / 'results' + results_dir.mkdir() + + # perform initial model training + ludwig_model1 = LudwigModel(model_definition) + train_stats = ludwig_model1.train( + data_train_df=generated_data.train_df, + data_validation_df=generated_data.validation_df, + data_test_df=generated_data.test_df, + output_directory='results' # results_dir + ) + + preds_1 = ludwig_model1.predict(data_df=generated_data.validation_df) + + # load saved model + ludwig_model2 = LudwigModel.load( + os.path.join(ludwig_model1.exp_dir_name, 'model') + ) + + preds_2 = ludwig_model2.predict(data_df=generated_data.validation_df) + + assert_frame_equal(preds_1, preds_2) + + for if_name in ludwig_model1.model.ecd.input_features: + if1 = ludwig_model1.model.ecd.input_features[if_name] + if2 = ludwig_model2.model.ecd.input_features[if_name] + for if1_w, if2_w in zip(if1.encoder_obj.weights, + if2.encoder_obj.weights): + assert np.allclose(if1_w.numpy(), if2_w.numpy()) + + c1 = ludwig_model1.model.ecd.combiner + c2 = ludwig_model2.model.ecd.combiner + for c1_w, c2_w in zip(c1.weights, c2.weights): + assert np.allclose(c1_w.numpy(), c2_w.numpy()) + + for of_name in ludwig_model1.model.ecd.output_features: + of1 = ludwig_model1.model.ecd.output_features[of_name] + of2 = ludwig_model2.model.ecd.output_features[of_name] + for of1_w, of2_w in zip(of1.decoder_obj.weights, + of2.decoder_obj.weights): + assert np.allclose(of1_w.numpy(), of2_w.numpy()) From 0e88675df9715855bcfe762e9e39b59c18aaa1e9 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Mon, 29 Jun 2020 18:37:32 -0700 Subject: [PATCH 22/34] Fix: encoder creation in binary feature --- ludwig/features/binary_feature.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ludwig/features/binary_feature.py 
b/ludwig/features/binary_feature.py index 18c442caeb2..a582a0170ca 100644 --- a/ludwig/features/binary_feature.py +++ b/ludwig/features/binary_feature.py @@ -35,7 +35,7 @@ from ludwig.utils.metrics_utils import precision_recall_curve from ludwig.utils.metrics_utils import roc_auc_score from ludwig.utils.metrics_utils import roc_curve -from ludwig.utils.misc import set_default_value, get_from_registry +from ludwig.utils.misc import set_default_value from ludwig.utils.misc import set_default_values logger = logging.getLogger(__name__) @@ -79,12 +79,7 @@ def __init__(self, feature, encoder_obj=None): if encoder_obj: self.encoder_obj = encoder_obj else: - self.encoder_obj = self.get_binary_encoder(feature) - - def get_binary_encoder(self, encoder_parameters): - return get_from_registry(self.encoder, self.encoder_registry)( - **encoder_parameters - ) + self.encoder_obj = self.initialize_encoder(feature) def call(self, inputs, training=None, mask=None): assert isinstance(inputs, tf.Tensor) From fc809f9a47a4ebdc7d6de6ce76ae4886507da6c9 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Mon, 29 Jun 2020 18:37:49 -0700 Subject: [PATCH 23/34] Expanded the test_model_save_reload_API test --- .../test_model_training_options.py | 73 ++++++++++++++++--- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 4a61b742ff3..e842e93c702 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -5,11 +5,15 @@ import numpy as np import pandas as pd import pytest -from pandas.util.testing import assert_frame_equal from sklearn.model_selection import train_test_split from ludwig.api import LudwigModel +from ludwig.data.preprocessing import get_split from ludwig.experiment import full_experiment +from ludwig.utils.data_utils import split_dataset_tvt, read_csv +from tests.integration_tests.utils import binary_feature, numerical_feature, \ + category_feature, sequence_feature, date_feature, h3_feature, \ + set_feature, generate_data GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') @@ -18,7 +22,7 @@ def get_feature_definitions(): input_features = [ {'name': 'x', 'type': 'numerical'}, - ] + ] output_features = [ {'name': 'y', 'type': 'numerical', 'loss': {'type': 'mean_squared_error'}, 'num_fc_layers': 5, 'fc_size': 64} @@ -246,15 +250,59 @@ def test_model_save_resume(generated_data, tmp_path): # pass -def test_model_save_reload_API(generated_data, tmp_path): - input_features, output_features = get_feature_definitions() +def test_model_save_reload_API(csv_filename, tmp_path): + dir_path = os.path.dirname(csv_filename) + image_dest_folder = os.path.join(os.getcwd(), 'generated_images') + audio_dest_folder = os.path.join(os.getcwd(), 'generated_audio') + + input_features = [ + binary_feature(), + numerical_feature(), + category_feature(vocab_size=3), + sequence_feature(vocab_size=3), + # text_feature(vocab_size=3), + # vector_feature(), + # image_feature(image_dest_folder), + # audio_feature(audio_dest_folder), + # timeseries_feature(), + date_feature(), + h3_feature(), + set_feature(vocab_size=3), + # bag_feature(vocab_size=3), + ] + + output_features = [ + binary_feature(), + numerical_feature(), + # category_feature(vocab_size=3), + # sequence_feature(vocab_size=3), + # text_feature(vocab_size=3), + # set_feature(vocab_size=3), + # vector_feature() + ] + + # Generate test data + data_csv_path = 
generate_data(input_features, output_features, + csv_filename) + + ############# + # Train model + ############# model_definition = { 'input_features': input_features, 'output_features': output_features, - 'combiner': {'type': 'concat'}, - 'training': {'epochs': 3, 'batch_size': 16} + 'training': {'epochs': 2} } + data_df = read_csv(data_csv_path) + training_set, test_set, validation_set = split_dataset_tvt( + data_df, + get_split(data_df) + ) + training_set = pd.DataFrame(training_set) + validation_set = pd.DataFrame(validation_set) + test_set = pd.DataFrame(test_set) + # create sub-directory to store results results_dir = tmp_path / 'results' results_dir.mkdir() @@ -262,22 +310,23 @@ def test_model_save_reload_API(generated_data, tmp_path): # perform initial model training ludwig_model1 = LudwigModel(model_definition) train_stats = ludwig_model1.train( - data_train_df=generated_data.train_df, - data_validation_df=generated_data.validation_df, - data_test_df=generated_data.test_df, + data_train_df=training_set, + data_validation_df=validation_set, + data_test_df=test_set, output_directory='results' # results_dir ) - preds_1 = ludwig_model1.predict(data_df=generated_data.validation_df) + preds_1 = ludwig_model1.predict(data_df=validation_set) # load saved model ludwig_model2 = LudwigModel.load( os.path.join(ludwig_model1.exp_dir_name, 'model') ) - preds_2 = ludwig_model2.predict(data_df=generated_data.validation_df) + preds_2 = ludwig_model2.predict(data_df=validation_set) - assert_frame_equal(preds_1, preds_2) + for key in preds_1: + assert np.allclose(preds_1[key], preds_2[key]) for if_name in ludwig_model1.model.ecd.input_features: if1 = ludwig_model1.model.ecd.input_features[if_name] From da1ef01d5f1a628bea1834ed7de5b6a7acde4b28 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 17:04:55 -0700 Subject: [PATCH 24/34] Fix: vector fature encoder return from dict of dict to dict --- ludwig/features/vector_feature.py | 3 +-- tests/integration_tests/test_model_training_options.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ludwig/features/vector_feature.py b/ludwig/features/vector_feature.py index 6add10af8a5..c44058cf2ed 100644 --- a/ludwig/features/vector_feature.py +++ b/ludwig/features/vector_feature.py @@ -18,7 +18,6 @@ import os import numpy as np - import tensorflow as tf from tensorflow.keras.losses import MeanAbsoluteError from tensorflow.keras.losses import MeanSquaredError @@ -167,7 +166,7 @@ def call(self, inputs, training=None, mask=None): inputs, training=training, mask=mask ) - return {'encoder_outputs': inputs_encoded} + return inputs_encoded @staticmethod def update_model_definition_with_metadata( diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index e842e93c702..aa86f04b608 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -13,7 +13,7 @@ from ludwig.utils.data_utils import split_dataset_tvt, read_csv from tests.integration_tests.utils import binary_feature, numerical_feature, \ category_feature, sequence_feature, date_feature, h3_feature, \ - set_feature, generate_data + set_feature, generate_data, text_feature, vector_feature GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') @@ -260,8 +260,8 @@ def test_model_save_reload_API(csv_filename, tmp_path): numerical_feature(), category_feature(vocab_size=3), sequence_feature(vocab_size=3), - # 
text_feature(vocab_size=3), - # vector_feature(), + text_feature(vocab_size=3), + vector_feature(), # image_feature(image_dest_folder), # audio_feature(audio_dest_folder), # timeseries_feature(), From dbe7c3364672bcb8c3907ffb69c1b781056949a4 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 17:17:40 -0700 Subject: [PATCH 25/34] Fix: bag feature_data when input is a dataframe --- ludwig/features/bag_feature.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ludwig/features/bag_feature.py b/ludwig/features/bag_feature.py index a654e32bbe5..e8f1d79340b 100644 --- a/ludwig/features/bag_feature.py +++ b/ludwig/features/bag_feature.py @@ -25,7 +25,6 @@ from ludwig.features.base_feature import InputFeature from ludwig.features.feature_utils import set_str_to_idx from ludwig.models.modules.bag_encoders import BagEmbedWeightedEncoder -from ludwig.models.modules.embedding_modules import EmbedWeighted from ludwig.utils.misc import set_default_value from ludwig.utils.strings_utils import create_vocabulary @@ -70,13 +69,14 @@ def feature_data(column, metadata, preprocessing_parameters): dtype=float ) - for i in range(len(column)): + for i, set_str in enumerate(column): col_counter = Counter(set_str_to_idx( - column[i], + set_str, metadata['str2idx'], preprocessing_parameters['tokenizer']) ) - bag_matrix[i, list(col_counter.keys())] = list(col_counter.values()) + bag_matrix[i, list(col_counter.keys())] = list( + col_counter.values()) return bag_matrix From 88df360670dacb7aad3a55d28bb19289a8ce2153 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 18:12:59 -0700 Subject: [PATCH 26/34] Fix: image feature_data when input is a dataframe --- ludwig/features/image_feature.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py index 5612548cff9..b5e73694b9c 100644 --- a/ludwig/features/image_feature.py +++ b/ludwig/features/image_feature.py @@ -252,7 +252,10 @@ def add_feature_data( if num_images == 0: raise ValueError('There are no images in the dataset provided.') - first_image_path = dataset_df[feature['name']][0] + # this is not super nice, but works both and DFs and lists + for first_image_path in dataset_df[feature['name']]: + break + if csv_path is None and not os.path.isabs(first_image_path): raise ValueError('Image file paths must be absolute') From 8593dd96432ce817f92b2570b4e60bf4095d6908 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 18:20:10 -0700 Subject: [PATCH 27/34] Fix: image feature_data cleanup --- ludwig/features/image_feature.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py index b5e73694b9c..e7ea37e8ecb 100644 --- a/ludwig/features/image_feature.py +++ b/ludwig/features/image_feature.py @@ -253,13 +253,14 @@ def add_feature_data( raise ValueError('There are no images in the dataset provided.') # this is not super nice, but works both and DFs and lists - for first_image_path in dataset_df[feature['name']]: + first_path = '.' 
+ for first_path in dataset_df[feature['name']]: break - if csv_path is None and not os.path.isabs(first_image_path): + if csv_path is None and not os.path.isabs(first_path): raise ValueError('Image file paths must be absolute') - first_image_path = get_abs_path(csv_path, first_image_path) + first_path = get_abs_path(csv_path, first_path) ( should_resize, @@ -269,7 +270,7 @@ def add_feature_data( user_specified_num_channels, first_image ) = ImageBaseFeature._finalize_preprocessing_parameters( - preprocessing_parameters, first_image_path + preprocessing_parameters, first_path ) metadata[feature['name']]['preprocessing']['height'] = height From 64efb3ae8506f853997074afcd451312111f528e Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 18:20:21 -0700 Subject: [PATCH 28/34] Fix: audio feature_data when input is a dataframe --- ludwig/features/audio_feature.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ludwig/features/audio_feature.py b/ludwig/features/audio_feature.py index dce482c83c5..9011e8a47a1 100644 --- a/ludwig/features/audio_feature.py +++ b/ludwig/features/audio_feature.py @@ -243,10 +243,13 @@ def add_feature_data( 'for audio.') csv_path = None + # this is not super nice, but works both and DFs and lists + first_path = '.' + for first_path in dataset_df[feature['name']]: + break if hasattr(dataset_df, 'csv'): csv_path = os.path.dirname(os.path.abspath(dataset_df.csv)) - if (csv_path is None and - not os.path.isabs(dataset_df[feature['name']][0])): + if csv_path is None and not os.path.isabs(first_path): raise ValueError( 'Audio file paths must be absolute' ) @@ -281,10 +284,10 @@ def add_feature_data( (num_audio_utterances, max_length, feature_dim), dtype=np.float32 ) - for i in range(len(dataset_df)): + for i, path in enumerate(dataset_df[feature['name']]): filepath = get_abs_path( csv_path, - dataset_df[feature['name']][i] + path ) audio_feature = AudioBaseFeature._read_audio_and_transform_to_feature( filepath, audio_feature_dict, feature_dim, max_length, From fe0245e92961d166fd5a83683571b99f6693abae Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 18:24:07 -0700 Subject: [PATCH 29/34] Fix: most input features now work in test_model_save_reload_API --- tests/integration_tests/test_model_training_options.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index aa86f04b608..d1ab099a742 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -13,7 +13,8 @@ from ludwig.utils.data_utils import split_dataset_tvt, read_csv from tests.integration_tests.utils import binary_feature, numerical_feature, \ category_feature, sequence_feature, date_feature, h3_feature, \ - set_feature, generate_data, text_feature, vector_feature + set_feature, generate_data, text_feature, vector_feature, bag_feature, \ + image_feature, audio_feature GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') @@ -262,13 +263,13 @@ def test_model_save_reload_API(csv_filename, tmp_path): sequence_feature(vocab_size=3), text_feature(vocab_size=3), vector_feature(), - # image_feature(image_dest_folder), - # audio_feature(audio_dest_folder), + image_feature(image_dest_folder), + audio_feature(audio_dest_folder), # timeseries_feature(), date_feature(), h3_feature(), set_feature(vocab_size=3), - # bag_feature(vocab_size=3), + 
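Reading the first path with a for/break loop, and walking the column with enumerate, avoids positional indexing that breaks when the input is a DataFrame slice whose integer index no longer starts at 0. A small illustration of the failure mode and the workaround (toy paths, not real audio files):

import pandas as pd

# a split can produce a frame whose integer index does not contain 0
df = pd.DataFrame({'audio_path': ['/data/a.wav', '/data/b.wav']}, index=[7, 8])

# label-based lookup of 0 raises KeyError on this index
try:
    first_path = df['audio_path'][0]
except KeyError:
    first_path = None

# iterating the column works for Series and plain lists alike
first_path = '.'
for first_path in df['audio_path']:
    break
assert first_path == '/data/a.wav'

for i, path in enumerate(df['audio_path']):
    print(i, path)  # 0 /data/a.wav, then 1 /data/b.wav
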
bag_feature(vocab_size=3), ] output_features = [ From 8e13c08bfcdb2b232db9a114aba0d563e7354148 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 19:18:56 -0700 Subject: [PATCH 30/34] Fix: set output feature bugs (missing import for loss in metrics, missed kwargs and missing call to .numpy() on TF tesnor) --- ludwig/features/set_feature.py | 14 ++++++++------ ludwig/models/modules/metric_modules.py | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/ludwig/features/set_feature.py b/ludwig/features/set_feature.py index 6ff85183f23..8d7c68d8f94 100644 --- a/ludwig/features/set_feature.py +++ b/ludwig/features/set_feature.py @@ -20,7 +20,6 @@ import numpy as np import tensorflow as tf - from tensorflow.keras.metrics import MeanIoU from ludwig.constants import * @@ -28,11 +27,10 @@ from ludwig.features.base_feature import InputFeature from ludwig.features.base_feature import OutputFeature from ludwig.features.feature_utils import set_str_to_idx -from ludwig.models.modules.initializer_modules import get_initializer from ludwig.models.modules.loss_modules import SigmoidCrossEntropyLoss from ludwig.models.modules.metric_modules import SigmoidCrossEntropyMetric -from ludwig.models.modules.set_encoders import SetSparseEncoder from ludwig.models.modules.set_decoders import Classifier +from ludwig.models.modules.set_encoders import SetSparseEncoder from ludwig.utils.misc import set_default_value from ludwig.utils.strings_utils import create_vocabulary @@ -168,12 +166,15 @@ def __init__(self, feature): def logits( self, inputs, # hidden + **kwargs ): - return self.decoder_obj(inputs) + hidden = inputs[HIDDEN] + return self.decoder_obj(hidden) def predictions( self, inputs, # logits + **kwargs ): logits = inputs[LOGITS] @@ -284,9 +285,10 @@ def postprocess_results( del result[PREDICTIONS] if PROBABILITIES in result and len(result[PROBABILITIES]) > 0: - probs = result[PROBABILITIES] + probs = result[PROBABILITIES].numpy() prob = [[prob for prob in prob_set if - prob >= output_feature['threshold']] for prob_set in probs] + prob >= output_feature['threshold']] for prob_set in + probs] postprocessed[PROBABILITIES] = probs postprocessed[PROBABILITY] = prob diff --git a/ludwig/models/modules/metric_modules.py b/ludwig/models/modules/metric_modules.py index 7dd757eeeda..09577e39f6b 100644 --- a/ludwig/models/modules/metric_modules.py +++ b/ludwig/models/modules/metric_modules.py @@ -18,7 +18,8 @@ import tensorflow as tf from ludwig.constants import * -from ludwig.models.modules.loss_modules import BWCEWLoss +from ludwig.models.modules.loss_modules import BWCEWLoss, \ + SigmoidCrossEntropyLoss from ludwig.models.modules.loss_modules import SequenceLoss from ludwig.models.modules.loss_modules import SoftmaxCrossEntropyLoss from ludwig.utils.tf_utils import sequence_length_2D From eb13293b4fe7058e4b9468bdf2d27940e0e71c88 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 19:28:38 -0700 Subject: [PATCH 31/34] Fix: vector output feature bugs (missed kwargs and missing call to .numpy() on TF tesnor) --- ludwig/features/vector_feature.py | 7 +++++-- ludwig/models/modules/generic_decoders.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ludwig/features/vector_feature.py b/ludwig/features/vector_feature.py index c44058cf2ed..d3b5d30543c 100644 --- a/ludwig/features/vector_feature.py +++ b/ludwig/features/vector_feature.py @@ -209,12 +209,15 @@ def __init__(self, feature): def logits( self, inputs, # hidden + **kwargs ): - return 
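Calling .numpy() on the probabilities before thresholding matters because the model now returns eager tf.Tensor objects, while the post-processing code builds plain Python lists and NumPy arrays from them. A reduced sketch of that conversion and the per-row threshold filter, with made-up probabilities and threshold:

import tensorflow as tf

# assumed eager output of the set decoder: per-class probabilities
probs_tensor = tf.constant([[0.1, 0.8, 0.6],
                            [0.9, 0.2, 0.4]])
threshold = 0.5

# convert to a NumPy array first, then keep only probabilities above threshold
probs = probs_tensor.numpy()
prob = [[p for p in prob_set if p >= threshold] for prob_set in probs]

# prob is now approximately [[0.8, 0.6], [0.9, 0.4]] (float32 scalars)
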
self.decoder_obj(inputs) + hidden = inputs[HIDDEN] + return self.decoder_obj(hidden) def predictions( self, inputs, # logits + **kwargs ): return {PREDICTIONS: inputs[LOGITS], LOGITS: inputs[LOGITS]} @@ -285,7 +288,7 @@ def postprocess_results( name = output_feature['name'] if PREDICTIONS in result and len(result[PREDICTIONS]) > 0: - postprocessed[PREDICTIONS] = result[PREDICTIONS] + postprocessed[PREDICTIONS] = result[PREDICTIONS].numpy() if not skip_save_unprocessed_output: np.save( npy_filename.format(name, PREDICTIONS), diff --git a/ludwig/models/modules/generic_decoders.py b/ludwig/models/modules/generic_decoders.py index 3b56669a497..f68aeac3644 100644 --- a/ludwig/models/modules/generic_decoders.py +++ b/ludwig/models/modules/generic_decoders.py @@ -56,7 +56,7 @@ class Projector(Layer): def __init__( self, - num_classes, + vector_size, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', @@ -69,7 +69,7 @@ def __init__( ): super().__init__() self.dense = Dense( - num_classes, + vector_size, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, From e66e3efdadcdd75ebda1d03586e066ffb6ae6c79 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 19:29:19 -0700 Subject: [PATCH 32/34] Added additional outputs (ctegory, set, vector) to test_model_save_reload_API --- .../test_model_training_options.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index d1ab099a742..fee0117a09f 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -275,11 +275,11 @@ def test_model_save_reload_API(csv_filename, tmp_path): output_features = [ binary_feature(), numerical_feature(), - # category_feature(vocab_size=3), + category_feature(vocab_size=3), # sequence_feature(vocab_size=3), # text_feature(vocab_size=3), - # set_feature(vocab_size=3), - # vector_feature() + set_feature(vocab_size=3), + vector_feature(), ] # Generate test data @@ -327,7 +327,14 @@ def test_model_save_reload_API(csv_filename, tmp_path): preds_2 = ludwig_model2.predict(data_df=validation_set) for key in preds_1: - assert np.allclose(preds_1[key], preds_2[key]) + assert preds_1[key].dtype == preds_2[key].dtype + assert preds_1[key].equals(preds_2[key]) + + # col_dtype = preds_1[key].dtype + # if col_dtype in {'int32', 'int64', 'float32', 'float64'}: + # assert np.allclose(preds_1[key], preds_2[key]) + # else: + # assert preds_1[key].equals(preds_2[key]) for if_name in ludwig_model1.model.ecd.input_features: if1 = ludwig_model1.model.ecd.input_features[if_name] From fbfcb848d7d2bf9867dcc7707670bf4740b67c94 Mon Sep 17 00:00:00 2001 From: w4nderlust Date: Tue, 30 Jun 2020 20:41:30 -0700 Subject: [PATCH 33/34] Added timeseries inputs to test_model_save_reload_API test --- tests/integration_tests/test_model_training_options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index fee0117a09f..c2fcffaeda1 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -14,7 +14,7 @@ from tests.integration_tests.utils import binary_feature, numerical_feature, \ category_feature, sequence_feature, date_feature, h3_feature, \ set_feature, generate_data, 
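The num_classes to vector_size rename in Projector reflects what the decoder actually is: a Dense projection whose width equals the length of the target vector, not a class count. A minimal shape sketch of that decoder, using a stripped-down stand-in rather than the full Ludwig class:

import tensorflow as tf
from tensorflow.keras.layers import Dense, Layer


class MiniProjector(Layer):
    # stripped-down stand-in for the Projector decoder
    def __init__(self, vector_size, use_bias=True):
        super().__init__()
        self.dense = Dense(vector_size, use_bias=use_bias)

    def call(self, inputs, training=None):
        return self.dense(inputs)


hidden = tf.random.normal([4, 128])        # batch of combiner outputs
decoder = MiniProjector(vector_size=10)    # 10 = length of the target vector
print(decoder(hidden).shape)               # (4, 10)
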
text_feature, vector_feature, bag_feature, \ - image_feature, audio_feature + image_feature, audio_feature, timeseries_feature GeneratedData = namedtuple('GeneratedData', 'train_df validation_df test_df') @@ -265,7 +265,7 @@ def test_model_save_reload_API(csv_filename, tmp_path): vector_feature(), image_feature(image_dest_folder), audio_feature(audio_dest_folder), - # timeseries_feature(), + timeseries_feature(), date_feature(), h3_feature(), set_feature(vocab_size=3), From eedc838b2ada3bfc1121fa9cd17de5a877d0c616 Mon Sep 17 00:00:00 2001 From: Jim Thompson Date: Wed, 1 Jul 2020 06:44:49 -0400 Subject: [PATCH 34/34] fix: IndexError exception after model weights restore - work-in-progress --- ludwig/models/modules/sequence_decoders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ludwig/models/modules/sequence_decoders.py b/ludwig/models/modules/sequence_decoders.py index 615a8f96168..0924798d928 100644 --- a/ludwig/models/modules/sequence_decoders.py +++ b/ludwig/models/modules/sequence_decoders.py @@ -455,9 +455,10 @@ def decoder_greedy( # ================ predictions ================= greedy_sampler = tfa.seq2seq.GreedyEmbeddingSampler() - # decoder_input = tf.expand_dims([self.GO_SYMBOL] * batch_size, 1) + decoder_input = tf.expand_dims([self.GO_SYMBOL] * batch_size, 1) start_tokens = tf.fill([batch_size], self.GO_SYMBOL) end_token = self.END_SYMBOL + decoder_inp_emb = self.decoder_embedding(decoder_input) if self.attention_mechanism is not None: self.attention_mechanism.setup_memory(
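The restored decoder_input tensor and its embedding give the greedy decoding path an initial embedded input of shape [batch_size, 1, embedding_size], alongside the start/end tokens that drive GreedyEmbeddingSampler. A toy shape check of that bookkeeping, where the sizes and symbol ids are made up:

import tensorflow as tf

GO_SYMBOL, END_SYMBOL = 1, 2               # assumed special token ids
batch_size, vocab_size, embedding_size = 4, 10, 8

decoder_embedding = tf.keras.layers.Embedding(vocab_size, embedding_size)

# one GO token per sequence, shape [batch_size, 1]
decoder_input = tf.expand_dims([GO_SYMBOL] * batch_size, 1)
start_tokens = tf.fill([batch_size], GO_SYMBOL)
end_token = END_SYMBOL

# embedded GO tokens, shape [batch_size, 1, embedding_size]
decoder_inp_emb = decoder_embedding(decoder_input)
print(decoder_inp_emb.shape)               # (4, 1, 8)
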