From 82bbffe2664bbe992103224e71dabb7f10c4a528 Mon Sep 17 00:00:00 2001
From: Arvind Sridhar <130104093+asdataminer@users.noreply.github.com>
Date: Tue, 20 Jun 2023 07:25:51 -0700
Subject: [PATCH] Bug fix

---
 ludwig/data/preprocessing.py            |  4 ++--
 ludwig/trainers/trainer_rlhf.py         | 10 +++-------
 tests/integration_tests/test_trainer.py |  2 +-
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py
index b4b53161831..7bde94e5898 100644
--- a/ludwig/data/preprocessing.py
+++ b/ludwig/data/preprocessing.py
@@ -1323,7 +1323,7 @@ def build_dataset(
         proc_cols[proc_column] = backend.df_engine.map_objects(proc_cols[proc_column], lambda x: x.reshape(-1))
 
     # If training a reward model, prepare the processed columns
-    if mode == "training" and "reward_dataset" in global_preprocessing_parameters:
+    if mode == "training" and global_preprocessing_parameters["reward_dataset"] is not None:
         reward_parameter_names = [
             "id_column",
             "outcome_column",
@@ -1418,7 +1418,7 @@ def build_dataset(
         dataset = embed_fixed_features(dataset, feature_configs, metadata, backend)
 
     # If training a reward model, perform grouping and joining on dataset
-    if mode == "training" and "reward_dataset" in global_preprocessing_parameters:
+    if mode == "training" and global_preprocessing_parameters["reward_dataset"] is not None:
 
         def parse_id_rows_group(rows_group):
             rows_idxs = rows_group.index
diff --git a/ludwig/trainers/trainer_rlhf.py b/ludwig/trainers/trainer_rlhf.py
index e286cae9e6f..011b89b5f66 100644
--- a/ludwig/trainers/trainer_rlhf.py
+++ b/ludwig/trainers/trainer_rlhf.py
@@ -71,7 +71,8 @@ def step(self, batch_size: int):
             input_feature_name: [
                 input_feature.create_sample_input(batch_size=batch_size).to(trainer.device),
                 input_feature.create_sample_input(batch_size=batch_size).to(trainer.device),
-            ] for input_feature_name, input_feature in trainer.model.input_features.items()
+            ]
+            for input_feature_name, input_feature in trainer.model.input_features.items()
         }
         targets = {
             output_feature_name: output_feature.create_sample_output(batch_size=batch_size).to(trainer.device)
@@ -94,12 +95,7 @@ def train_step(
         Returns:
             A tuple of the loss tensor and a dictionary of loss for every output feature.
         """
-        if not all(
-            [
-                self.use_amp is False,
-                self.evaluate_training_set is True,
-            ]
-        ):
+        if self.use_amp is True:
             raise ValueError("Invalid trainer arguments for RLHF reward model")
 
         # Validate inputs and targets
diff --git a/tests/integration_tests/test_trainer.py b/tests/integration_tests/test_trainer.py
index ef62e804f75..80a3cb8d5e4 100644
--- a/tests/integration_tests/test_trainer.py
+++ b/tests/integration_tests/test_trainer.py
@@ -228,7 +228,7 @@ def test_rlhf_reward_model_trainer(tmpdir):
     input_features = [
         text_feature(
             name=transcript_column,
-            encoder={"type": "auto_transformer", "pretrained_model_name_or_path": "bert-base-uncased"},
+            encoder={"type": "auto_transformer", "pretrained_model_name_or_path": "gpt2", "trainable": True},
         )
     ]
     output_features = [number_feature(name=id_column)]
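
Note on the preprocessing change: `"reward_dataset" in global_preprocessing_parameters` is true even when the option is unset, presumably because the config schema always materializes the key with a default of None, so the reward-model branch ran for ordinary training runs; the `is not None` check only takes that branch when a reward dataset is actually configured. A minimal sketch of the distinction (the dict below is illustrative only, not Ludwig's real parameter structure):

    # Illustrative only: mimics a config dict whose keys are always present.
    global_preprocessing_parameters = {"reward_dataset": None}  # option unset

    # Old check: passes even though no reward dataset was configured.
    assert "reward_dataset" in global_preprocessing_parameters

    # New check: distinguishes "key present but unset" from "configured".
    assert global_preprocessing_parameters["reward_dataset"] is None

    global_preprocessing_parameters["reward_dataset"] = {"id_column": "id", "outcome_column": "outcome"}
    assert global_preprocessing_parameters["reward_dataset"] is not None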