From 82bbffe2664bbe992103224e71dabb7f10c4a528 Mon Sep 17 00:00:00 2001
From: Arvind Sridhar <130104093+asdataminer@users.noreply.github.com>
Date: Tue, 20 Jun 2023 07:25:51 -0700
Subject: [PATCH] Bug fix

---
 ludwig/data/preprocessing.py            |  4 ++--
 ludwig/trainers/trainer_rlhf.py         | 10 +++-------
 tests/integration_tests/test_trainer.py |  2 +-
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py
index b4b53161831..7bde94e5898 100644
--- a/ludwig/data/preprocessing.py
+++ b/ludwig/data/preprocessing.py
@@ -1323,7 +1323,7 @@ def build_dataset(
         proc_cols[proc_column] = backend.df_engine.map_objects(proc_cols[proc_column], lambda x: x.reshape(-1))
 
     # If training a reward model, prepare the processed columns
-    if mode == "training" and "reward_dataset" in global_preprocessing_parameters:
+    if mode == "training" and global_preprocessing_parameters["reward_dataset"] is not None:
         reward_parameter_names = [
             "id_column",
             "outcome_column",
@@ -1418,7 +1418,7 @@ def build_dataset(
         dataset = embed_fixed_features(dataset, feature_configs, metadata, backend)
 
     # If training a reward model, perform grouping and joining on dataset
-    if mode == "training" and "reward_dataset" in global_preprocessing_parameters:
+    if mode == "training" and global_preprocessing_parameters["reward_dataset"] is not None:
 
         def parse_id_rows_group(rows_group):
             rows_idxs = rows_group.index
diff --git a/ludwig/trainers/trainer_rlhf.py b/ludwig/trainers/trainer_rlhf.py
index e286cae9e6f..011b89b5f66 100644
--- a/ludwig/trainers/trainer_rlhf.py
+++ b/ludwig/trainers/trainer_rlhf.py
@@ -71,7 +71,8 @@ def step(self, batch_size: int):
             input_feature_name: [
                 input_feature.create_sample_input(batch_size=batch_size).to(trainer.device),
                 input_feature.create_sample_input(batch_size=batch_size).to(trainer.device),
-            ] for input_feature_name, input_feature in trainer.model.input_features.items()
+            ]
+            for input_feature_name, input_feature in trainer.model.input_features.items()
         }
         targets = {
             output_feature_name: output_feature.create_sample_output(batch_size=batch_size).to(trainer.device)
@@ -94,12 +95,7 @@ def train_step(
         Returns:
             A tuple of the loss tensor and a dictionary of loss for every output feature.
         """
-        if not all(
-            [
-                self.use_amp is False,
-                self.evaluate_training_set is True,
-            ]
-        ):
+        if self.use_amp is True:
             raise ValueError("Invalid trainer arguments for RLHF reward model")
 
         # Validate inputs and targets
diff --git a/tests/integration_tests/test_trainer.py b/tests/integration_tests/test_trainer.py
index ef62e804f75..80a3cb8d5e4 100644
--- a/tests/integration_tests/test_trainer.py
+++ b/tests/integration_tests/test_trainer.py
@@ -228,7 +228,7 @@ def test_rlhf_reward_model_trainer(tmpdir):
     input_features = [
         text_feature(
             name=transcript_column,
-            encoder={"type": "auto_transformer", "pretrained_model_name_or_path": "bert-base-uncased"},
+            encoder={"type": "auto_transformer", "pretrained_model_name_or_path": "gpt2", "trainable": True},
         )
     ]
     output_features = [number_feature(name=id_column)]
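
Note on the preprocessing change: `"reward_dataset" in global_preprocessing_parameters` is true even when the option is unset, presumably because the config schema always materializes the key with a default of None, so the reward-model branch ran for ordinary training runs; the `is not None` check only takes that branch when a reward dataset is actually configured. A minimal sketch of the distinction (the dict below is illustrative only, not Ludwig's real parameter structure):

    # Illustrative only: mimics a config dict whose keys are always present.
    global_preprocessing_parameters = {"reward_dataset": None}  # option unset

    # Old check: passes even though no reward dataset was configured.
    assert "reward_dataset" in global_preprocessing_parameters

    # New check: distinguishes "key present but unset" from "configured".
    assert global_preprocessing_parameters["reward_dataset"] is None

    global_preprocessing_parameters["reward_dataset"] = {"id_column": "id", "outcome_column": "outcome"}
    assert global_preprocessing_parameters["reward_dataset"] is not None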