Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 11 additions & 15 deletions prompting/tasks/MSRv2_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,20 @@ def make_query(self, dataset_entry: DDGDatasetEntry):

async def make_reference(self, dataset_entry: Context, model_manager: ModelManager):
# Produce the reference for this task; what is returned depends on self.stage.
#
# NOTE(review): this span was captured from a diff view. Indentation is stripped
# and removed/added lines appear interleaved, so the statements below are NOT
# one runnable body. Confirm the final shape against the merged file.
#
# What the visible statements establish:
#   * stage == "generative": `random.random() < self.REAL_REFERENCE_PROBABILITY`
#     decides whether the validator's own reference is used. The value awaited
#     from `super().make_reference(...)` is kept only if it is a `str`;
#     otherwise None is stored. When the check fails, None is returned.
#   * any other stage: returns the int 1 if self.reference is truthy, else 0.
#
# NOTE(review): the return value is therefore a str/None in one stage and an
# int in the other; callers presumably branch on the stage first (verify).
if self.stage == "generative":
# Always generate and store the validator's version of the reference.
# NOTE(review): this unconditional call (validator_ref_attempt) looks like the
# pre-change version; the later call (reference_attempt) appears to replace it
# and only runs inside the probability branch. TODO confirm.
validator_ref_attempt = await super().make_reference(dataset_entry, model_manager=model_manager)
self.validator_generated_reference = (
validator_ref_attempt if isinstance(validator_ref_attempt, str) else None
)

# Decide if the validator's generated reference will be the "official" reference for discrimination.
# random.random() returns a float in [0.0, 1.0), so a threshold of 0 never takes
# this branch and a threshold of 1 always does.
if random.random() < self.REAL_REFERENCE_PROBABILITY:
self.reference = self.validator_generated_reference  # Validator's answer is CHOSEN
# self.generative_miner_answer remains None in this case, it's not the active reference from task's perspective yet
# Validator's turn to generate the reference
reference_attempt = await super().make_reference(dataset_entry, model_manager=model_manager)
# A non-str result from the parent implementation is stored as None.
self.reference = reference_attempt if isinstance(reference_attempt, str) else None
self.validator_generated_reference = self.reference # Store the validator's generated reference
return self.reference
else:
# Validator's answer is NOT chosen. self.reference remains None.
# We will use the miner's answer (once received) as the reference.
# This will be populated into self.generative_miner_answer in the reward model.
return None # Indicates we are waiting for the miner's answer to be the reference
else:
# return 1 if validator's reference was chosen, 0 if miner's reference was chosen
# Miner's turn to generate the reference; validator does not generate.
self.reference = None # Ensure self.reference is None
self.validator_generated_reference = None # Ensure this is None
# Return None to indicate that the reference is expected from the miner
return None
else: # stage == "discriminative"
# This part indicates if the validator's reference was chosen (1) or if the miner's is expected (0).
# Truthiness check: both None and an empty-string reference yield 0.
return 1 if self.reference else 0

@property
Expand Down