diff --git a/prompting/rewards/ordinal.py b/prompting/rewards/ordinal.py index d8cfd433b..34f749621 100644 --- a/prompting/rewards/ordinal.py +++ b/prompting/rewards/ordinal.py @@ -7,22 +7,16 @@ class OrdinalRewardModel(BaseRewardModel): @property def name(self) -> str: - return "category_distance" + return "ordinal" def __init__(self, **kwargs): super().__init__() #TODO: Expand to allow for more than 3 classes (Must also adjust dataset/review.py) - self.sentiments = [ - "casual", - "basic", - "silly", - "random", - "thoughtful", - "serious", - "rushed", + self.sentiments = [ + "positive", + "neutral", + "negative", ] - #NOTE: These sentimens are not the same as the sentiments defined in the dataset/review.py file. These are the subtopic - def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput: """Compute difference scores given a completion and reference pair.""" @@ -31,10 +25,11 @@ def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput: classes = self.sentiments for completion in completions: t0 = time.time() - + completion = completion.lower() # Check if exactly one answer can be found in the completion if sum(option in completion for option in classes) == 1: - reward = abs(classes.index(reference) - classes.index(completion)) + answer = [option for option in classes if option in completion][0] + reward = 1-abs(classes.index(reference) - classes.index(answer))/(len(classes)-1) else: reward = 0 timings.append(time.time() - t0) @@ -44,7 +39,7 @@ def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput: rewards=torch.FloatTensor(rewards), timings=torch.FloatTensor(timings), extra_info={ - "type": "math", + "type": "ordinal", }, ) - return output \ No newline at end of file + return output diff --git a/prompting/rewards/reward.py b/prompting/rewards/reward.py index 1adf35a73..bf5d2bc0b 100644 --- a/prompting/rewards/reward.py +++ b/prompting/rewards/reward.py @@ -100,10 +100,7 @@ def 
total_reward(self) -> torch.FloatTensor: """Combines the rewards from all the reward models into a single reward tensor""" # TODO: How would using the Agent as a reward model fit into this flow? - # Compute the rewards for the responses given the prompt - rewards = torch.zeros_like( - self.response_event.uids, dtype=torch.float32, device=self.device - ) + # Compute the rewards for the responses given the prompt rewards = torch.zeros_like( self.response_event.uids, dtype=torch.float32, device=self.device ) diff --git a/prompting/tasks/challenge_templates/base.py b/prompting/tasks/challenge_templates/base.py index 6c8fc7ef1..dff133a1c 100644 --- a/prompting/tasks/challenge_templates/base.py +++ b/prompting/tasks/challenge_templates/base.py @@ -11,7 +11,7 @@ def next(self, query: str): self.fields["query"] = [query] return self.get_template().format( **{field: random.choice(entries) for field, entries in self.fields.items()} - ) + ).split("<end>")[0] def get_template(self): return random.choice(self.templates) diff --git a/prompting/tasks/challenge_templates/math.py b/prompting/tasks/challenge_templates/math.py index ae4769be8..8902568f1 100644 --- a/prompting/tasks/challenge_templates/math.py +++ b/prompting/tasks/challenge_templates/math.py @@ -6,10 +6,10 @@ class MathChallengeTemplate(ChallengeTemplate): def __init__(self): super().__init__() self.templates = [ - "{greeting}{greeting_punctuation}{query}{request}", - "{query}{greeting}{greeting_punctuation}{request}", - "{greeting}{greeting_punctuation}{query}{request}", - "{query}{request}{greeting}{greeting_punctuation}", + "{greeting}{greeting_punctuation}{query}{whitespace}{request}", + "{query}{greeting}{greeting_punctuation}{request}{whitespace}", + "{greeting}{greeting_punctuation}{query}{request}{whitespace}", + "{query}{whitespace}{request}{greeting}{greeting_punctuation}", ] self.fields = { "greeting": [ @@ -181,4 +181,11 @@ def __init__(self): "Can you help me understand this, please?", "I could use your help 
figuring this out.", ], + "whitespace": [ + "", + " ", + " ", + "\n", + "\n\n", + ] } diff --git a/prompting/tasks/challenge_templates/sentiment.py b/prompting/tasks/challenge_templates/sentiment.py index 47da111a7..e715a8d33 100644 --- a/prompting/tasks/challenge_templates/sentiment.py +++ b/prompting/tasks/challenge_templates/sentiment.py @@ -6,8 +6,8 @@ class SentimentChallengeTemplate(ChallengeTemplate): def __init__(self): super().__init__() self.templates = [ - "{greeting}{greeting_punctuation}{request}{query}", - "{request}{query}{greeting}{greeting_punctuation}", + "{greeting}{greeting_punctuation}{request}{whitespace}{query}", + "{request}{whitespace}{query}{greeting}{greeting_punctuation}", ] self.fields = { "greeting": [ @@ -176,4 +176,11 @@ def __init__(self): "Can you evaluate the sentiment of this text?", "I'm interested in the sentiment analysis of this text.", ], + "whitespace": [ + "", + " ", + " ", + "\n", + "\n\n", + ] } diff --git a/prompting/tasks/sentiment.py b/prompting/tasks/sentiment.py index 60635d59a..ef11df4ae 100644 --- a/prompting/tasks/sentiment.py +++ b/prompting/tasks/sentiment.py @@ -12,7 +12,6 @@ class SentimentAnalysisTask(Task): desc = "get help analyzing the sentiment of a review" goal = "to get the sentiment to the following review" challenge_type = 'paraphrase' - challenge_type = 'paraphrase' challenge_template = SentimentChallengeTemplate() reward_definition = [ diff --git a/prompting/tasks/translate.py b/prompting/tasks/translate.py index 1913db6ed..17c79c4f5 100644 --- a/prompting/tasks/translate.py +++ b/prompting/tasks/translate.py @@ -71,6 +71,9 @@ def translate_to_random_language(self, content: str, from_code:str='en') -> Tupl @dataclass class TranslationTask(Task): + challenge_type = 'query' + static_reference = True + static_query = True name = "translation" desc = "get translation help" goal = "to get the translation for the given piece of text"