Skip to content
25 changes: 10 additions & 15 deletions prompting/rewards/ordinal.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,16 @@
class OrdinalRewardModel(BaseRewardModel):
@property
def name(self) -> str:
return "category_distance"
return "ordinal"

def __init__(self, **kwargs):
super().__init__()
#TODO: Expand to allow for more than 3 classes (Must also adjust dataset/review.py)
self.sentiments = [
"casual",
"basic",
"silly",
"random",
"thoughtful",
"serious",
"rushed",
self.sentiments = [
"positive",
"neutral",
"negative",
]
#NOTE: These sentiments are not the same as the sentiments defined in the dataset/review.py file. These are the subtopic


def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
"""Compute difference scores given a completion and reference pair."""
Expand All @@ -31,10 +25,11 @@ def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
classes = self.sentiments
for completion in completions:
t0 = time.time()

completion = completion.lower()
# Check if exactly one answer can be found in the completion
if sum(option in completion for option in classes) == 1:
reward = abs(classes.index(reference) - classes.index(completion))
answer = [option for option in classes if option in completion][0]
reward = 1-abs(classes.index(reference) - classes.index(answer))/(len(classes)-1)
else:
reward = 0
timings.append(time.time() - t0)
Expand All @@ -44,7 +39,7 @@ def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
rewards=torch.FloatTensor(rewards),
timings=torch.FloatTensor(timings),
extra_info={
"type": "math",
"type": "ordinal",
},
)
return output
return output
5 changes: 1 addition & 4 deletions prompting/rewards/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,7 @@ def total_reward(self) -> torch.FloatTensor:
"""Combines the rewards from all the reward models into a single reward tensor"""

# TODO: How would using the Agent as a reward model fit into this flow?
# Compute the rewards for the responses given the prompt
rewards = torch.zeros_like(
self.response_event.uids, dtype=torch.float32, device=self.device
)
# Compute the rewards for the responses given the prompt
rewards = torch.zeros_like(
self.response_event.uids, dtype=torch.float32, device=self.device
)
Expand Down
2 changes: 1 addition & 1 deletion prompting/tasks/challenge_templates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def next(self, query: str):
self.fields["query"] = [query]
return self.get_template().format(
**{field: random.choice(entries) for field, entries in self.fields.items()}
)
).split("<end>")[0]

# Pick the template that next() will format: a single entry of
# self.templates, selected with random.choice.
def get_template(self):
# random.choice raises IndexError on an empty sequence, so self.templates
# must contain at least one entry before this is called.
return random.choice(self.templates)
15 changes: 11 additions & 4 deletions prompting/tasks/challenge_templates/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ class MathChallengeTemplate(ChallengeTemplate):
def __init__(self):
super().__init__()
self.templates = [
"{greeting}{greeting_punctuation}{query}{request}<end>",
"{query}<end>{greeting}{greeting_punctuation}{request}",
"{greeting}{greeting_punctuation}{query}<end>{request}",
"{query}{request}<end>{greeting}{greeting_punctuation}",
"{greeting}{greeting_punctuation}{query}{whitespace}{request}<end>",
"{query}<end>{greeting}{greeting_punctuation}{request}{whitespace}",
"{greeting}{greeting_punctuation}{query}<end>{request}{whitespace}",
"{query}{whitespace}{request}<end>{greeting}{greeting_punctuation}",
]
self.fields = {
"greeting": [
Expand Down Expand Up @@ -181,4 +181,11 @@ def __init__(self):
"Can you help me understand this, please?",
"I could use your help figuring this out.",
],
"whitespace": [
"",
" ",
" ",
"\n",
"\n\n",
]
}
11 changes: 9 additions & 2 deletions prompting/tasks/challenge_templates/sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ class SentimentChallengeTemplate(ChallengeTemplate):
def __init__(self):
super().__init__()
self.templates = [
"{greeting}{greeting_punctuation}{request}{query}<end>",
"{request}{query}<end>{greeting}{greeting_punctuation}",
"{greeting}{greeting_punctuation}{request}{whitespace}{query}<end>",
"{request}{whitespace}{query}<end>{greeting}{greeting_punctuation}",
]
self.fields = {
"greeting": [
Expand Down Expand Up @@ -176,4 +176,11 @@ def __init__(self):
"Can you evaluate the sentiment of this text?",
"I'm interested in the sentiment analysis of this text.",
],
"whitespace": [
"",
" ",
" ",
"\n",
"\n\n",
]
}
1 change: 0 additions & 1 deletion prompting/tasks/sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ class SentimentAnalysisTask(Task):
desc = "get help analyzing the sentiment of a review"
goal = "to get the sentiment to the following review"
challenge_type = 'paraphrase'
challenge_type = 'paraphrase'
challenge_template = SentimentChallengeTemplate()

reward_definition = [
Expand Down
3 changes: 3 additions & 0 deletions prompting/tasks/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ def translate_to_random_language(self, content: str, from_code:str='en') -> Tupl

@dataclass
class TranslationTask(Task):
challenge_type = 'query'
static_reference = True
static_query = True
name = "translation"
desc = "get translation help"
goal = "to get the translation for the given piece of text"
Expand Down