diff --git a/prompting/__init__.py b/prompting/__init__.py index 4990f6d06..994db302a 100644 --- a/prompting/__init__.py +++ b/prompting/__init__.py @@ -16,7 +16,7 @@ # DEALINGS IN THE SOFTWARE. # Define the version of the template module. -__version__ = "2.1.1" +__version__ = "2.2.0" version_split = __version__.split(".") __spec_version__ = ( (10000 * int(version_split[0])) @@ -36,6 +36,7 @@ from . import agent from . import conversation from . import dendrite +from . import shared from . import validator from .llms import hf diff --git a/prompting/agent.py b/prompting/agent.py index 34ed4d092..b4b67255b 100644 --- a/prompting/agent.py +++ b/prompting/agent.py @@ -87,10 +87,16 @@ def create_challenge(self) -> str: cleaner = None if hasattr(self.task, "cleaning_pipeline"): cleaner = CleanerPipeline(cleaning_pipeline=self.task.cleaning_pipeline) - - self.challenge = super().query( - message="Ask a question related to your goal", cleaner=cleaner - ) + if self.task.challenge_type == "inference": + self.challenge = super().query( + message="Ask a question related to your goal", cleaner=cleaner + ) + elif self.task.challenge_type == 'paraphrase': + self.challenge = self.task.challenge_template.next(self.task.query) + elif self.task.challenge_type == 'query': + self.challenge = self.task.query + else: + bt.logging.error(f"Task {self.task.name} has challenge type of: {self.task.challenge_type} which is not supported.") self.challenge = self.task.format_challenge(self.challenge) self.challenge_time = time.time() - t0 diff --git a/prompting/cleaners/all_cleaners.py b/prompting/cleaners/all_cleaners.py index ff0869588..4a7c46dbd 100644 --- a/prompting/cleaners/all_cleaners.py +++ b/prompting/cleaners/all_cleaners.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod +from typing import Union import bittensor as bt import re +from typing import Union class BaseCleaner(ABC): @@ -57,6 +59,7 @@ def capitalize_sentences(self, input_string): return result_string def apply(self, 
generation: str) -> str:
+        generation = re.sub(r'\n*\w+\s*:','',generation)
         roles = [
             "User: ",
             "System: ",
@@ -77,3 +80,25 @@ def apply(self, generation: str) -> str:
         return self.capitalize_sentences(
             input_string=generation
         )  # LLMs are good at being formal. Do the same if we remove a prefix.
+
+class PrunePostQuestionText(BaseCleaner):
+    def __init__(self, **kwargs):
+        pass
+
+    def apply(self, generation: str, min_pos: Union[int,float] = 5, max_pos: Union[int,float]= 0.5, max_questions: int = None) -> str:
+        """Drop the text that follows the question(s) in `generation`; min_pos/max_pos below 1 are fractions of its length."""
+        if min_pos < 1:
+            min_pos = int(min_pos * len(generation))
+        if max_pos < 1:
+            max_pos = int(max_pos * len(generation))
+
+        # Prune only when the last question mark lies within [min_pos, max_pos]; otherwise return the text unchanged
+        if not min_pos <= generation.rfind("?") <= max_pos:
+            return generation
+        elif max_questions is not None:
+            generation = '?'.join(generation.split("?",max_questions)[:-1]) + '?'
+        else:
+            # drop everything after the last question mark (rsplit returns a list, so keep element [0]). Alternatively, we can just extract the first question.
+            generation = generation.rsplit("?",1)[0] + '?'
+ + return generation \ No newline at end of file diff --git a/prompting/cleaners/cleaner.py b/prompting/cleaners/cleaner.py index 5d7e4b3a3..931d9de3d 100644 --- a/prompting/cleaners/cleaner.py +++ b/prompting/cleaners/cleaner.py @@ -2,12 +2,13 @@ import bittensor as bt -from prompting.cleaners.all_cleaners import RemoveQuotes, RemoveRoles, PruneEnding +from prompting.cleaners.all_cleaners import RemoveQuotes, RemoveRoles, PruneEnding, PrunePostQuestionText SUPPORTED_CLEANERS = { "remove_quotes": RemoveQuotes, "remove_roles": RemoveRoles, "prune_ending": PruneEnding, + "remove_post_question_text": PrunePostQuestionText, } diff --git a/prompting/conversation.py b/prompting/conversation.py index e17f5cd3d..3e13a9a4a 100644 --- a/prompting/conversation.py +++ b/prompting/conversation.py @@ -1,12 +1,13 @@ import random from transformers import Pipeline -from prompting.tasks import Task, TASKS +from prompting.tasks import Task, TASKS, TranslationPipeline, TranslationTask from prompting.tools import Selector, DATASETS from prompting.task_registry import TASK_REGISTRY def create_task( llm_pipeline: Pipeline, + translation_pipeline: TranslationPipeline, task_name: str, create_reference: bool = True, selector: Selector = random.choice, @@ -42,6 +43,12 @@ def create_task( raise ValueError(f"Dataset {dataset_name} not found") else: dataset = dataset() + + if task_name == TranslationTask.name: + return task( + translation_pipeline=translation_pipeline, + context=dataset.next() + ) return task( llm_pipeline=llm_pipeline, diff --git a/prompting/forward.py b/prompting/forward.py index 6a9b6ed34..170f85254 100644 --- a/prompting/forward.py +++ b/prompting/forward.py @@ -16,28 +16,30 @@ # DEALINGS IN # THE SOFTWARE. 
-import time import sys +import time +import random import asyncio +import traceback import numpy as np import bittensor as bt -import traceback from typing import List, Dict, Awaitable from prompting.agent import HumanAgent from prompting.dendrite import DendriteResponseEvent from prompting.conversation import create_task from prompting.protocol import StreamPromptingSynapse from prompting.rewards import RewardResult +from prompting.tasks import QuestionAnsweringTask from prompting.utils.uids import get_random_uids from prompting.utils.logging import log_event from prompting.utils.misc import async_log, serialize_exception_to_string from dataclasses import dataclass @async_log -async def generate_reference(agent): +async def generate_reference(agent): loop = asyncio.get_running_loop() result = await loop.run_in_executor(None, agent.task.generate_reference, agent.llm_pipeline) - return result + return result @async_log async def execute_dendrite_call(dendrite_call): @@ -167,7 +169,7 @@ def log_stream_results(stream_results: List[StreamResult]): async def run_step( - self, agent: HumanAgent, k: int, timeout: float, exclude: list = None + self, agent: HumanAgent, roles: List[str], messages: List[str], k: int, timeout: float, exclude: list = None ): """Executes a single step of the agent, which consists of: - Getting a list of uids to query @@ -178,6 +180,8 @@ async def run_step( Args: agent (HumanAgent): The agent to run the step for. + roles (List[str]): The roles for the synapse. + messages (List[str]): The messages for the synapse. k (int): The number of uids to query. timeout (float): The timeout for the queries. exclude (list, optional): The list of uids to exclude from the query. Defaults to []. 
@@ -196,7 +200,7 @@ async def run_step( # Directly call dendrite and process responses in parallel streams_responses = await self.dendrite( axons=axons, - synapse=StreamPromptingSynapse(roles=["user"], messages=[agent.challenge]), + synapse=StreamPromptingSynapse(roles=roles, messages=messages), timeout=timeout, deserialize=False, streaming=True, @@ -217,8 +221,8 @@ async def run_step( log_stream_results(stream_results) - all_synapses_results = [stream_result.synapse for stream_result in stream_results] - + all_synapses_results = [stream_result.synapse for stream_result in stream_results] + # Encapsulate the responses in a response event (dataclass) response_event = DendriteResponseEvent( responses=all_synapses_results, uids=uids, timeout=timeout @@ -235,10 +239,12 @@ async def run_step( ) bt.logging.info(f"Created RewardResult:\n {reward_result}") + best_response = response_event.completions[reward_result.rewards.argmax()] + # The original idea was that the agent is 'satisfied' when it gets a good enough response (e.g. reward critera is met, such as ROUGE>threshold) agent.update_progress( top_reward=reward_result.rewards.max(), - top_response=response_event.completions[reward_result.rewards.argmax()], + top_response=best_response, ) self.update_scores(reward_result.rewards, uids) @@ -250,7 +256,9 @@ async def run_step( ] # Log the step event. event = { + "best": best_response, "block": self.block, + "step": self.step, "step_time": time.time() - start_time, "stream_results_uids": stream_results_uids, "stream_results_exceptions": stream_results_exceptions, @@ -263,6 +271,10 @@ async def run_step( async def forward(self): + """ + Encapsulates a full conversation between the validator and miners. Contains one or more rounds of request-response. 
+ + """ bt.logging.info("🚀 Starting forward loop...") forward_start_time = time.time() @@ -278,6 +290,7 @@ async def forward(self): try: task = create_task( llm_pipeline=self.llm_pipeline, + translation_pipeline=self.translation_pipeline, task_name=task_name, create_reference=False, ) @@ -294,9 +307,11 @@ async def forward(self): task=task, llm_pipeline=self.llm_pipeline, begin_conversation=True ) - rounds = 0 + turn = 0 exclude_uids = [] - while not agent.finished: + roles = ['user'] + messages = [agent.challenge] + while True: # Note: The try catch is a safe clause to ensure that the forward loop continues even if an error occurs in run_step. # To be reconsidered in the next version. try: @@ -304,6 +319,8 @@ async def forward(self): event = await run_step( self, agent, + roles=roles, + messages=messages, k=self.config.neuron.sample_size, timeout=self.config.neuron.timeout, exclude=exclude_uids, @@ -311,12 +328,32 @@ async def forward(self): # Adds forward time to event and logs it to wandb event["forward_time"] = time.time() - forward_start_time + event["turn"] = turn log_event(self, event) exclude_uids += event["uids"] task.complete = True + + accepted_answer = event["best"] if random.random() < 0.5 else agent.task.reference + roles.append("assistant") + messages.append(accepted_answer) + + # 50% chance of single turn conversation, 25% of two turns, 12.5% chance of 3 turns, 6.25% chance of 4 turns, 3.63% chance of 5... 
+ if random.random()<0.5 or turn>=2: + break + + history = '\n'.join([f"{role}: {message}" for role, message in zip(roles, messages)]) + + # Use PREVIOUS task context + agent.task = QuestionAnsweringTask(self.llm_pipeline, context=task.context, create_reference=False, history=history) + + # overwrite the challenge with the followup query, which *should* continue the persona + agent.challenge = agent.task.query + + roles.append("user") + messages.append(agent.challenge) + turn += 1 - rounds += 1 except BaseException as e: unexpected_errors = serialize_exception_to_string(e) bt.logging.error( diff --git a/prompting/rewards/__init__.py b/prompting/rewards/__init__.py index 4df8159d9..51cab779a 100644 --- a/prompting/rewards/__init__.py +++ b/prompting/rewards/__init__.py @@ -10,4 +10,5 @@ from .rouge import RougeRewardModel from .float_diff import FloatDiffModel from .date import DateRewardModel +from .ordinal import OrdinalRewardModel from .pipeline import RewardPipeline, REWARD_MODELS diff --git a/prompting/rewards/ordinal.py b/prompting/rewards/ordinal.py new file mode 100644 index 000000000..d8cfd433b --- /dev/null +++ b/prompting/rewards/ordinal.py @@ -0,0 +1,50 @@ +import time +import torch +from typing import List +from prompting.rewards import BaseRewardModel, BatchRewardOutput + + +class OrdinalRewardModel(BaseRewardModel): + @property + def name(self) -> str: + return "category_distance" + + def __init__(self, **kwargs): + super().__init__() + #TODO: Expand to allow for more than 3 classes (Must also adjust dataset/review.py) + self.sentiments = [ + "casual", + "basic", + "silly", + "random", + "thoughtful", + "serious", + "rushed", + ] + #NOTE: These sentimens are not the same as the sentiments defined in the dataset/review.py file. 
These are the subtopic
+
+
+    def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
+        """Score each completion by the distance between its single detected label and the reference label (1 = same label, 0 = farthest, missing or ambiguous)."""
+        rewards = []
+        timings = []
+        classes = self.sentiments
+        for completion in completions:
+            t0 = time.time()
+            matches = [option for option in classes if option in completion]
+            # Check if exactly one answer can be found in the completion. NOTE(review): assumes `reference` is one of `classes`; list.index raises ValueError otherwise.
+            if len(matches) == 1:
+                reward = 1 - abs(classes.index(reference) - classes.index(matches[0])) / (len(classes) - 1)
+            else:
+                reward = 0
+            timings.append(time.time() - t0)
+            rewards.append(reward)
+
+        output = BatchRewardOutput(
+            rewards=torch.FloatTensor(rewards),
+            timings=torch.FloatTensor(timings),
+            extra_info={
+                "type": "math",
+            },
+        )
+        return output
\ No newline at end of file
diff --git a/prompting/rewards/pipeline.py b/prompting/rewards/pipeline.py
index 1623e3e39..244ec0b3e 100644
--- a/prompting/rewards/pipeline.py
+++ b/prompting/rewards/pipeline.py
@@ -8,6 +8,7 @@
     RelevanceRewardModel,
     FloatDiffModel,
     DateRewardModel,
+    OrdinalRewardModel,
 )
 
 REWARD_MODELS = {
@@ -16,6 +17,7 @@
     "diff": DiffRewardModel,
     "float_diff": FloatDiffModel,
     "date": DateRewardModel,
+    "ordinal": OrdinalRewardModel,
 }
 
 
diff --git a/prompting/shared/__init__.py b/prompting/shared/__init__.py
new file mode 100644
index 000000000..79503d91a
--- /dev/null
+++ b/prompting/shared/__init__.py
@@ -0,0 +1 @@
+from .context import Context
\ No newline at end of file
diff --git a/prompting/tools/datasets/context.py b/prompting/shared/context.py
similarity index 100%
rename from prompting/tools/datasets/context.py
rename to prompting/shared/context.py
diff --git a/prompting/task_registry.py b/prompting/task_registry.py
index 2e006694f..f111a29d9 100644
--- a/prompting/task_registry.py
+++ b/prompting/task_registry.py
@@ -1,5 +1,5 @@
-from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask
-from .tools import MockDataset, WikiDataset,
HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset, GenericInstructionDataset +from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask, SentimentAnalysisTask, TranslationTask +from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset, GenericInstructionDataset, ReviewDataset # TODO: Expand this to include extra information beyond just the task and dataset names summarization_task, summarization_dataset = SummarizationTask.name, [WikiDataset.name] @@ -8,6 +8,8 @@ math_task, math_dataset = MathTask.name, [MathDataset.name] date_qa_task, date_qa_dataset = DateQuestionAnsweringTask.name, [WikiDateDataset.name] generic_instruction_task, generic_instruction_dataset = GenericInstructionTask.name, [GenericInstructionDataset.name] +translation_task, translation_dataset = TranslationTask.name, [WikiDataset.name] +sentiment_analysis_task, sentiment_analysis_dataset = SentimentAnalysisTask.name, [ReviewDataset.name] TASK_REGISTRY = { summarization_task: summarization_dataset, @@ -15,5 +17,7 @@ #debugging_task: debugging_dataset, math_task: math_dataset, date_qa_task: date_qa_dataset, - generic_instruction_task: generic_instruction_dataset + generic_instruction_task: generic_instruction_dataset, + translation_task: translation_dataset, + sentiment_analysis_task: sentiment_analysis_dataset, } \ No newline at end of file diff --git a/prompting/tasks/__init__.py b/prompting/tasks/__init__.py index 01292ded9..6794c97ca 100644 --- a/prompting/tasks/__init__.py +++ b/prompting/tasks/__init__.py @@ -5,8 +5,9 @@ from .date_qa import DateQuestionAnsweringTask from .generic_instruction import GenericInstructionTask from .math import MathTask +from .translate import TranslationTask, TranslationPipeline from .mock import MockTask - +from .sentiment import SentimentAnalysisTask TASKS = { QuestionAnsweringTask.name: 
QuestionAnsweringTask,
@@ -15,4 +16,6 @@
     #DebuggingTask.name: DebuggingTask,
     GenericInstructionTask.name: GenericInstructionTask,
     MathTask.name: MathTask,
+    TranslationTask.name: TranslationTask,
+    SentimentAnalysisTask.name: SentimentAnalysisTask,
 }
diff --git a/prompting/tasks/challenge_templates/__init__.py b/prompting/tasks/challenge_templates/__init__.py
new file mode 100644
index 000000000..d92a27498
--- /dev/null
+++ b/prompting/tasks/challenge_templates/__init__.py
@@ -0,0 +1,3 @@
+from .base import ChallengeTemplate
+from .math import MathChallengeTemplate
+from .sentiment import SentimentChallengeTemplate
diff --git a/prompting/tasks/challenge_templates/base.py b/prompting/tasks/challenge_templates/base.py
new file mode 100644
index 000000000..6c8fc7ef1
--- /dev/null
+++ b/prompting/tasks/challenge_templates/base.py
@@ -0,0 +1,17 @@
+import random
+from abc import ABC
+from typing import List
+
+
+class ChallengeTemplate(ABC):
+    templates: List[str] = ["This is a template with {query}! "]
+    fields: dict = {"query": ["This is a placeholder for the query"]}
+
+    def next(self, query: str):
+        fields = {**self.fields, "query": [query]}  # per-call copy: never mutate the shared (class-level) fields dict
+        return self.get_template().format(
+            **{field: random.choice(entries) for field, entries in fields.items()}
+        )
+
+    def get_template(self):
+        return random.choice(self.templates)
diff --git a/prompting/tasks/challenge_templates/math.py b/prompting/tasks/challenge_templates/math.py
new file mode 100644
index 000000000..ae4769be8
--- /dev/null
+++ b/prompting/tasks/challenge_templates/math.py
@@ -0,0 +1,184 @@
+import random
+from .base import ChallengeTemplate
+
+
+class MathChallengeTemplate(ChallengeTemplate):
+    def __init__(self):
+        super().__init__()
+        self.templates = [
+            "{greeting}{greeting_punctuation}{query}{request}",
+            "{query}{greeting}{greeting_punctuation}{request}",
+            "{greeting}{greeting_punctuation}{query}{request}",
+            "{query}{request}{greeting}{greeting_punctuation}",
+        ]
+        self.fields = {
+            "greeting": [
+                "Hello",
+                "Hi",
+                "Hey",
+                "Yo",
+                "What's up",
+                "Howdy",
+                "Hola",
+                "Bonjour",
+                "G'day",
+                "Good morning",
+                "Good afternoon",
+                "Good evening",
+                "Greetings",
+                "Sup",
+                "Hi there",
+                "Hey there",
+                "Morning",
+                "Afternoon",
+                "Evening",
+                "Salutations",
+                "Hey, what's going on",
+                "Howdy",
+                "Sup",
+                "Sup yo",
+                "Yo",
+                "Yo yo",
+                "Greetings",
+                "Greetings to you",
+                "Hello",
+                "Hello friend",
+                "Hello to you",
+                "Hey",
+                "Hey there",
+                "",
+            ],
+            "greeting_punctuation": [
+                "!",
+                "! ",
+                "! ",
+                "!\n",
+                ",",
+                ", ",
+                ", ",
+                ",\n",
+                ".",
+                ". ",
+                ". ",
+                ".\n",
+                "",
+                " ",
+                " ",
+                "\n",
+                "...",
+                "... ",
+                "... 
", + "...\n", + "", + ], + "request": [ + "Can you assist me, please?", + "Could you lend me a hand?", + "Would you mind helping me out?", + "I could use some assistance.", + "Do you have a moment to help me?", + "I'm in need of some help.", + "Could you give me a hand with this?", + "Would you be willing to help me?", + "Can you offer me some guidance?", + "I'm struggling a bit, could you help?", + "I could really use your expertise.", + "Would you mind showing me how to do this?", + "Can you lend me your expertise for a moment?", + "I'm having trouble, could you assist?", + "Would you be able to lend me a hand?", + "Can you offer me some assistance?", + "I'm stuck, could you help me out?", + "Could you assist me with this problem?", + "Would you be so kind as to help me?", + "Can you offer me some help, please?", + "Solve", + "Could you spare a moment to help me?", + "Would you mind giving me some assistance?", + "Can you help me understand this better?", + "I need your help with something.", + "Could you offer me some support, please?", + "Would you be willing to give me a hand?", + "Can you show me how to do this?", + "I'm having difficulty, could you help me?", + "Could you assist me with this issue?", + "Would you mind helping me with this task?", + "Can you provide some help, please?", + "I'm in a bit of a bind, could you help?", + "Could you lend me a hand with this problem?", + "Would you be able to offer me some guidance?", + "Can you help me out with this, please?", + "I'm having trouble understanding, could you help?", + "Could you offer me some assistance, please?", + "Would you mind assisting me with this?", + "Can you give me some advice?", + "I could use your help with this.", + "Could you spare some time to help me?", + "Would you be willing to lend me a hand?", + "Can you help me solve this problem?", + "I'm struggling to figure this out, could you help?", + "Could you provide me with some assistance?", + "Would you mind showing me what to do?", + 
"Can you assist me in resolving this issue?", + "I could really use your help.", + "Could you help me out with this task?", + "Would you be so kind as to give me a hand?", + "Can you help me with this problem, please?", + "I'm stuck on this, could you assist?", + "Could you lend me a hand with this, please?", + "Would you be able to provide me with some guidance?", + "Can you offer me some assistance with this?", + "I'm having difficulty understanding, could you help me?", + "Could you assist me with this problem, please?", + "Would you mind giving me a hand with this?", + "Can you show me how to do this, please?", + "I'm struggling with this, could you help me out?", + "Could you offer me some help with this?", + "Would you be willing to help me with this, please?", + "Can you provide me with some support, please?", + "I'm in a bit of a bind, could you assist me?", + "Could you lend me your expertise?", + "Would you be able to spare a moment to help me?", + "Can you help me out with this problem, please?", + "I'm having trouble with this, could you help me out?", + "Could you assist me with this task, please?", + "Would you mind offering me some assistance?", + "Can you assist me with this issue, please?", + "I could use some assistance with this, could you help?", + "Could you give me a hand with this issue, please?", + "Would you be so kind as to lend me a hand?", + "Can you provide me with some assistance on this?", + "I'm having difficulty with this task, could you help?", + "Could you offer me some help on this, please?", + "Would you mind helping me with this problem?", + "Can you lend me a hand with this, please?", + "I'm stuck on this problem, could you help?", + "Could you show me how to do this, please?", + "Would you be willing to assist me with this?", + "Can you help me with this task, please?", + "I'm struggling with this problem, could you assist?", + "Could you give me some guidance on this, please?", + "Would you mind giving me some help with 
this?", + "Can you help me with this issue, please?", + "I could use your help with this problem.", + "Could you spare some time to help me out?", + "Would you be able to lend me your expertise?", + "Can you offer me some assistance with this problem?", + "I'm in need of some help with this, could you assist?", + "Could you assist me with this problem, please?", + "Can you help me out with this issue, please?", + "I'm having trouble with this task, could you help?", + "Could you lend me a hand with this problem, please?", + "Would you be willing to give me some assistance?", + "Can you provide me with some help, please?", + "I'm stuck on this issue, could you help me?", + "Could you show me what to do, please?", + "Would you mind helping me with this task, please?", + "Can you lend me your expertise for a moment, please?", + "I'm struggling with this issue, could you assist me?", + "Could you give me a hand with this problem, please?", + "Would you be so kind as to offer me some assistance?", + "Can you help me understand this, please?", + "I could use your help figuring this out.", + ], + } diff --git a/prompting/tasks/challenge_templates/sentiment.py b/prompting/tasks/challenge_templates/sentiment.py new file mode 100644 index 000000000..47da111a7 --- /dev/null +++ b/prompting/tasks/challenge_templates/sentiment.py @@ -0,0 +1,179 @@ +import random +from .base import ChallengeTemplate + + +class SentimentChallengeTemplate(ChallengeTemplate): + def __init__(self): + super().__init__() + self.templates = [ + "{greeting}{greeting_punctuation}{request}{query}", + "{request}{query}{greeting}{greeting_punctuation}", + ] + self.fields = { + "greeting": [ + "Hello", + "Hi", + "Hey", + "Yo", + "What's up", + "Howdy", + "Hola", + "Bonjour", + "G'day", + "Good morning", + "Good afternoon", + "Good evening", + "Greetings", + "Sup", + "Hi there", + "Hey there", + "Morning", + "Afternoon", + "Evening", + "Salutations", + "Hey, what's going on", + "Howdy", + "Sup", + "Sup yo", + 
"Yo", + "Yo yo", + "Greetings", + "Greetings to you", + "Hello", + "Hello friend", + "Hello to you", + "Hey", + "Hey there", + "", + ], + "greeting_punctuation": [ + "!", + "! ", + "! ", + "!\n", + ",", + ", ", + ", ", + ",\n", + ".", + ". ", + ". ", + ".\n", + "", + " ", + " ", + "\n", + "...", + "... ", + "... ", + "...\n", + "", + ], + "request": [ + "Please analyze the sentiment of this review.", + "I'd like to know the sentiment of this text.", + "Can you determine the sentiment of this review?", + "Analyze the sentiment of this review, please.", + "Evaluate the sentiment of this text.", + "What's the sentiment of this review?", + "I'm curious about the sentiment of this text.", + "Please provide the sentiment analysis for this review.", + "Could you analyze the sentiment of this text?", + "Assess the sentiment of this review.", + "I'd like the sentiment of this text analyzed.", + "Can you determine the sentiment of this text?", + "Analyze this review's sentiment, please.", + "What's the sentiment of this text?", + "I'm interested in the sentiment analysis of this review.", + "Please evaluate the sentiment of this text.", + "Could you provide the sentiment analysis for this review?", + "Determine the sentiment of this text.", + "I'd appreciate it if you could analyze the sentiment of this review.", + "Can you analyze the sentiment of this text?", + "Please assess the sentiment of this review.", + "What's the sentiment of this review, please?", + "I'm curious about the sentiment of this review.", + "Could you analyze the sentiment of this review, please?", + "Provide the sentiment analysis for this text.", + "Evaluate this review's sentiment.", + "What's the sentiment of this review?", + "Can you analyze the sentiment of this review?", + "Please assess the sentiment of this text.", + "Could you provide the sentiment analysis of this review?", + "Analyze this text's sentiment, please.", + "I'd like to know the sentiment of this review.", + "Determine the 
sentiment of this review, please.", + "Analyze the sentiment of this text, please.", + "Can you evaluate the sentiment of this review?", + "What's the sentiment of this text, please?", + "I'm interested in the sentiment analysis of this text.", + "Please analyze the sentiment of this review text.", + "Assess this text's sentiment, please.", + "Provide the sentiment analysis for this text.", + "Evaluate this text's sentiment, please.", + "What's the sentiment of this text?", + "Can you determine the sentiment of this text, please?", + "I'd like to know the sentiment of this review text.", + "Determine the sentiment of this text, please.", + "Analyze this review text's sentiment.", + "Please assess the sentiment of this text, please.", + "I'm curious about the sentiment of this text.", + "Could you analyze the sentiment of this review text, please?", + "Provide the sentiment analysis of this text.", + "Evaluate the sentiment of this review text, please.", + "What's the sentiment of this review text, please?", + "Can you evaluate the sentiment of this review text?", + "I'm interested in the sentiment analysis of this review text.", + "Please analyze the sentiment of this review text.", + "Assess this review text's sentiment, please.", + "Determine the sentiment of this review text.", + "Analyze the sentiment of this review text, please.", + "What's the sentiment of this review text?", + "Can you determine the sentiment of this review text, please?", + "I'd like to know the sentiment of this text, please.", + "Determine the sentiment of this text, please.", + "Analyze this text's sentiment.", + "Please assess the sentiment of this text, please.", + "I'm curious about the sentiment of this text.", + "Could you analyze the sentiment of this text, please?", + "Provide the sentiment analysis of this text.", + "Evaluate the sentiment of this text, please.", + "What's the sentiment of this text, please?", + "Can you evaluate the sentiment of this text?", + "I'm interested in 
the sentiment analysis of this text.", + "Please analyze the sentiment of this text.", + "Assess this text's sentiment, please.", + "Determine the sentiment of this text.", + "Analyze the sentiment of this text, please.", + "What's the sentiment of this text?", + "Can you determine the sentiment of this text, please?", + "I'd like to know the sentiment of this text, please.", + "Determine the sentiment of this text, please.", + "Analyze this text's sentiment.", + "Please assess the sentiment of this text, please.", + "I'm curious about the sentiment of this text.", + "Could you analyze the sentiment of this text, please?", + "Provide the sentiment analysis of this text.", + "Evaluate the sentiment of this text, please.", + "What's the sentiment of this text, please?", + "Can you evaluate the sentiment of this text?", + "I'm interested in the sentiment analysis of this text.", + "Please analyze the sentiment of this text.", + "Assess this text's sentiment, please.", + "Determine the sentiment of this text.", + "Analyze the sentiment of this text, please.", + "What's the sentiment of this text?", + "Can you determine the sentiment of this text, please?", + "I'd like to know the sentiment of this text, please.", + "Determine the sentiment of this text, please.", + "Analyze this text's sentiment.", + "Please assess the sentiment of this text, please.", + "I'm curious about the sentiment of this text.", + "Could you analyze the sentiment of this text, please?", + "Provide the sentiment analysis of this text.", + "Evaluate the sentiment of this text, please.", + "What's the sentiment of this text, please?", + "Can you evaluate the sentiment of this text?", + "I'm interested in the sentiment analysis of this text.", + ], + } diff --git a/prompting/tasks/generic_instruction.py b/prompting/tasks/generic_instruction.py index 4d27773f8..9f5de0f9c 100644 --- a/prompting/tasks/generic_instruction.py +++ b/prompting/tasks/generic_instruction.py @@ -13,10 +13,10 @@ class 
GenericInstructionTask(Task): - challenge_type = 'query' name = "generic" desc = "get help on answering a general instruction" goal = "to get the answer to the following instruction" + challenge_type = 'query' reward_definition = [ dict(name="rouge", ngram="rouge-1", metric="f", weight=0.25), diff --git a/prompting/tasks/math.py b/prompting/tasks/math.py index 97d43921e..c1abf208e 100644 --- a/prompting/tasks/math.py +++ b/prompting/tasks/math.py @@ -2,6 +2,7 @@ import bittensor as bt from dataclasses import dataclass from prompting.tasks import Task +from .challenge_templates import MathChallengeTemplate @dataclass @@ -17,15 +18,13 @@ class MathTask(Task): static_reference = True static_query = True + challenge_type = 'paraphrase' + challenge_template = MathChallengeTemplate() def __init__(self, llm_pipeline, context, create_reference=True): self.context = context - self.query = ( - "How can I solve the following problem, " - + context.content - + "? Make sure to include the whole problem when you ask your question." - ) + self.query = context.content self.reference = context.extra["solution"] self.topic = context.title self.subtopic = context.topic diff --git a/prompting/tasks/qa.py b/prompting/tasks/qa.py index f50607907..2fa293087 100644 --- a/prompting/tasks/qa.py +++ b/prompting/tasks/qa.py @@ -1,8 +1,8 @@ +import bittensor as bt from dataclasses import dataclass from prompting.tasks import Task # TODO: introduce criteria for the query and reference answer (length, layout, etc.) and make these arguments -# TODO # Used to instruct the LLM to provide a good query when given a context QUERY_SYSTEM_PROMPT = """\ @@ -19,6 +19,20 @@ {context} """ +# Used to obtain the query (which is a followup question about the context) +# TODO: we may not need the entire conversation history - we can sample a subset of it (first k messages, last k messages, etc.) +FOLLOWUP_PROMPT_TEMPLATE = """ +Compose a single, specific question to continue the dialogue below. 
Adopt the persona of the original user, including their communication style and objectives. The question should be based on the previous exchanges and must not be answerable with a simple yes or no. + +The question should require detailed knowledge of the conversation history for a correct response, emphasizing requests for clarification or additional details (e.g., 'What specific steps did you take?' or 'How did that situation resolve?'). Avoid referring to the subject by name and instead use indirect pronouns or descriptions (e.g., 'he,' 'she,' 'it'). Avoid answering the question yourself and refrain from providing new information not already discussed. + +# Context: +{context} + +# Conversation History: +{history} +""" + # Used to obtain reference answer REFERENCE_PROMPT_TEMPLATE = """\ Answer the question you will receive in detail, utilizing the following context. @@ -30,6 +44,20 @@ {question} """ +# TODO: We also need a special followup reference prompt (or just merge both) +# Used to obtain reference answer +FOLLOWUP_REFERENCE_PROMPT_TEMPLATE = """\ +Answer the question you will receive in detail, utilizing the following context and conversation history as required. 
+ +#Context: +{context} + +# Conversation History: +{history} + +# Question: +{question} +""" @dataclass class QuestionAnsweringTask(Task): @@ -49,21 +77,32 @@ class QuestionAnsweringTask(Task): dict(name="remove_quotes"), dict(name="prune_ending"), dict(name="remove_roles"), + dict(name="remove_post_question_text"), ] - def __init__(self, llm_pipeline, context, create_reference=True): + def __init__(self, llm_pipeline, context, create_reference=True, history=None): self.context = context self.query_system_prompt = QUERY_SYSTEM_PROMPT - self.query_prompt = QUERY_PROMPT_TEMPLATE.format(context=context.content) + if history: + self.query_prompt = FOLLOWUP_PROMPT_TEMPLATE.format(context=context.content, history=history) + bt.logging.warning(f'Using history!!\n{history=}\n\n{context=}\n\n{self.query_prompt=}') + else: + self.query_prompt = QUERY_PROMPT_TEMPLATE.format(context=context.content) + self.query = self.generate_query(llm_pipeline) - self.reference_prompt = REFERENCE_PROMPT_TEMPLATE.format( - context=context.content, question=self.query - ) + if history: + self.reference_prompt = FOLLOWUP_REFERENCE_PROMPT_TEMPLATE.format( + context=context.content, question=self.query, history=history + ) + else: + self.reference_prompt = REFERENCE_PROMPT_TEMPLATE.format( + context=context.content, question=self.query + ) if create_reference: self.reference = self.generate_reference(llm_pipeline) - + self.topic = context.title self.subtopic = context.topic self.tags = context.tags diff --git a/prompting/tasks/sentiment.py b/prompting/tasks/sentiment.py new file mode 100644 index 000000000..60635d59a --- /dev/null +++ b/prompting/tasks/sentiment.py @@ -0,0 +1,37 @@ +from prompting.tasks import Task +from .challenge_templates import SentimentChallengeTemplate + +QUERY_PROMPT_TEMPLATE = """\ +You are a review-generating expert, focusing on making highly realistic reviews. Your response contains only the review, nothing more, nothing less.
You will adhere to a word limit of 250 words. Ask a specific question about the following context: +{context} +""" + + +class SentimentAnalysisTask(Task): + name = "sentiment" + desc = "get help analyzing the sentiment of a review" + goal = "to get the sentiment to the following review" + challenge_type = 'paraphrase' + challenge_type = 'paraphrase' + challenge_template = SentimentChallengeTemplate() + + reward_definition = [ + dict(name="ordinal", weight=1.0), + ] + penalty_definition = [] + cleaning_pipeline = [] + + static_reference = True + + def __init__(self, llm_pipeline, context, create_reference=True): + self.context = context + self.query_prompt = QUERY_PROMPT_TEMPLATE.format(context=context.content) + self.query = self.generate_query(llm_pipeline) + self.reference = context.subtopic + + self.topic = context.title + self.subtopic = context.topic + self.tags = context.tags + + def format_challenge(self, challenge) -> str: + return challenge.format(context = self.query) \ No newline at end of file diff --git a/prompting/tasks/task.py b/prompting/tasks/task.py index cfa03a3d8..d837aac46 100644 --- a/prompting/tasks/task.py +++ b/prompting/tasks/task.py @@ -49,6 +49,7 @@ class Task(ABC): query_system_prompt = "" query_prompt = "" cleaner = None + challenge_type = 'inference' def __str__(self): return f"{self.__class__.__name__}(name={self.name!r}, desc={self.desc!r}, goal={self.goal!r}, query={self.query!r}, reference={self.reference!r}, topic={self.topic!r}, subtopic={self.subtopic!r}, tags={self.tags!r})" diff --git a/prompting/tasks/translate.py b/prompting/tasks/translate.py new file mode 100644 index 000000000..1913db6ed --- /dev/null +++ b/prompting/tasks/translate.py @@ -0,0 +1,130 @@ +import tqdm +import bittensor as bt +import argostranslate.package +import argostranslate.translate +import random +from typing import List, Tuple +from prompting.tasks import Task +from dataclasses import dataclass +from argostranslate.package import AvailablePackage 
+from prompting.shared import Context + +SUPPORTED_LANGUAGES = [ + "en", "es", "fr", "pt", "uk" +] + +class TranslationPipeline: + def __init__(self): + self.supported_language_pairs = self.load_translation_packages() + + def load_translation_packages(self) -> List[AvailablePackage]: + # Update package index and get available and installed packages + argostranslate.package.update_package_index() + available_packages = argostranslate.package.get_available_packages() + installed_packages = argostranslate.package.get_installed_packages() + + # Helper function to check if a package is installed + def is_package_installed(from_code, to_code, packages): + return any(pkg for pkg in packages if pkg.from_code == from_code and pkg.to_code == to_code) + + # Filter available packages for supported language pairs + supported_language_pairs = [ + pkg for pkg in available_packages + if pkg.from_code in SUPPORTED_LANGUAGES and pkg.to_code in SUPPORTED_LANGUAGES + ] + + bt.logging.info(f"Supported language pairs: {supported_language_pairs}") + + # Check for installed packages + pbar = tqdm.tqdm(supported_language_pairs, desc="Checking installed packages") + for package in pbar: + if not is_package_installed(package.from_code, package.to_code, installed_packages): + bt.logging.info(f"Installing package from {package.from_code} to {package.to_code}") + package_path = str(package.download()) + argostranslate.package.install_from_path(package_path) + bt.logging.success(f'Package successfully installed at {package_path}') + else: + bt.logging.info(f"Package from {package.from_code} to {package.to_code} is already installed, skipping...") + + return supported_language_pairs + + def random_translation(self, content:str) -> str: + # TODO: NOT RANDOM + from_code = self.SUPPORTED_LANGUAGES[0] + to_code = self.SUPPORTED_LANGUAGES[1] + return argostranslate.translate.translate(content, from_code, to_code) + + def translate(self, content: str, from_code: str, to_code: str): + self.reference = 
argostranslate.translate.translate(content, from_code, to_code) + + def translate_to_random_language(self, content: str, from_code:str='en') -> Tuple[AvailablePackage, str]: + english_supported_languages = list(filter(lambda x: x.from_code == from_code, self.supported_language_pairs)) + available_translations = list(map(lambda x: x, english_supported_languages)) + + random_translation_obj = random.choice(available_translations) + translation_code = random_translation_obj.to_code + + translated_content = argostranslate.translate.translate(content, from_code, to_code=translation_code) + + return random_translation_obj, translated_content + + +@dataclass +class TranslationTask(Task): + name = "translation" + desc = "get translation help" + goal = "to get the translation for the given piece of text" + + templates = [ + "Could you assist me with translating the following text into {another_language}? \n{text}", + "I need some help translating this text into {another_language}. Can you do it? \n{text}", + "Is it possible for you to translate this text for me into {another_language}? Here it is: \n{text}", + "Would you mind helping me convert this text into {another_language}? \n{text}", + "Could you please convert this into {another_language} for me? \n{text}", + "I was wondering if you could help translate this into {another_language}? \n{text}", + "Can you provide a translation for this text into {another_language}? \n{text}", + "Hey, can you turn this text into {another_language} for me? \n{text}", + "Could I get some assistance in translating this into {another_language}? \n{text}", + "Are you able to help me render this text in {another_language}? \n{text}", + "I'd appreciate your help translating this text into {another_language}. Here's the text: \n{text}", + "Please could you translate the following text into {another_language}? \n{text}", + "Might you help me by translating this text to {another_language}? 
\n{text}", + "I'm looking for help to translate this text into {another_language}. Any chance you can assist? \n{text}", + "How about translating this text into {another_language} for me? \n{text}", + "Would it be possible for you to help translate this text into {another_language}? \n{text}", + "I need your expertise to translate this text into {another_language}, can you help? \n{text}", + "Can you work your magic and translate this text into {another_language}? \n{text}", + "I require assistance translating the following into {another_language}. Can you help? \n{text}", + "Hey, could you take a moment to translate this text into {another_language} for me? \n{text}", + ] + + # TODO: TEST BLEU SCORE + reward_definition = [ + dict(name="rouge", ngram="rouge-1", metric="f", weight=1), + ] + penalty_definition = [ + dict(name="rouge", ngram="rouge-1", metric="f", weight=1), + ] + + cleaning_pipeline = [] + + def __init__(self, translation_pipeline: TranslationPipeline, context: Context): + # Set task internal variables + self.context = context + self.topic = context.title + self.subtopic = context.topic + self.tags = context.tags + + # Translates english text to a random language + content_translation_obj, translated_content = translation_pipeline.translate_to_random_language(context.content) + + # Translates the translation to another random language + reference_translation_obj, reference_translation_content = translation_pipeline.translate_to_random_language(content=translated_content, from_code=content_translation_obj.to_code) + self.reference = reference_translation_content + + # Composes the query + # TODO: Implement template translation + template = random.choice(self.templates) + self.query = template.format(another_language=reference_translation_obj.to_name, text=translated_content) + + \ No newline at end of file diff --git a/prompting/tools/__init__.py b/prompting/tools/__init__.py index c296ce17c..82e3713d9 100644 --- a/prompting/tools/__init__.py +++ 
b/prompting/tools/__init__.py @@ -1,5 +1,4 @@ from .datasets import ( - Context, Dataset, MockDataset, HFCodingDataset, @@ -8,6 +7,7 @@ WikiDateDataset, MathDataset, GenericInstructionDataset, + ReviewDataset, ) from .selector import Selector @@ -18,6 +18,7 @@ MathDataset.name: MathDataset, WikiDateDataset.name: WikiDateDataset, GenericInstructionDataset.name: GenericInstructionDataset, + ReviewDataset.name: ReviewDataset } diff --git a/prompting/tools/datasets/__init__.py b/prompting/tools/datasets/__init__.py index 948fead7d..3bdda191b 100644 --- a/prompting/tools/datasets/__init__.py +++ b/prompting/tools/datasets/__init__.py @@ -1,7 +1,7 @@ -from .context import Context from .base import Dataset from .code import HFCodingDataset, StackOverflowDataset from .math import MathDataset from .mock import MockDataset from .wiki import WikiDataset, WikiDateDataset -from .generic_instruction import GenericInstructionDataset \ No newline at end of file +from .generic_instruction import GenericInstructionDataset +from .review import ReviewDataset \ No newline at end of file diff --git a/prompting/tools/datasets/base.py b/prompting/tools/datasets/base.py index a2130e929..ee4535601 100644 --- a/prompting/tools/datasets/base.py +++ b/prompting/tools/datasets/base.py @@ -24,7 +24,7 @@ import bittensor as bt from ..selector import Selector -from .context import Context +from prompting.shared.context import Context from prompting.utils.exceptions import MaxRetryError diff --git a/prompting/tools/datasets/review.py b/prompting/tools/datasets/review.py new file mode 100644 index 000000000..cfadbcf8e --- /dev/null +++ b/prompting/tools/datasets/review.py @@ -0,0 +1,38 @@ +import random +import functools + +import bittensor as bt +from typing import Dict, Union, List, Tuple +from .base import TemplateDataset + + +class ReviewDataset(TemplateDataset): + "Review dataset, which creates LLM prompts for writing reviews." 
+ name = 'review' + SENTIMENTS = ["positive", "neutral", "negative"] + # TODO: Expand the params to create a larger dataset, while maintaining the same quality. + query_template = "Create a {topic} review of a {title}. The review must be of {subtopic} sentiment." + params = dict( + topic=[ + "short", + "long", + "medium length", + "twitter", + "amazon", + "terribly written", + "hilarious", + ], + title=[ + "movie", + "book", + "restaurant", + "hotel", + "product", + "service", + "car", + "company", + "live event", + ], + subtopic=SENTIMENTS, + ) + \ No newline at end of file diff --git a/prompting/validator.py b/prompting/validator.py index 094c7eb9a..4a54930af 100644 --- a/prompting/validator.py +++ b/prompting/validator.py @@ -3,6 +3,7 @@ from prompting.llms import vLLMPipeline from prompting.base.validator import BaseValidatorNeuron from prompting.rewards import RewardPipeline +from prompting.tasks.translate import TranslationPipeline class Validator(BaseValidatorNeuron): @@ -20,7 +21,8 @@ def __init__(self, config=None): model_id=self.config.neuron.model_id, device=self.device, mock=self.config.mock, - ) + ) + self.translation_pipeline = TranslationPipeline() if abs(1-sum(self.config.neuron.task_p)) > 0.001: raise ValueError("Task probabilities do not sum to 1.") @@ -75,4 +77,3 @@ def __exit__(self, exc_type, exc_value, traceback): self.thread.join(5) self.is_running = False bt.logging.debug("Stopped") - diff --git a/requirements.txt b/requirements.txt index 4d77ae6c5..9a91e8df0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,5 @@ langchainhub==0.1.14 python-dotenv wikipedia_sections vllm -loguru \ No newline at end of file +loguru +argostranslate \ No newline at end of file diff --git a/tests/fixtures/task.py b/tests/fixtures/task.py index 3eca818e9..af6e9d055 100644 --- a/tests/fixtures/task.py +++ b/tests/fixtures/task.py @@ -6,8 +6,9 @@ DebuggingTask, MathTask, DateQuestionAnsweringTask, + TranslationTask ) -from prompting.tools import 
Context +from prompting.shared import Context from .dataset import WIKI_CONTEXT, CODING_CONTEXT, MATH_CONTEXT, DATEQA_CONTEXT, MOCK_CONTEXT TASKS = [ @@ -17,6 +18,8 @@ DebuggingTask, MathTask, DateQuestionAnsweringTask, + #TODO: Add proper separation for translation task tests + #TranslationTask ] CONTEXTS = { @@ -26,6 +29,7 @@ DebuggingTask: CODING_CONTEXT, MathTask: MATH_CONTEXT, DateQuestionAnsweringTask: DATEQA_CONTEXT, + #TranslationTask: WIKI_CONTEXT } TASK_FIELDS = { diff --git a/tests/test_dataset.py b/tests/test_dataset.py index ae352debe..9d208bd1a 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -2,7 +2,7 @@ from .fixtures.dataset import DATASETS, CONTEXTS, CONTEXT_FIELDS from prompting.tools.datasets import Dataset -from prompting.tools import Context +from prompting.shared import Context @pytest.mark.parametrize("dataset", DATASETS) diff --git a/tests/test_forward.py b/tests/test_forward.py index 0b5a58b93..aa826a179 100644 --- a/tests/test_forward.py +++ b/tests/test_forward.py @@ -47,7 +47,7 @@ def test_generate_reference_parallel_to_dendrite( mock_neuron.dendrite = partial(mock_dendrite_call, delay=dendrite_time) - event = asyncio.run(run_step(mock_neuron, mock_agent, k=4, timeout=0.1)) + event = asyncio.run(run_step(self=mock_neuron, agent=mock_agent, roles=[], messages=[], k=4, timeout=0.1)) step_time = event["step_time"] reward_pipeline_time = sum(