2 changes: 1 addition & 1 deletion neurons/validator.py
@@ -40,7 +40,7 @@ def __init__(self, config=None):
mock=self.config.mock,
)

if sum(self.config.neuron.task_p) != 1:
if abs(1-sum(self.config.neuron.task_p)) > 0.001:
raise ValueError("Task probabilities do not sum to 1.")

# Filter out tasks with 0 probability
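The switch to a tolerance check matters because floating-point probabilities rarely sum to exactly 1. A minimal standalone sketch of the idea (hypothetical values, not the validator's actual config):

# Why the exact equality check is fragile for float probabilities (illustrative values).
task_p = [0.1] * 10                     # nominally sums to 1
print(sum(task_p))                      # typically 0.9999999999999999
print(sum(task_p) != 1)                 # True  -> the old check would raise ValueError
print(abs(1 - sum(task_p)) > 0.001)     # False -> the new tolerance check passes
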
10 changes: 6 additions & 4 deletions prompting/task_registry.py
@@ -1,19 +1,21 @@
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset, GenericInstructionDataset

# TODO: Expand this to include extra information beyond just the task and dataset names
mock_task, mock_dataset = MockTask.name, [MockDataset.name]
summarization_task, summarization_dataset = SummarizationTask.name, [WikiDataset.name]
qa_task, qa_dataset = QuestionAnsweringTask.name, [WikiDataset.name]
debugging_task, debugging_dataset = DebuggingTask.name, [HFCodingDataset.name]
#debugging_task, debugging_dataset = DebuggingTask.name, [HFCodingDataset.name]
math_task, math_dataset = MathTask.name, [MathDataset.name]
date_qa_task, date_qa_dataset = DateQuestionAnsweringTask.name, [WikiDateDataset.name]
generic_instruction_task, generic_instruction_dataset = GenericInstructionTask.name, [GenericInstructionDataset.name]

TASK_REGISTRY = {
mock_task: mock_dataset,
summarization_task: summarization_dataset,
qa_task: qa_dataset,
debugging_task: debugging_dataset,
#debugging_task: debugging_dataset,
math_task: math_dataset,
date_qa_task: date_qa_dataset,
generic_instruction_task: generic_instruction_dataset
}
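For orientation, TASK_REGISTRY maps each task name to the dataset names allowed to feed it. A hedged sketch of how it might be joined with the DATASETS mapping updated later in this PR (lookup code invented for illustration, not part of the change):

# Hypothetical lookup joining TASK_REGISTRY (task name -> dataset names)
# with DATASETS (dataset name -> dataset class); not code from this PR.
from prompting.task_registry import TASK_REGISTRY
from prompting.tools import DATASETS

for task_name, dataset_names in TASK_REGISTRY.items():
    dataset_classes = [DATASETS[name] for name in dataset_names]
    print(task_name, "->", [cls.__name__ for cls in dataset_classes])
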
4 changes: 2 additions & 2 deletions prompting/tasks/__init__.py
@@ -13,7 +13,7 @@
QuestionAnsweringTask.name: QuestionAnsweringTask,
DateQuestionAnsweringTask.name: DateQuestionAnsweringTask,
SummarizationTask.name: SummarizationTask,
DebuggingTask.name: DebuggingTask,
#GenericInstructionTask.name: GenericInstructionTask,
#DebuggingTask.name: DebuggingTask,
GenericInstructionTask.name: GenericInstructionTask,
MathTask.name: MathTask,
}
2 changes: 1 addition & 1 deletion prompting/tasks/date_qa.py
@@ -22,7 +22,7 @@ class DateQuestionAnsweringTask(Task):
static_reference = True
static_query = True

def __init__(self, llm_pipeline, context, create_reference=True):
def __init__(self, llm_pipeline, context, create_reference =True):
self.context = context

self.query = (
155 changes: 30 additions & 125 deletions prompting/tasks/generic_instruction.py
@@ -1,141 +1,46 @@
import re
import bittensor as bt
from dataclasses import dataclass
from tenacity import retry, stop_after_attempt
from prompting.tasks import Task
from typing import Tuple

CRITERIA_GENERATION_PROMPT = """\
We are brainstorming criteria with which to grade a language model on its responses in
diverse situations.
A ‘criteria‘ is some useful, real-world objective, and associated rubric for scores 1-5, that
tests a capability.

Please brainstorm a new criteria and scoring rubrics.
Be creative and create new but useful criteria that people in different settings or industries
might find practical.
Please format the output as same as the above examples with no extra or surrounding text.
Write [END] after you are done.
New Criteria:
QUERY_PROMPT_TEMPLATE = """\
You are a question-generating expert, focusing on delivering comprehensive and accurate questions with depth and clarity. Your response contains only the question, nothing more, nothing less. You will adhere to a word limit of 100 words.
{context}
"""

REFERENCE_PROMPT_TEMPLATE = """\
Answer the following question.

INSTRUCTION_GENERATION_PROMPT = """\
Your job is to generate a new novel problem and a response that is related to the given score
rubric.
The score rubric:
{CRITERIA}
* Problem
- The problem should inherently be related to the score criteria and score rubric given above.
Specifically, the score criteria should be the core attributes required to solve the problem.
- The problem itself should not be too generic or easy to solve.
- If the score rubric is related to logical abilities, generate problems that require math or
coding abilities.
- Try to make the person who might solve the problem not notice the existence of the score
rubric by not explicitly mentioning it, and also provide additional inputs and options if
needed.
- Assume a situation where a user is interacting with an AI model. The user would try to
ask in a first-person point of view, but not using terms like ”I”, ”A User” or ”You” in the
first sentence.
- Do not give a role to the AI, assume that the user is asking a question from his point of
view.
- Do not include any phrase related to AI model in the problem.
* Response
- The response should be a response that would get a score of 5 from the score rubric.
- The response should be as detailed as possible unless the score rubric is related to
conciseness or brevity. It should consist of multiple paragraphs, a list of items, or a
step-by-step reasoning process.
- The response should look like how a well-prompted GPT-4 would normally answer your
problem.
* Format
- DO NOT WRITE ANY GREETING MESSAGES, just write the problem and response
only.
- In front of the problem, append the phrase ”Problem:” and in front of the response, append
the phrase ”Response:”.
- Write in the order of ”Problem” - ”Response”, where the two items are separated by the
phrase ”[NEXT]”.
- Write [END] after you are done.
Data Generation:
"""
# Question:
{query}"""


@dataclass
class GenericInstructionTask(Task):
challenge_type = 'query'
name = "generic"
desc = "get help on answering a general instruction"
goal = "to get the answer to the following instruction"

reward_definition = [
dict(name="rouge", ngram="rouge-1", metric="f", weight=1.0),
dict(name="relevance", threshold=None, weight=1.0),
]
penalty_definition = [
dict(name="rouge", ngram="rouge-1", metric="f", weight=0.5),
]

def __init__(self, llm_pipeline):
super().__init__(
name="generic_instruction",
goal="to get the answer to a instruction",
delimiter="```",
reward_types=[
"CRITERIA_REWARD",
],
reward_threshold=0.5,
use_challenge_as_prompt=True,
desc="",
topics={},
topic="",
subtopic="",
challenge="",
reference="",
criteria="",
)

self.criteria = self.create_criteria(llm_pipeline)
instruction, reference = self.create_instruction_and_reference(llm_pipeline)
self.challenge = instruction
self.reference = reference

def extract_instruction_and_reference_from_text(self, text: str) -> Tuple[str, str]:
# Split the text into problem and response using regular expression
split_text = re.split(r"\nResponse:\n", text)

# Extract problem and response
problem = split_text[0].strip()
response = split_text[1].strip()

return problem, response

def create_criteria(self, llm) -> str:
bt.logging.debug("🎲 Creating a generic criteria-scoring rubric ...")

# Generate a score rubric with defined criterias
criteria_generation_response = llm(CRITERIA_GENERATION_PROMPT)
return criteria_generation_response

@retry(stop=stop_after_attempt(5))
def create_instruction_and_reference(self, llm) -> Tuple[str, str]:
try:
bt.logging.debug("📋 🎯 Creating instruction and referece text...")
cleaning_pipeline = [
dict(name="remove_quotes"),
dict(name="prune_ending"),
dict(name="remove_roles"),
]

if not self.criteria:
raise ValueError(
"Criteria must be defined before creating a generic instruction."
)
def __init__(self, llm_pipeline, context, create_reference=True):
self.context = context

# Create generic instruction with the score rubric
instruction_generation_prompt_with_criteria = (
INSTRUCTION_GENERATION_PROMPT.format(CRITERIA=self.criteria)
)
instruction_generation_response = llm(
instruction_generation_prompt_with_criteria
)
self.query_prompt = QUERY_PROMPT_TEMPLATE.format(context=context.content)
self.query = self.generate_query(llm_pipeline)

# Extract generic instruction and reference response from the generated text
(
instruction,
reference,
) = self.extract_instruction_and_reference_from_text(
instruction_generation_response
)
self.reference_prompt = REFERENCE_PROMPT_TEMPLATE.format(query = self.query)
if create_reference:
self.reference = self.generate_reference(llm_pipeline)

return instruction, reference
except Exception as e:
bt.logging.error(
f"Failed to create instruction and reference text: {e}, retrying..."
)
raise e
self.topic = context.title
self.subtopic = context.topic
self.tags = context.tags
4 changes: 2 additions & 2 deletions prompting/tasks/summarization.py
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from prompting.tasks import Task
from transformers import Pipeline



# TODO: introduce criteria for the query and reference answer (length, layout, etc.) and make these arguments
@@ -39,7 +39,7 @@ class SummarizationTask(Task):

static_query = True

def __init__(self, llm_pipeline: Pipeline, context: str, create_reference=True):
def __init__(self, llm_pipeline, context, create_reference=True):
self.context = context

# Query is just the article title and section name
14 changes: 8 additions & 6 deletions prompting/tools/__init__.py
@@ -7,16 +7,18 @@
StackOverflowDataset,
WikiDateDataset,
MathDataset,
GenericInstructionDataset,
)
from .selector import Selector

DATASETS = {
"mock": MockDataset,
"hf_coding": HFCodingDataset,
"wiki": WikiDataset,
#"stack_overflow": StackOverflowDataset,
"wiki_date": WikiDateDataset,
"math": MathDataset,
MockDataset.name: MockDataset,
#HFCodingDataset.name: HFCodingDataset,
WikiDataset.name: WikiDataset,
#StackOverflowDataset.name: StackOverflowDataset,
MathDataset.name: MathDataset,
WikiDateDataset.name: WikiDateDataset,
GenericInstructionDataset.name: GenericInstructionDataset,
}


1 change: 1 addition & 0 deletions prompting/tools/datasets/__init__.py
@@ -4,3 +4,4 @@
from .math import MathDataset
from .mock import MockDataset
from .wiki import WikiDataset, WikiDateDataset
from .generic_instruction import GenericInstructionDataset
46 changes: 44 additions & 2 deletions prompting/tools/datasets/base.py
@@ -17,6 +17,8 @@
# DEALINGS IN THE SOFTWARE.

import time
import random
import functools
from abc import ABC, abstractmethod
from typing import Dict
import bittensor as bt
@@ -28,7 +30,7 @@

class Dataset(ABC):
"""Base class for datasets."""
name = "dataset"

max_tries: int = 10

@abstractmethod
@@ -74,11 +76,51 @@ def next(
f"Could not find any samples which meet {self.__class__.__name__} requirements after {tries} tries."
)

info["source"] = self.__class__.__name__
info["stats"] = {
"creator": self.__class__.__name__,
"fetch_time": time.time() - t0,
"num_tries": tries,
"fetch_method": method,
"next_kwargs": kwargs,
}
return Context(**info)


class TemplateDataset(Dataset):
"""Base class for datasets based on a template."""

@property
def size(self):
return functools.reduce(
lambda x, y: x * y, [len(v) for v in self.params.values()], 1
)

def __repr__(self):
return f"{self.__class__.__name__} with template: {self.query_template!r} and {self.size} possible phrases"

def get(self, params: dict):
content = self.query_template.format(**params)
keys, values = list(zip(*params.items()))

return {
"title": params.get(
"title", keys[0]
), # Use the first key as the title if no field called title is present
"topic": params.get("topic", keys[min(1, len(keys) - 1)]), # Same for topic
"subtopic": params.get(
"subtopic", keys[min(2, len(keys) - 2)]
), # Same for subtopic
"content": content, # content
"internal_links": values, # internal links
"external_links": values, # external links
"tags": values, # tags
"extra": {},
}

def random(self, selector: Selector = None):
selected = {k: selector(v) for k, v in self.params.items()}
return self.get(selected)

def search(self, params: dict, selector: Selector = None):
selected = {k: params.get(k, selector(v)) for k, v in self.params.items()}
return self.get(selected)
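A minimal sketch of how a concrete TemplateDataset subclass could plug into this base class (the class, template, and params below are invented for illustration; the real GenericInstructionDataset lives in prompting/tools/datasets/generic_instruction.py):

# Hypothetical subclass, only to illustrate the TemplateDataset contract.
class ToyInstructionDataset(TemplateDataset):
    name = "toy_instruction"
    query_template = "Write a {style} explanation of {topic} for a {audience}."
    params = dict(
        style=["concise", "detailed"],
        topic=["binary search", "gradient descent"],
        audience=["beginner", "practitioner"],
    )

# size is the product of the value-list lengths (here 2 * 2 * 2 = 8 phrases).
# random(selector) picks one value per key via the Selector and returns get(...),
# while search(params, selector) pins the supplied keys and samples the rest;
# Dataset.next() then wraps the resulting dict in a Context with fetch stats.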