2 changes: 1 addition & 1 deletion neurons/validator.py
@@ -40,7 +40,7 @@ def __init__(self, config=None):
mock=self.config.mock,
)

if sum(self.config.neuron.task_p) != 1:
if abs(1-sum(self.config.neuron.task_p)) > 0.001:
raise ValueError("Task probabilities do not sum to 1.")

# Filter out tasks with 0 probability
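The switch to a tolerance check matters because floating-point probabilities rarely sum to exactly 1. A minimal standalone sketch of the idea (hypothetical values, not the validator's actual config):

# Why the exact equality check is fragile for float probabilities (illustrative values).
task_p = [0.1] * 10                     # nominally sums to 1
print(sum(task_p))                      # typically 0.9999999999999999
print(sum(task_p) != 1)                 # True  -> the old check would raise ValueError
print(abs(1 - sum(task_p)) > 0.001)     # False -> the new tolerance check passes
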
10 changes: 6 additions & 4 deletions prompting/task_registry.py
@@ -1,19 +1,21 @@
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset, GenericInstructionDataset

# TODO: Expand this to include extra information beyond just the task and dataset names
mock_task, mock_dataset = MockTask.name, [MockDataset.name]
summarization_task, summarization_dataset = SummarizationTask.name, [WikiDataset.name]
qa_task, qa_dataset = QuestionAnsweringTask.name, [WikiDataset.name]
debugging_task, debugging_dataset = DebuggingTask.name, [HFCodingDataset.name]
#debugging_task, debugging_dataset = DebuggingTask.name, [HFCodingDataset.name]
math_task, math_dataset = MathTask.name, [MathDataset.name]
date_qa_task, date_qa_dataset = DateQuestionAnsweringTask.name, [WikiDateDataset.name]
generic_instruction_task, generic_instruction_dataset = GenericInstructionTask.name, [GenericInstructionDataset.name]

TASK_REGISTRY = {
mock_task: mock_dataset,
summarization_task: summarization_dataset,
qa_task: qa_dataset,
debugging_task: debugging_dataset,
#debugging_task: debugging_dataset,
math_task: math_dataset,
date_qa_task: date_qa_dataset,
generic_instruction_task: generic_instruction_dataset
}
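For orientation, TASK_REGISTRY maps each task name to the dataset names allowed to feed it. A hedged sketch of how it might be joined with the DATASETS mapping updated later in this PR (lookup code invented for illustration, not part of the change):

# Hypothetical lookup joining TASK_REGISTRY (task name -> dataset names)
# with DATASETS (dataset name -> dataset class); not code from this PR.
from prompting.task_registry import TASK_REGISTRY
from prompting.tools import DATASETS

for task_name, dataset_names in TASK_REGISTRY.items():
    dataset_classes = [DATASETS[name] for name in dataset_names]
    print(task_name, "->", [cls.__name__ for cls in dataset_classes])
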
4 changes: 2 additions & 2 deletions prompting/tasks/__init__.py
@@ -13,7 +13,7 @@
QuestionAnsweringTask.name: QuestionAnsweringTask,
DateQuestionAnsweringTask.name: DateQuestionAnsweringTask,
SummarizationTask.name: SummarizationTask,
DebuggingTask.name: DebuggingTask,
#GenericInstructionTask.name: GenericInstructionTask,
#DebuggingTask.name: DebuggingTask,
GenericInstructionTask.name: GenericInstructionTask,
MathTask.name: MathTask,
}
2 changes: 1 addition & 1 deletion prompting/tasks/date_qa.py
@@ -22,7 +22,7 @@ class DateQuestionAnsweringTask(Task):
static_reference = True
static_query = True

def __init__(self, llm_pipeline, context, create_reference=True):
def __init__(self, llm_pipeline, context, create_reference =True):
self.context = context

self.query = (
155 changes: 30 additions & 125 deletions prompting/tasks/generic_instruction.py
@@ -1,141 +1,46 @@
import re
import bittensor as bt
from dataclasses import dataclass
from tenacity import retry, stop_after_attempt
from prompting.tasks import Task
from typing import Tuple

CRITERIA_GENERATION_PROMPT = """\
We are brainstorming criteria with which to grade a language model on its responses in
diverse situations.
A ‘criteria‘ is some useful, real-world objective, and associated rubric for scores 1-5, that
tests a capability.

Please brainstorm a new criteria and scoring rubrics.
Be creative and create new but useful criteria that people in different settings or industries
might find practical.
Please format the output as same as the above examples with no extra or surrounding text.
Write [END] after you are done.
New Criteria:
QUERY_PROMPT_TEMPLATE = """\
You are a question-generating expert, focusing on delivering comprehensive and accurate questions with depth and clarity. Your response contains only the question, nothing more, nothing less. You will adhere to a word limit of 100 words.
{context}
"""

REFERENCE_PROMPT_TEMPLATE = """\
Answer the following question.

INSTRUCTION_GENERATION_PROMPT = """\
Your job is to generate a new novel problem and a response that is related to the given score
rubric.
The score rubric:
{CRITERIA}
* Problem
- The problem should inherently be related to the score criteria and score rubric given above.
Specifically, the score criteria should be the core attributes required to solve the problem.
- The problem itself should not be too generic or easy to solve.
- If the score rubric is related to logical abilities, generate problems that require math or
coding abilities.
- Try to make the person who might solve the problem not notice the existence of the score
rubric by not explicitly mentioning it, and also provide additional inputs and options if
needed.
- Assume a situation where a user is interacting with an AI model. The user would try to
ask in a first-person point of view, but not using terms like ”I”, ”A User” or ”You” in the
first sentence.
- Do not give a role to the AI, assume that the user is asking a question from his point of
view.
- Do not include any phrase related to AI model in the problem.
* Response
- The response should be a response that would get a score of 5 from the score rubric.
- The response should be as detailed as possible unless the score rubric is related to
conciseness or brevity. It should consist of multiple paragraphs, a list of items, or a
step-by-step reasoning process.
- The response should look like how a well-prompted GPT-4 would normally answer your
problem.
* Format
- DO NOT WRITE ANY GREETING MESSAGES, just write the problem and response
only.
- In front of the problem, append the phrase ”Problem:” and in front of the response, append
the phrase ”Response:”.
- Write in the order of ”Problem” - ”Response”, where the two items are separated by the
phrase ”[NEXT]”.
- Write [END] after you are done.
Data Generation:
"""
# Question:
{query}"""


@dataclass
class GenericInstructionTask(Task):
challenge_type = 'query'
name = "generic"
desc = "get help on answering a general instruction"
goal = "to get the answer to the following instruction"

reward_definition = [
dict(name="rouge", ngram="rouge-1", metric="f", weight=1.0),
dict(name="relevance", threshold=None, weight=1.0),
]
penalty_definition = [
dict(name="rouge", ngram="rouge-1", metric="f", weight=0.5),
]

def __init__(self, llm_pipeline):
super().__init__(
name="generic_instruction",
goal="to get the answer to a instruction",
delimiter="```",
reward_types=[
"CRITERIA_REWARD",
],
reward_threshold=0.5,
use_challenge_as_prompt=True,
desc="",
topics={},
topic="",
subtopic="",
challenge="",
reference="",
criteria="",
)

self.criteria = self.create_criteria(llm_pipeline)
instruction, reference = self.create_instruction_and_reference(llm_pipeline)
self.challenge = instruction
self.reference = reference

def extract_instruction_and_reference_from_text(self, text: str) -> Tuple[str, str]:
# Split the text into problem and response using regular expression
split_text = re.split(r"\nResponse:\n", text)

# Extract problem and response
problem = split_text[0].strip()
response = split_text[1].strip()

return problem, response

def create_criteria(self, llm) -> str:
bt.logging.debug("🎲 Creating a generic criteria-scoring rubric ...")

# Generate a score rubric with defined criterias
criteria_generation_response = llm(CRITERIA_GENERATION_PROMPT)
return criteria_generation_response

@retry(stop=stop_after_attempt(5))
def create_instruction_and_reference(self, llm) -> Tuple[str, str]:
try:
bt.logging.debug("📋 🎯 Creating instruction and referece text...")
cleaning_pipeline = [
dict(name="remove_quotes"),
dict(name="prune_ending"),
dict(name="remove_roles"),
]

if not self.criteria:
raise ValueError(
"Criteria must be defined before creating a generic instruction."
)
def __init__(self, llm_pipeline, context, create_reference=True):
self.context = context

# Create generic instruction with the score rubric
instruction_generation_prompt_with_criteria = (
INSTRUCTION_GENERATION_PROMPT.format(CRITERIA=self.criteria)
)
instruction_generation_response = llm(
instruction_generation_prompt_with_criteria
)
self.query_prompt = QUERY_PROMPT_TEMPLATE.format(context=context.content)
self.query = self.generate_query(llm_pipeline)

# Extract generic instruction and reference response from the generated text
(
instruction,
reference,
) = self.extract_instruction_and_reference_from_text(
instruction_generation_response
)
self.reference_prompt = REFERENCE_PROMPT_TEMPLATE.format(query = self.query)
if create_reference:
self.reference = self.generate_reference(llm_pipeline)

return instruction, reference
except Exception as e:
bt.logging.error(
f"Failed to create instruction and reference text: {e}, retrying..."
)
raise e
self.topic = context.title
self.subtopic = context.topic
self.tags = context.tags
4 changes: 2 additions & 2 deletions prompting/tasks/summarization.py
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from prompting.tasks import Task
from transformers import Pipeline



# TODO: introduce criteria for the query and reference answer (length, layout, etc.) and make these arguments
@@ -39,7 +39,7 @@ class SummarizationTask(Task):

static_query = True

def __init__(self, llm_pipeline: Pipeline, context: str, create_reference=True):
def __init__(self, llm_pipeline, context, create_reference=True):
self.context = context

# Query is just the article title and section name
14 changes: 8 additions & 6 deletions prompting/tools/__init__.py
@@ -7,16 +7,18 @@
StackOverflowDataset,
WikiDateDataset,
MathDataset,
GenericInstructionDataset,
)
from .selector import Selector

DATASETS = {
"mock": MockDataset,
"hf_coding": HFCodingDataset,
"wiki": WikiDataset,
#"stack_overflow": StackOverflowDataset,
"wiki_date": WikiDateDataset,
"math": MathDataset,
MockDataset.name: MockDataset,
#HFCodingDataset.name: HFCodingDataset,
WikiDataset.name: WikiDataset,
#StackOverflowDataset.name: StackOverflowDataset,
MathDataset.name: MathDataset,
WikiDateDataset.name: WikiDateDataset,
GenericInstructionDataset.name: GenericInstructionDataset,
}


1 change: 1 addition & 0 deletions prompting/tools/datasets/__init__.py
@@ -4,3 +4,4 @@
from .math import MathDataset
from .mock import MockDataset
from .wiki import WikiDataset, WikiDateDataset
from .generic_instruction import GenericInstructionDataset
46 changes: 44 additions & 2 deletions prompting/tools/datasets/base.py
@@ -17,6 +17,8 @@
# DEALINGS IN THE SOFTWARE.

import time
import random
import functools
from abc import ABC, abstractmethod
from typing import Dict
import bittensor as bt
@@ -28,7 +30,7 @@

class Dataset(ABC):
"""Base class for datasets."""
name = "dataset"

max_tries: int = 10

@abstractmethod
@@ -74,11 +76,51 @@ def next(
f"Could not find any samples which meet {self.__class__.__name__} requirements after {tries} tries."
)

info["source"] = self.__class__.__name__
info["stats"] = {
"creator": self.__class__.__name__,
"fetch_time": time.time() - t0,
"num_tries": tries,
"fetch_method": method,
"next_kwargs": kwargs,
}
return Context(**info)


class TemplateDataset(Dataset):
"""Base class for datasets based on a template."""

@property
def size(self):
return functools.reduce(
lambda x, y: x * y, [len(v) for v in self.params.values()], 1
)

def __repr__(self):
return f"{self.__class__.__name__} with template: {self.query_template!r} and {self.size} possible phrases"

def get(self, params: dict):
content = self.query_template.format(**params)
keys, values = list(zip(*params.items()))

return {
"title": params.get(
"title", keys[0]
), # Use the first key as the title if no field called title is present
"topic": params.get("topic", keys[min(1, len(keys) - 1)]), # Same for topic
"subtopic": params.get(
"subtopic", keys[min(2, len(keys) - 2)]
), # Same for subtopic
"content": content, # content
"internal_links": values, # internal links
"external_links": values, # external links
"tags": values, # tags
"extra": {},
}

def random(self, selector: Selector = None):
selected = {k: selector(v) for k, v in self.params.items()}
return self.get(selected)

def search(self, params: dict, selector: Selector = None):
selected = {k: params.get(k, selector(v)) for k, v in self.params.items()}
return self.get(selected)
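A minimal sketch of how a concrete TemplateDataset subclass could plug into this base class (the class, template, and params below are invented for illustration; the real GenericInstructionDataset lives in prompting/tools/datasets/generic_instruction.py):

# Hypothetical subclass, only to illustrate the TemplateDataset contract.
class ToyInstructionDataset(TemplateDataset):
    name = "toy_instruction"
    query_template = "Write a {style} explanation of {topic} for a {audience}."
    params = dict(
        style=["concise", "detailed"],
        topic=["binary search", "gradient descent"],
        audience=["beginner", "practitioner"],
    )

# size is the product of the value-list lengths (here 2 * 2 * 2 = 8 phrases).
# random(selector) picks one value per key via the Selector and returns get(...),
# while search(params, selector) pins the supplied keys and samples the rest;
# Dataset.next() then wraps the resulting dict in a Context with fetch stats.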