Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from prompting.llms import HuggingFacePipeline, vLLMPipeline
from prompting.base.validator import BaseValidatorNeuron
from prompting.rewards import RewardPipeline
from prompting.tasks import TranslationPipeline


class Validator(BaseValidatorNeuron):
Expand All @@ -38,7 +39,8 @@ def __init__(self, config=None):
model_id=self.config.neuron.model_id,
device=self.device,
mock=self.config.mock,
)
)
self.translation_pipeline = TranslationPipeline()

if abs(1-sum(self.config.neuron.task_p)) > 0.001:
raise ValueError("Task probabilities do not sum to 1.")
Expand Down
1 change: 1 addition & 0 deletions prompting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from . import agent
from . import conversation
from . import dendrite
from . import shared

from .llms import hf

Expand Down
1 change: 1 addition & 0 deletions prompting/cleaners/all_cleaners.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
from typing import Union
import bittensor as bt
import re
from typing import Union
Expand Down
9 changes: 8 additions & 1 deletion prompting/conversation.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import random
from transformers import Pipeline
from prompting.tasks import Task, TASKS
from prompting.tasks import Task, TASKS, TranslationPipeline, TranslationTask
from prompting.tools import Selector, DATASETS
from prompting.task_registry import TASK_REGISTRY


def create_task(
llm_pipeline: Pipeline,
translation_pipeline: TranslationPipeline,
task_name: str,
create_reference: bool = True,
selector: Selector = random.choice,
Expand Down Expand Up @@ -42,6 +43,12 @@ def create_task(
raise ValueError(f"Dataset {dataset_name} not found")
else:
dataset = dataset()

if task_name == TranslationTask.name:
return task(
translation_pipeline=translation_pipeline,
context=dataset.next()
)

return task(
llm_pipeline=llm_pipeline,
Expand Down
1 change: 1 addition & 0 deletions prompting/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ async def forward(self):
try:
task = create_task(
llm_pipeline=self.llm_pipeline,
translation_pipeline=self.translation_pipeline,
task_name=task_name,
create_reference=False,
)
Expand Down
1 change: 1 addition & 0 deletions prompting/shared/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .context import Context
File renamed without changes.
4 changes: 3 additions & 1 deletion prompting/task_registry.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask, SentimentAnalysisTask
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask, SentimentAnalysisTask, TranslationTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset, GenericInstructionDataset, ReviewDataset

# TODO: Expand this to include extra information beyond just the task and dataset names
Expand All @@ -8,6 +8,7 @@
math_task, math_dataset = MathTask.name, [MathDataset.name]
date_qa_task, date_qa_dataset = DateQuestionAnsweringTask.name, [WikiDateDataset.name]
generic_instruction_task, generic_instruction_dataset = GenericInstructionTask.name, [GenericInstructionDataset.name]
translation_task, translation_dataset = TranslationTask.name, [WikiDataset.name]
sentiment_analysis_task, sentiment_analysis_dataset = SentimentAnalysisTask.name, [ReviewDataset.name]

TASK_REGISTRY = {
Expand All @@ -17,5 +18,6 @@
math_task: math_dataset,
date_qa_task: date_qa_dataset,
generic_instruction_task: generic_instruction_dataset,
translation_task: translation_dataset,
sentiment_analysis_task: sentiment_analysis_dataset,
}
3 changes: 2 additions & 1 deletion prompting/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
from .date_qa import DateQuestionAnsweringTask
from .generic_instruction import GenericInstructionTask
from .math import MathTask
from .translate import TranslationTask, TranslationPipeline
from .mock import MockTask
from .sentiment import SentimentAnalysisTask


TASKS = {
QuestionAnsweringTask.name: QuestionAnsweringTask,
DateQuestionAnsweringTask.name: DateQuestionAnsweringTask,
SummarizationTask.name: SummarizationTask,
#DebuggingTask.name: DebuggingTask,
GenericInstructionTask.name: GenericInstructionTask,
MathTask.name: MathTask,
TranslationTask.name: TranslationTask,
SentimentAnalysisTask.name: SentimentAnalysisTask,
}
130 changes: 130 additions & 0 deletions prompting/tasks/translate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import tqdm
import bittensor as bt
import argostranslate.package
import argostranslate.translate
import random
from typing import List, Tuple
from prompting.tasks import Task
from dataclasses import dataclass
from argostranslate.package import AvailablePackage
from prompting.shared import Context

SUPPORTED_LANGUAGES = [
"en", "es", "fr", "pt", "uk"
]

class TranslationPipeline:
    """Manages Argos Translate language packages and exposes helpers for
    translating text between the languages in SUPPORTED_LANGUAGES.

    Instantiation performs network I/O: it refreshes the Argos package
    index and downloads/installs any missing language-pair packages.
    """

    def __init__(self):
        # Downloads and installs missing packages as a side effect.
        self.supported_language_pairs = self.load_translation_packages()

    def load_translation_packages(self) -> List[AvailablePackage]:
        """Ensure all supported language-pair packages are installed locally.

        Returns:
            The available packages whose ``from_code`` and ``to_code`` are
            both in SUPPORTED_LANGUAGES.
        """
        # Update package index and get available and installed packages
        argostranslate.package.update_package_index()
        available_packages = argostranslate.package.get_available_packages()
        installed_packages = argostranslate.package.get_installed_packages()

        # Helper function to check if a package is installed
        def is_package_installed(from_code, to_code, packages):
            return any(
                pkg.from_code == from_code and pkg.to_code == to_code
                for pkg in packages
            )

        # Filter available packages for supported language pairs
        supported_language_pairs = [
            pkg for pkg in available_packages
            if pkg.from_code in SUPPORTED_LANGUAGES and pkg.to_code in SUPPORTED_LANGUAGES
        ]

        bt.logging.info(f"Supported language pairs: {supported_language_pairs}")

        # Install any supported pair that is not yet present locally
        pbar = tqdm.tqdm(supported_language_pairs, desc="Checking installed packages")
        for package in pbar:
            if not is_package_installed(package.from_code, package.to_code, installed_packages):
                bt.logging.info(f"Installing package from {package.from_code} to {package.to_code}")
                package_path = str(package.download())
                argostranslate.package.install_from_path(package_path)
                bt.logging.success(f'Package successfully installed at {package_path}')
            else:
                bt.logging.info(f"Package from {package.from_code} to {package.to_code} is already installed, skipping...")

        return supported_language_pairs

    def random_translation(self, content: str) -> str:
        # TODO: NOT RANDOM
        # Fix: SUPPORTED_LANGUAGES is a module-level constant, not an
        # instance attribute — `self.SUPPORTED_LANGUAGES` raised
        # AttributeError on every call.
        from_code = SUPPORTED_LANGUAGES[0]
        to_code = SUPPORTED_LANGUAGES[1]
        return argostranslate.translate.translate(content, from_code, to_code)

    def translate(self, content: str, from_code: str, to_code: str) -> str:
        """Translate `content` from `from_code` to `to_code`.

        Keeps the pre-existing side effect of storing the result on
        ``self.reference``, and additionally returns it so callers don't
        have to read the attribute back.
        """
        self.reference = argostranslate.translate.translate(content, from_code, to_code)
        return self.reference

    def translate_to_random_language(self, content: str, from_code: str = 'en') -> Tuple[AvailablePackage, str]:
        """Translate `content` into a randomly chosen supported target language.

        Args:
            content: Text to translate.
            from_code: Source language code (default ``'en'``).

        Returns:
            The chosen package object (exposes ``to_code`` / ``to_name``)
            and the translated text.

        Raises:
            IndexError: If no installed pair starts at `from_code`
                (``random.choice`` on an empty list).
        """
        # Pairs whose source language matches from_code. (The original
        # identity `map(lambda x: x, ...)` was a no-op and is removed.)
        available_translations = [
            pair for pair in self.supported_language_pairs
            if pair.from_code == from_code
        ]

        random_translation_obj = random.choice(available_translations)
        translation_code = random_translation_obj.to_code

        translated_content = argostranslate.translate.translate(content, from_code, to_code=translation_code)

        return random_translation_obj, translated_content


@dataclass
class TranslationTask(Task):
    """Task asking a miner to translate a piece of text.

    The query embeds text that has already been translated once into a
    random language and asks for a translation into a second random
    language; the reference answer is that second translation.
    """
    # Identifier used to look this task up in TASKS / TASK_REGISTRY
    name = "translation"
    desc = "get translation help"
    goal = "to get the translation for the given piece of text"

    # Query phrasings; each is formatted with {another_language} and {text}.
    templates = [
        "Could you assist me with translating the following text into {another_language}? \n{text}",
        "I need some help translating this text into {another_language}. Can you do it? \n{text}",
        "Is it possible for you to translate this text for me into {another_language}? Here it is: \n{text}",
        "Would you mind helping me convert this text into {another_language}? \n{text}",
        "Could you please convert this into {another_language} for me? \n{text}",
        "I was wondering if you could help translate this into {another_language}? \n{text}",
        "Can you provide a translation for this text into {another_language}? \n{text}",
        "Hey, can you turn this text into {another_language} for me? \n{text}",
        "Could I get some assistance in translating this into {another_language}? \n{text}",
        "Are you able to help me render this text in {another_language}? \n{text}",
        "I'd appreciate your help translating this text into {another_language}. Here's the text: \n{text}",
        "Please could you translate the following text into {another_language}? \n{text}",
        "Might you help me by translating this text to {another_language}? \n{text}",
        "I'm looking for help to translate this text into {another_language}. Any chance you can assist? \n{text}",
        "How about translating this text into {another_language} for me? \n{text}",
        "Would it be possible for you to help translate this text into {another_language}? \n{text}",
        "I need your expertise to translate this text into {another_language}, can you help? \n{text}",
        "Can you work your magic and translate this text into {another_language}? \n{text}",
        "I require assistance translating the following into {another_language}. Can you help? \n{text}",
        "Hey, could you take a moment to translate this text into {another_language} for me? \n{text}",
    ]

    # TODO: TEST BLEU SCORE
    # NOTE(review): penalty_definition is identical to reward_definition
    # (same metric, same weight) — confirm this duplication is intentional.
    reward_definition = [
        dict(name="rouge", ngram="rouge-1", metric="f", weight=1),
    ]
    penalty_definition = [
        dict(name="rouge", ngram="rouge-1", metric="f", weight=1),
    ]

    # No output cleaning is applied to translation responses.
    cleaning_pipeline = []

    def __init__(self, translation_pipeline: TranslationPipeline, context: Context):
        """Build the query and reference from `context.content`.

        Args:
            translation_pipeline: Provides translate_to_random_language;
                both translations below go through it.
            context: Source material; `content` is translated, and
                `title`/`topic`/`tags` are copied onto the task.

        NOTE(review): this overrides the dataclass-generated __init__ and
        does not call super().__init__(); any initialization done by the
        Task base class is skipped — confirm that is intended.
        """
        # Set task internal variables
        self.context = context
        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags

        # Translates english text to a random language
        content_translation_obj, translated_content = translation_pipeline.translate_to_random_language(context.content)

        # Translates the translation to another random language
        reference_translation_obj, reference_translation_content = translation_pipeline.translate_to_random_language(content=translated_content, from_code=content_translation_obj.to_code)
        self.reference = reference_translation_content

        # Composes the query
        # TODO: Implement template translation
        template = random.choice(self.templates)
        self.query = template.format(another_language=reference_translation_obj.to_name, text=translated_content)


1 change: 0 additions & 1 deletion prompting/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from .datasets import (
Context,
Dataset,
MockDataset,
HFCodingDataset,
Expand Down
1 change: 0 additions & 1 deletion prompting/tools/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from .context import Context
from .base import Dataset
from .code import HFCodingDataset, StackOverflowDataset
from .math import MathDataset
Expand Down
2 changes: 1 addition & 1 deletion prompting/tools/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import bittensor as bt

from ..selector import Selector
from .context import Context
from prompting.shared.context import Context
from prompting.utils.exceptions import MaxRetryError


Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ langchainhub==0.1.14
python-dotenv
wikipedia_sections
vllm
loguru
loguru
argostranslate
6 changes: 5 additions & 1 deletion tests/fixtures/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
DebuggingTask,
MathTask,
DateQuestionAnsweringTask,
TranslationTask
)
from prompting.tools import Context
from prompting.shared import Context
from .dataset import WIKI_CONTEXT, CODING_CONTEXT, MATH_CONTEXT, DATEQA_CONTEXT, MOCK_CONTEXT

TASKS = [
Expand All @@ -17,6 +18,8 @@
DebuggingTask,
MathTask,
DateQuestionAnsweringTask,
#TODO: Add proper separation for translation task tests
#TranslationTask
]

CONTEXTS = {
Expand All @@ -26,6 +29,7 @@
DebuggingTask: CODING_CONTEXT,
MathTask: MATH_CONTEXT,
DateQuestionAnsweringTask: DATEQA_CONTEXT,
#TranslationTask: WIKI_CONTEXT
}

TASK_FIELDS = {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .fixtures.dataset import DATASETS, CONTEXTS, CONTEXT_FIELDS
from prompting.tools.datasets import Dataset
from prompting.tools import Context
from prompting.shared import Context


@pytest.mark.parametrize("dataset", DATASETS)
Expand Down