## Environment Preparation

In [29]:
# Install the third-party packages this notebook imports (dotenv, datasets, nltk, openai, tenacity).
# `%pip` installs into the running kernel's environment; each command's stdout and stderr
# are redirected to /dev/null to keep the cell output short.
%pip install python-dotenv > /dev/null 2>&1
%pip install datasets > /dev/null 2>&1
%pip install nltk > /dev/null 2>&1
%pip install openai > /dev/null 2>&1
%pip install tenacity > /dev/null 2>&1

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
from dotenv import load_dotenv

# Load the .env file so that the os.environ lookups further down
# (AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_KEY) can find their values.
# The call is the last expression of the cell, so its return value is displayed.
# NOTE(review): the recorded output is `False`, which presumably means no .env file
# was found or nothing was loaded — confirm the credentials are available before
# running the Azure cells.
load_dotenv()

False

## Utils

### Create Translator

Create various translators to translate sentences.

In [19]:
from abc import ABC, abstractmethod

class Translator(ABC):
    """Abstract base class implemented by every concrete translator.

    Subclasses must override :meth:`translate`; the base class itself cannot
    be instantiated.
    """

    @abstractmethod
    def translate(
        self,
        text: str,
        source_lang: str,
        target_lang: str,
    ) -> str:
        """Translate ``text`` from ``source_lang`` into ``target_lang``.

        Args:
            text: The text to translate.
            source_lang: Language of ``text``.
            target_lang: Language the text should be translated into.

        Returns:
            The translated text.
        """

#### Azure OpenAI

In [30]:
from openai import AzureOpenAI
from tenacity import retry, stop_after_attempt, wait_exponential

class InvalidSystemPromptError(Exception):
    """Raised when a system prompt lacks the <source_lang> or <target_lang> placeholder."""


class AzureOpenAITranslator(Translator):
    """Translator that sends text to an Azure OpenAI chat-completions deployment.

    The system prompt is a template: its ``<source_lang>`` and ``<target_lang>``
    markers are substituted with the requested languages on every call.
    """

    def __init__(
        self,
        endpoint: str,
        subscription_key: str,
        deployment: str,
        api_version: str = "2024-02-01",
        system_prompt: str = "translate from <source_lang> to <target_lang>",
        **kwargs,
    ):
        """Validate the prompt template and build the Azure OpenAI client.

        Args:
            endpoint: Passed to the client as ``azure_endpoint``.
            subscription_key: Passed to the client as ``api_key``.
            deployment: Deployment name, sent as ``model`` on each request.
            api_version: API version handed to the client.
            system_prompt: Template that must contain both ``<source_lang>``
                and ``<target_lang>``.
            **kwargs: Forwarded unchanged to the ``AzureOpenAI`` constructor.

        Raises:
            InvalidSystemPromptError: If either placeholder is missing.
        """
        has_both_placeholders = (
            "<source_lang>" in system_prompt and "<target_lang>" in system_prompt
        )
        if not has_both_placeholders:
            raise InvalidSystemPromptError("system_prompt must contain <source_lang> and <target_lang> placeholders.")

        self._translator = AzureOpenAI(
            azure_endpoint=endpoint,
            api_key=subscription_key,
            api_version=api_version,
            **kwargs,
        )
        self._deployment = deployment
        self._system_prompt = system_prompt

    # Up to 5 attempts, with an exponential wait clamped between 10 and 60
    # (presumably seconds) between them.
    # NOTE(review): no retry predicate is given, so tenacity's default policy
    # applies — confirm which exceptions should really be retried.
    @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=10, min=10, max=60))
    def translate(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate ``text`` with a single chat-completion request.

        Args:
            text: Sent as the user message.
            source_lang: Replaces ``<source_lang>`` in the system prompt.
            target_lang: Replaces ``<target_lang>`` in the system prompt.

        Returns:
            The content of the first choice, or ``""`` when that content is ``None``.
        """
        instructions = self._system_prompt.replace("<source_lang>", source_lang)
        instructions = instructions.replace("<target_lang>", target_lang)

        conversation = [
            {"role": "system", "content": instructions},
            {"role": "user", "content": text},
        ]
        completion = self._translator.chat.completions.create(
            model=self._deployment,
            messages=conversation,
        )

        reply = completion.choices[0].message.content
        if reply is None:
            return ""
        return reply

### Create Benchmark Evaluator

In [37]:
from abc import ABC, abstractmethod
from typing import Literal

# Define the abstract evaluator class that will be implemented by the different evaluators
# The evaluator class will have a score method that will take a list of references and a list of candidates and return the score
class Evaluator(ABC):
    @abstractmethod
    def score(self, references: list[str], candidate: str) -> list[int] | Literal[0]:
        pass

#### BLEU

In [38]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from typing import Literal

class BLEUSentenceEvaluator(Evaluator):
    """Sentence-level BLEU evaluator that delegates to NLTK's ``sentence_bleu``."""

    def __init__(
        self,
        smoothing_function = SmoothingFunction().method1,
        weights: tuple[float, float, float, float] = (0.25, 0.25, 0.25, 0.25),
    ):
        """Store the smoothing function and n-gram weights used for scoring.

        Args:
            smoothing_function: Passed to ``sentence_bleu`` on every call.
                It is there to avoid the warning emitted when the candidate is
                empty and the BLEU score is 0, by smoothing that 0 into a very
                small positive value.
            weights: Passed to ``sentence_bleu`` as ``weights``.
        """
        self._smoothing_function = smoothing_function
        self._weights = weights

    def score(self, references: list[str], candidate: str) -> list[int] | Literal[0]:
        """Compute the BLEU score of ``candidate`` against ``references``.

        Both the references and the candidate are tokenised by whitespace
        (``str.split``) before being handed to ``sentence_bleu``.

        Args:
            references: Reference translations.
            candidate: The translation being evaluated.

        Returns:
            Whatever ``sentence_bleu`` returns for the tokenised inputs.
        """
        reference_tokens = [sentence.split() for sentence in references]
        hypothesis_tokens = candidate.split()

        return sentence_bleu(
            reference_tokens,
            hypothesis_tokens,
            weights=self._weights,
            smoothing_function=self._smoothing_function,
        )

### Evaluate Translation Function

In [40]:
def evaluate_translation(
    translator: Translator,
    evaluator: Evaluator,
    source_text: str,
    source_lang: str,
    target_lang: str,
    references: list[str],
    verbose: bool = False,
):
    """Translate ``source_text`` and score the result against ``references``.

    Args:
        translator: Produces the translation via ``translate``.
        evaluator: Scores the translation via ``score``.
        source_text: Text to translate.
        source_lang: Language of ``source_text``.
        target_lang: Language to translate into.
        references: Reference translations handed to the evaluator.
        verbose: When true, print the source text, the translation and the score.

    Returns:
        A ``(score, translated_text)`` tuple.
    """
    translation = translator.translate(source_text, source_lang, target_lang)
    result = evaluator.score(references, translation)

    if verbose:
        report = (
            ("Source Text", source_text),
            ("Translated Text", translation),
            ("BLEU Score", result),
        )
        for label, value in report:
            print(f"{label}: {value}")

    return result, translation

## Testing Playground

### Load Dataset

Load the dataset from Hugging Face.

In [41]:
import datasets.config
import os

# Keep the datasets cache in a `.cache/` folder under the current working directory.
cache_dir = os.path.join(os.getcwd(), '.cache/')

# exist_ok=True makes the creation idempotent, so no separate existence check
# (and no window between the check and the creation) is needed.
os.makedirs(cache_dir, exist_ok=True)

# Point the datasets library at that folder.
datasets.config.HF_DATASETS_CACHE = cache_dir

In [42]:
from datasets import load_dataset

# Identifier of the dataset to load.
DATASET_ID = "5CD-AI/Vietnamese-mahiatlinux-Reflection-Dataset-ShareGPT-v2-gg-translated"

ds = load_dataset(DATASET_ID)
print(ds)

DatasetDict({
    train: Dataset({
        features: ['conversations_vi', 'conversations_en'],
        num_rows: 9171
    })
})


### Execute Testing

In [None]:
import os

# Number of dataset rows to translate and score; also the divisor of the average.
NUM_SAMPLES = 5

# Fail fast with a clear message when credentials are missing. Otherwise empty
# strings would be passed on and the problem would only show up inside the
# retried API call.
required_env = ("AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY")
missing_env = [name for name in required_env if not os.environ.get(name)]
if missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env)
        + ". Set them (for example in a .env file) before running this cell."
    )

azure_translator = AzureOpenAITranslator(
    endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    subscription_key=os.environ["AZURE_OPENAI_KEY"],
    deployment="gpt-4o", # change your deployment here, and make sure it is available in your Azure OpenAI account
)

evaluator = BLEUSentenceEvaluator()

# Translation direction is the same for every sample.
source_lang = "en"
target_lang = "vi"

total_score = 0
for i in range(NUM_SAMPLES):
    # Fetch the row once; the first turn of each conversation is used as the
    # source text (English) and as the single reference (Vietnamese).
    row = ds["train"][i]
    source_text = row["conversations_en"][0]["value"]
    references = [row["conversations_vi"][0]["value"]]

    score, translated = evaluate_translation(azure_translator, evaluator, source_text, source_lang, target_lang, references)

    # The evaluator may return either a single value or a list of values.
    if isinstance(score, list):
        total_score += sum(score)
    else:
        total_score += score

print(f"Average BLEU Score: {total_score / NUM_SAMPLES}")