# Demo of Translation Use Case with Lumigator SDK

### Lumigator Client
Check that the Lumigator backend is up and running

In [1]:
from lumigator_sdk.lumigator import LumigatorClient

In [2]:
# Host and port handed to the Lumigator client
LUMI_HOST = "localhost:8000"
client = LumigatorClient(api_host=LUMI_HOST)

# Run the health check once and report the status it returns
health = client.health.healthcheck()
print(f"Connection is: {health.status}")

Connection is: OK


### Dataset
Using the `google/wmt24pp` dataset from Hugging Face Datasets, released as part of [WMT24++: Expanding the Language Coverage of WMT24 to 55 Languages & Dialects](https://arxiv.org/abs/2502.12404v1)

In [3]:
import pandas as pd
from datasets import load_dataset

In [4]:
# Human-readable language names, reused later in the task definition and the system prompt
source_language, target_language = "English", "German"

# Configuration name selecting the language pair within google/wmt24pp
language_pair = "en-de_DE"
dataset = load_dataset(path="google/wmt24pp", name=language_pair, split="train")

In [5]:
# Slice off the first record and keep only the two text columns.
# NOTE(review): the first row is skipped on purpose by this slice — presumably it is
# not a regular sentence pair; confirm against the dataset card.
translation_columns = ["source", "target"]
df_translation = pd.DataFrame(dataset[1:]).loc[:, translation_columns]
print(f"Loaded {len(df_translation)} sentence pairs")
df_translation.head()

Loaded 997 sentence pairs


Unnamed: 0,source,target
0,"Siso's depictions of land, water center new ga...",Sisos Darstellungen von Land und Wasser in neu...
1,"""People Swimming in the Swimming Pool"" from 20...",„People Swimming in the Swimming Pool“ aus dem...
2,"Tierra del Sol is pleased to present ""Vicente ...",„Vicente Siso: Memories of the Land and Water“...
3,"Masterfully working across subject matter, Sis...","In zahlreichen Serien von Landschaften, Porträ..."
4,The Tierra del Sol Gallery is located at 7414 ...,"Galerie „Tierra del Sol“, 7414 Santa Monica Bl..."


In [6]:
# Local CSV file that will hold the evaluation samples
filename = "translation_eval_dataset.csv"

# Rename the columns to `examples` / `ground_truth` and keep only the first 10 rows
df_translation = (
    df_translation
    .rename(columns={"source": "examples", "target": "ground_truth"})
    .head(10)
)

# Save the 10 samples locally, leaving the DataFrame index out of the file
df_translation.to_csv(filename, index=False)

### Upload Dataset

In [7]:
from pathlib import Path

from lumigator_schemas.datasets import DatasetFormat

# Upload the CSV file created in the previous step.
# pandas writes CSV as UTF-8 by default, so decode it explicitly as UTF-8 rather than
# relying on the platform's locale encoding: the German text contains non-ASCII
# characters (umlauts, „“ quotes) that would be mis-decoded or rejected otherwise.
data = Path(filename).read_text(encoding="utf-8")
dataset_response = client.datasets.create_dataset(dataset=data, format=DatasetFormat.JOB)

# Keep the ID: the experiment created next refers to the dataset by it
dataset_id = dataset_response.id
print(f"Dataset uploaded and has ID: {dataset_id}")

Dataset uploaded and has ID: 10a0a229-7bc4-42c5-bea0-6758cffd85d9


### Create Experiment

In [8]:
from lumigator_schemas.experiments import ExperimentCreate

# Describe the task: a translation between the languages selected earlier
max_samples = 5
task_definition = {
    "task": "translation",
    "source_language": source_language,
    "target_language": target_language,
}

# Assemble the experiment request for the uploaded dataset
experiment_name = f"Demo Experiment {source_language} to {target_language}"
request = ExperimentCreate(
    name=experiment_name,
    description="Experiment for demo",
    dataset=dataset_id,
    task_definition=task_definition,
    max_samples=max_samples,
)

# Submit the request and remember the ID; the workflows below are attached to it
experiment_response = client.experiments.create_experiment(request)
experiment_id = experiment_response.id
print(f"Experiment created and has ID: {experiment_id}")

Experiment created and has ID: 9


### Create and Run Workflows
- One with API-based OpenAI GPT-4o-mini
- One with locally hosted Ollama model (Prerequisite: `ollama run qwen2.5`)
- One with a multilingual model on HuggingFace
- One with a bilingual Opus-MT model on HuggingFace

In [10]:
from lumigator_schemas.workflows import WorkflowCreateRequest

# System prompt used by the Ollama configuration below. Written line by line so the
# exact text (leading newline, trailing space after the first sentence, final
# newline) is explicit in the source.
custom_system_prompt = (
    "\n"
    f"You are an expert in {source_language} and {target_language}. \n"
    f"Please provide a high-quality translation of the following text from {source_language} to {target_language}.\n"
    "Only generate the translated text. No additional text or explanation needed.\n"
)
batch_size = 5
metrics = ["bleu", "meteor", "comet"]

# OpenAI GPT-4o-mini, no explicit system prompt - uses the default prompt set by Lumigator under the hood
gpt_4o_mini_config = {
    "name": "Translation with gpt-4o-mini",
    "model": "gpt-4o-mini",
    "provider": "openai",
    "secret_key_name": "openai_api_key",
}

# Locally hosted Ollama model with the custom system prompt; it is addressed
# through the "openai" provider with `base_url` pointing at the local server
ollama_qwen_config = {
    "name": "Translation with ollama qwen2.5",
    "model": "qwen2.5",
    "provider": "openai",
    "base_url": "http://localhost:11434/v1",
    "system_prompt": custom_system_prompt,
    "secret_key_name": "openai_api_key",
}

# HuggingFace multi-lingual model
hf_m2m100_config = {
    "name": "Translation with HF m2m100",
    "model": "facebook/m2m100_418M",
    "provider": "hf",
}

# HuggingFace Opus-MT bi-lingual model
hf_opus_mt_config = {
    "name": "Translation with Helsinki-NLP/opus-mt",
    "model": "Helsinki-NLP/opus-mt-en-de",
    "provider": "hf",
}

# One workflow is created per entry, in this order
configurations = [
    gpt_4o_mini_config,
    ollama_qwen_config,
    hf_m2m100_config,
    hf_opus_mt_config,
]

# Parameters shared by every workflow of this experiment
shared_params = {
    "experiment_id": experiment_id,
    "batch_size": batch_size,
    "metrics": metrics,
}

# Create one workflow per configuration and collect the IDs for the results section
workflow_ids = []
for config in configurations:
    # The configuration supplies its own fields (name, model, provider, ...) on top
    # of the shared ones; on a key clash the configuration's value wins.
    params = {**shared_params, **config}
    request = WorkflowCreateRequest(**params)
    created_workflow = client.workflows.create_workflow(request)
    print(f"Created workflow {created_workflow.name} with ID {created_workflow.id} for model {created_workflow.model}")
    workflow_ids.append(created_workflow.id)

Created workflow Translation with gpt-4o-mini with ID fa5618d564594d7b8f6a729b63cb57c2 for model gpt-4o-mini
Created workflow Translation with ollama qwen2.5 with ID 76ffee9bda2640dbb3074dc0d39e18de for model qwen2.5
Created workflow Translation with HF m2m100 with ID 05b3b3d39e7f4456bc8ab683da309970 for model facebook/m2m100_418M
Created workflow Translation with Helsinki-NLP/opus-mt with ID f5462c0382d046678d0178a0fcb65b16 for model Helsinki-NLP/opus-mt-en-de


### View Results
Check http://localhost/experiments for status. Once the experiment status is `SUCCEEDED`, proceed further. 

In [19]:
# Fetch the current state of every workflow created above
workflow_results = [client.workflows.get_workflow(wf_id) for wf_id in workflow_ids]

# Build one single-row frame per workflow (row label = model name) and stack them
# into a single comparison table
metric_rows = []
for result in workflow_results:
    per_model = pd.DataFrame.from_dict({result.model: result.metrics})
    metric_rows.append(per_model.T)
df_metrics = pd.concat(metric_rows)

# Display only the aggregate score of each requested metric
score_columns = ["meteor_meteor_mean", "bleu_bleu_mean", "comet_mean_score"]
df_metrics[score_columns]

Unnamed: 0,meteor_meteor_mean,bleu_bleu_mean,comet_mean_score,bertscore_f1_mean,rouge_rougeL_mean
Helsinki-NLP/opus-mt-en-de,0.602,0.223,0.585,0.906,0.573


In [20]:
# Fields of each workflow's artifacts that are shown side by side
text_columns = ["examples", "ground_truth", "predictions"]

# Download the artifacts of every workflow and keep the selected fields per model
output_texts_per_model = {}
for result in workflow_results:
    artifacts = pd.read_json(result.artifacts_download_url)["artifacts"]
    output_texts_per_model[result.model] = artifacts[text_columns].to_dict()

# Show the first two rows for each model
for model_name, texts in output_texts_per_model.items():
    print(f"Model: {model_name}")
    display(pd.DataFrame(texts).head(2))

Model: Helsinki-NLP/opus-mt-en-de


Unnamed: 0,examples,ground_truth,predictions
0,"Siso's depictions of land, water center new ga...",Sisos Darstellungen von Land und Wasser in neu...,"Siso's Darstellungen von Land, Wasserzentrum n..."
1,"""People Swimming in the Swimming Pool"" from 20...",„People Swimming in the Swimming Pool“ aus dem...,"""People Swimming in the Swimming Pool"" von 202..."


### Models Endpoint
- List models supported for task(s)

In [21]:
# Ask the models endpoint which models it suggests for the translation task
translation_tasks = ["translation"]
models_response = client.models.get_suggested_models(tasks=translation_tasks)

# Print one model identifier per line
for suggested in models_response.items:
    print(suggested.model)

facebook/m2m100_418M
facebook/m2m100_1.2B
Helsinki-NLP/opus-mt
bigscience/mt0-base
bigscience/mt0-large
bigscience/mt0-xl
gpt-4o-mini
gpt-4o
deepseek-reasoner
deepseek-chat
ministral-8b-latest
mistralai/Mistral-7B-Instruct-v0.2
