# Demo of Translation Use Case with Lumigator SDK

### Lumigator Client
Check if client is up and running

In [1]:
from lumigator_sdk.lumigator import LumigatorClient

In [2]:
# Address (host:port) of the Lumigator API that this notebook talks to
LUMI_HOST = "localhost:8000"
client = LumigatorClient(api_host=LUMI_HOST)

# Query the health check once up front so connection problems surface here
health = client.health.healthcheck()
print(f"Connection is: {health.status}")

Connection is: OK


### Dataset
Using the `en-es` (English–Spanish) configuration of the `Helsinki-NLP/opus-100` dataset from Hugging Face Datasets

In [3]:
import pandas as pd
from datasets import load_dataset

In [4]:
# Fetch only the validation split of the en-es configuration
dataset = load_dataset(
    path="Helsinki-NLP/opus-100",
    name="en-es",
    split="validation",
)

In [5]:
# Build one row per sentence pair. Columns are selected by language key rather
# than relabelled by position, so the English and Spanish texts can never be
# silently swapped; a missing key raises a KeyError instead.
# NOTE(review): assumes each "translation" entry is a dict with "en" and "es" keys — confirm.
df_translation = pd.DataFrame(dataset["translation"])[["en", "es"]]
print(f"Loaded {len(df_translation)} validation pairs")
df_translation.head()

Loaded 2000 validation pairs


Unnamed: 0,en,es
0,I don't even remember what the fight was about.,No recuerdo por qué fue la pelea.
1,Here are the sites of each of those that have ...,Estos son los sitios en que cada Congreso ha t...
2,I'm the man who killed Blackbeard.,Sí. Soy el hombre que mató a Barbanegra.
3,Don't get smart.,No te hagas el inteligente.
4,Is there an exact moment in the life of a sold...,¿Existe un límite de cuándo se padece y cuándo...


In [6]:
SAMPLE_SIZE = 10  # number of sentence pairs written to the CSV
SAMPLE_SEED = 42  # fixed seed so a fresh Restart & Run All picks the same rows
filename = "translation_en_es.csv"

# Rename the columns to examples and ground_truth, then draw a reproducible sample
df_translation = df_translation.rename(
    columns={"en": "examples", "es": "ground_truth"}
).sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)

# Save the sampled rows locally; the row index is not written to the file
df_translation.to_csv(filename, index=False)

### Upload Dataset

In [7]:
from pathlib import Path

from lumigator_schemas.datasets import DatasetFormat

# Upload the CSV file that we created earlier.
# Read it explicitly as UTF-8 (the encoding `DataFrame.to_csv` writes by default)
# so the accented Spanish characters are not mis-decoded on platforms whose
# default locale encoding is something else.
data = Path(filename).read_text(encoding="utf-8")
dataset_response = client.datasets.create_dataset(dataset=data, format=DatasetFormat.JOB)
dataset_id = dataset_response.id
print(f"Dataset uploaded and has ID: {dataset_id}")

Dataset uploaded and has ID: 5b8188b9-1217-4cab-b9d2-cbd3fdbea9a1


### Create Experiment

In [8]:
from lumigator_schemas.experiments import ExperimentCreate

# Translation task definition: source and target languages are passed alongside the task name
task_definition = dict(
    task="translation",
    source_language="English",
    target_language="Spanish",
)
max_samples = 5

# Collect the experiment fields first, then build the request from them
experiment_fields = {
    "name": "Translation Experiment EN to ES",
    "description": "With Opus-NLP dataset",
    "dataset": dataset_id,
    "task_definition": task_definition,
    "max_samples": max_samples,
}
request = ExperimentCreate(**experiment_fields)

# Create the experiment and keep its ID for later use
experiment_response = client.experiments.create_experiment(request)
experiment_id = experiment_response.id
print(f"Experiment created and has ID: {experiment_id}")

Experiment created and has ID: 2


### Create and Run Workflows
- One with OpenAI GPT-4o-mini
- One with Mistral-7B

In [9]:
from lumigator_schemas.workflows import WorkflowCreateRequest

# System prompt used only by the workflow that sets "system_prompt" explicitly
custom_system_prompt = """
You are an expert in English and Spanish. 
Please provide a high-quality translation of the following text from English to Spanish.
Only generate the translated text. No additional text or explanation needed.
"""

# Per-workflow settings; each entry is merged with the shared parameters below
configurations = [
    # OpenAI GPT-4o-mini: no explicit system prompt - uses default under the hood
    {
        "name": "Translation with gpt-4o-mini",
        "model": "gpt-4o-mini",
        "provider": "openai",
    },
    # Mistral 7B with custom system prompt
    {
        "name": "Translation with open-mistral-7b",
        "model": "open-mistral-7b",
        "provider": "mistral",
        "system_prompt": custom_system_prompt,
    },
]

# Parameters that are identical for every workflow in this experiment
shared_params = {
    "dataset": dataset_id,
    "experiment_id": experiment_id,
    "task_definition": task_definition,
    "max_samples": max_samples,
}

for config in configurations:
    # Entries from `config` take precedence over the shared parameters
    params = {**shared_params, **config}
    request = WorkflowCreateRequest(**params)
    created_workflow = client.workflows.create_workflow(request)
    print(f"Created workflow {created_workflow.name} with ID {created_workflow.id} for model {created_workflow.model}")

Created workflow Translation with gpt-4o-mini with ID 0d4a304e900b413796d5e6a559b13525 for model gpt-4o-mini
Created workflow Translation with open-mistral-7b with ID eb49365aee7544cf8d0e312465eeea34 for model open-mistral-7b


### Models Endpoint
- List models supported for task(s)

In [10]:
# Ask the models endpoint for the models suggested for the translation task
models_response = client.models.get_suggested_models(tasks=["translation"])

# Print one model identifier per line
suggested_model_names = [entry.model for entry in models_response.items]
for suggested_model_name in suggested_model_names:
    print(suggested_model_name)

gpt-4o-mini
gpt-4o
deepseek-reasoner
deepseek-chat
open-mistral-7b
mistralai/Mistral-7B-Instruct-v0.2
