In [7]:
from openai import OpenAI
import pandas as pd
import json
from pydantic import BaseModel
from dotenv import load_dotenv
import os
from pathlib import Path
from src.utils import send_mac_notification

### Prompt

In [8]:
# Translation jobs this notebook knows about. The value is the task sentence
# that is embedded at the top of the system prompt.
tasks = dict(blogposts="Translate Luxreaders Blog Posts.")

# Storefront top-level domain per market label ("<locale code> (<language>)").
# Insertion order is significant: the dict is rendered as-is into the prompt.
root_domains = dict(
    [
        ("da (danish)", ".dk"),
        ("sv (swedish)", ".se"),
        ("nb (norwegian)", ".no"),
        ("fi (finnish)", ".fi"),
        ("de (german)", ".de"),
        ("nl (dutch)", ".nl"),
        ("fr (french)", ".fr"),
    ]
)

# Market labels, in the same order as the domain table above.
markets = list(root_domains)

# Fixed translations for recurring brand/product vocabulary.
#   outer key   -> the English source term
#   inner key   -> market label in the "<locale code> (<language>)" form
#   inner value -> the wording to use for that market
# The whole dict is interpolated into the system prompt, which tells the model
# to adapt grammatical forms (plural, definite article, ...) as needed.
# "[price]" appears in both the source term and its translations; presumably it
# is a placeholder for the localized price -- confirm against real blog content.
glossary = {
    "3 pairs of glasses for [price]": {
        "fr (french)": "Trois paires de lunettes pour [price]",
        "de (german)": "3 Brillen für [price]",
        "da (danish)": "3 par briller for [price]",
        "fi (finnish)": "Kolme paria silmälaseja hintaan [price]",
        "nl (dutch)": "3 paar brillen voor [price]",
        "nb (norwegian)": "3 par briller for [price]",
        "sv (swedish)": "3 par glasögon för [price]"
    },
    "blue light glasses": {
        "fr (french)": "lunettes anti-lumière bleue",
        "de (german)": "blaufilter brillen",
        "da (danish)": "blue light briller",
        "fi (finnish)": "sinivalolasit",
        "nl (dutch)": "blauw licht filter brillen",
        "nb (norwegian)": "blue light briller",
        "sv (swedish)": "blue light glasögon"
    },
    "combine any glasses": {
        "fr (french)": "combinez n'importe quelles lunettes",
        "de (german)": "kombinieren sie jede unserer brille",
        "da (danish)": "mix og match som du ønsker",
        "fi (finnish)": "yhdistä kaikki lasit",
        "nl (dutch)": "combineer alle brillen",
        "nb (norwegian)": "kombiner alle briller",
        "sv (swedish)": "kombinera alla glasögon"
    },
    "reading glasses": {
        "fr (french)": "lunettes de lecture",
        "de (german)": "lesebrillen",
        "da (danish)": "læsebriller",
        "fi (finnish)": "lukulasit",
        "nl (dutch)": "leesbrillen",
        "nb (norwegian)": "lesebriller",
        "sv (swedish)": "läsglasögon"
    },
    "strength (of lenses)": {
        "fr (french)": "Dioptrie",
        "de (german)": "Stärke",
        "da (danish)": "Styrke",
        "fi (finnish)": "Vahvuus",
        "nl (dutch)": "Sterkte",
        "nb (norwegian)": "Styrke",
        "sv (swedish)": "Styrka"
    },
    "sunglasses": {
        "fr (french)": "lunettes de soleil",
        "de (german)": "sonnenbrillen",
        "da (danish)": "solbriller",
        "fi (finnish)": "aurinkolasit",
        "nl (dutch)": "zonnebrillen",
        "nb (norwegian)": "solbriller",
        "sv (swedish)": "solglasögon"
    }
}

# Localized price points: GBP price as it appears in the English source ->
# the price string to show in each market (keyed by market label).
# The dict is interpolated into the system prompt; for prices not listed here
# the prompt asks the model to do "a reasonable currency conversion".
# NOTE(review): "1,299 kr" uses a comma as thousands separator for sv/nb --
# confirm this is the formatting wanted on those storefronts.
prices = {
    "£29": {
        "da (danish)": "249 kr.",
        "sv (swedish)": "349 kr",
        "nb (norwegian)": "349 kr",
        "fi (finnish)": "35 €",
        "de (german)": "35 €",
        "fr (french)": "35 €",
        "nl (dutch)": "€35"
    },
    "£59": {
        "da (danish)": "499 kr.",
        "sv (swedish)": "699 kr",
        "nb (norwegian)": "699 kr",
        "fi (finnish)": "69 €",
        "de (german)": "69 €",
        "fr (french)": "69 €",
        "nl (dutch)": "€69"
    },
    "£99": {
        "da (danish)": "899 kr.",
        "sv (swedish)": "1,299 kr",
        "nb (norwegian)": "1,299 kr",
        "fi (finnish)": "119 €",
        "de (german)": "119 €",
        "fr (french)": "119 €",
        "nl (dutch)": "€119"
    }
}

# System prompt for the translation call.
# It is an f-string, so it is rendered once, when this cell runs: the task
# sentence plus the glossary, prices and root_domains dicts are baked in using
# their Python dict repr. Re-run this cell after editing any of those tables.
# NOTE(review): the text is sent to the model verbatim, so the typos in rule 6
# ("NAITIVE", "SCENTENCE", "CHNAGE") reach the model as written.
# NOTE(review): rules 4-5 ask for a JSON list, while the API calls in this
# notebook request `response_format=Article` (an object with a `content`
# list) -- confirm the wording is still what you want.
system_prompt_translation = f"""
You are a translator returning JSON-formatted lists. Your task is: {tasks["blogposts"]}

# Context and Style
Luxreaders sells reading glasses, blue light glasses, and sunglasses, focusing on Scandinavian-inspired, stylish, and affordable eyewear. Tone should be friendly, clear, and modern.

## Glossary
Use the glossary below for specific terms; adapt forms (plural, definite article, etc.) as needed:
{glossary}

## Product Names
Never translate product names (e.g., "Men's Oslo København Black" or "København Black").

## Prices
Use this price dictionary; if a price isn't listed, do a reasonable currency conversion:
{prices}

## URLs & Domains
Only change the top-level domain for each market:
{root_domains}

## Examples
- "3 Essential glasses for £59" → "3 Essential briller for 499 kr."
  - Notice "Essential" is not translated, currency is updated, and glossary is used.

## Important
1. Translate idioms and humor carefully, preserving intent.
2. Remain culturally sensitive.
3. Maintain brand tone: friendly, approachable, and stylish.
4. Return your output as valid JSON (a list of translated strings).
5. Input: a market code plus a list of English strings. Output: the translated list in JSON.
6. ALWAYS ASK YOURSELF: HOW WOULD A NAITIVE SPEAKER FROM THE LANGUAGE YOU ARE TRANSLATING TO SAY THIS SCENTENCE. DON'T BE AFRAID TO CHNAGE THE TEXT MAKE IT MORE NATURAL IN THE LANGUAGE YOU ARE TRANSLATING TO
7. **DO NOT ADD OR REMOVE** any items in the list.
8. **DO NOT split, combine, or introduce new sentences.** 
9. DO NOT BREAK THE CODE
"""

### Response schema, API client and exported DataFrame

In [9]:
# Response schema for the OpenAI structured-output calls in this notebook
# (passed as `response_format=Article`).
class Article(BaseModel):
    # The returned strings. Callers expect one entry per input string and
    # check the length themselves; the schema does not enforce it.
    content: list[str]

# Build the OpenAI client. load_dotenv() runs first so that OPENAI_API_KEY can
# come from a dotenv file as well as from the real environment.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Read the (still untranslated) translation sheet exported from Shopify
filepath = Path("..") / "data" / "exported_empty_translations.csv"
df = pd.read_csv(filepath)

### OpenAI translation

In [10]:
# Calling the OpenAI API
def translate_json(article_list, lang, model="gpt-4o"):
    """Translate a list of English strings into ``lang`` via the OpenAI API.

    Uses the module-level ``client`` and ``system_prompt_translation`` and asks
    for a response matching the ``Article`` schema, so the SDK parses the
    model's answer instead of this function parsing raw text.

    Args:
        article_list: Strings to translate, in order. They are sent to the
            model JSON-encoded.
        lang: Target language/locale label inserted into the user message.
        model: Chat model name; defaults to "gpt-4o".

    Returns:
        list[str]: The ``content`` list of the parsed ``Article``. Its length
        is not validated here; callers must compare it with the input.

    Raises:
        ValueError: If the response carries no parsed ``Article`` (for
            example because the model refused the request).
    """
    # Send real JSON (double quotes, consistent escaping) rather than Python's
    # list repr; ensure_ascii=False keeps characters such as æ/ø/å readable and
    # default=str keeps any non-string cell value from breaking serialization.
    payload = json.dumps(list(article_list), ensure_ascii=False, default=str)

    response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt_translation},
            {
                "role": "user",
                "content": f"Translate the following list of strings to {lang} and return only a JSON array:\n\n{payload}"
            }
        ],
        response_format=Article,
    )

    message = response.choices[0].message
    # Without a parsed object (e.g. on a refusal) the raw content may be None,
    # which used to surface as an opaque TypeError from json.loads(None).
    if message.parsed is None:
        reason = getattr(message, "refusal", None) or "no parsed content in response"
        raise ValueError(f"Translation to {lang} returned no structured output: {reason}")
    return list(message.parsed.content)

### OpenAI grammar

In [11]:
def correct_grammar(article_list, lang, model="gpt-4o"):
    """Proofread already-translated strings in ``lang`` via the OpenAI API.

    Uses the module-level ``client`` and asks for a response matching the
    ``Article`` schema, so the SDK parses the model's answer.

    Args:
        article_list: Translated strings (may contain Shopify HTML), in order.
            They are sent to the model JSON-encoded.
        lang: Language/locale label of the strings; inserted into both the
            system and the user message.
        model: Chat model name; defaults to "gpt-4o".

    Returns:
        list[str]: The ``content`` list of the parsed ``Article``. Its length
        is not validated here; callers must compare it with the input.

    Raises:
        ValueError: If the response carries no parsed ``Article`` (for
            example because the model refused the request).
    """
    # Send real JSON rather than Python's list repr so quotes inside the HTML
    # are escaped consistently; ensure_ascii=False keeps non-ASCII text readable.
    payload = json.dumps(list(article_list), ensure_ascii=False, default=str)

    response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": f"""
             You get a list of strings (some can have html from Shopify). The strings have been translated from English (en) to {lang}, but grammar mistakes and weird formulations might have been appeared as a result of the translation. 
             Your job, as a world class text writing expert, is to correct grammar, improve formulations and return only a JSON array. 
             VERY IMPORTANT: Don't break the code, keep it intact so it can be imported to Shopify again. Keep the list length the same.
             ALWAYS ASK YOURSELF, HOW WOULD A LOCAL PERSON WRITE THIS SENTENCE GIVEN THE CONTEXT. DON'T BE AFRAID TO CHANGE A SCENTENCE TO IMPROVE IT, BUT NEVER BREAK THE CODE.
             ### STRICT RULES:
             **DO NOT ADD OR REMOVE** any items in the list.
             The output **must have the exact same number of elements** as the input.
             Return **only** a JSON array of strings with the HTML still intact. **No explanations, no extra text.**
             If a sentence feels unclear, **rewrite it in a better way** but **keep it as a single string**.
             **DO NOT split, combine, or introduce new sentences.** 
             **DO NOT BREAK THE CODE**
             """},
            {
                "role": "user",
                "content": f"The following article is in the language {lang}. Do your job and return only a JSON array:\n\n{payload}"
            }
        ],
        response_format=Article,
    )

    message = response.choices[0].message
    # Without a parsed object (e.g. on a refusal) the raw content may be None,
    # which used to surface as an opaque TypeError from json.loads(None).
    if message.parsed is None:
        reason = getattr(message, "refusal", None) or "no parsed content in response"
        raise ValueError(f"Grammar correction for {lang} returned no structured output: {reason}")
    return list(message.parsed.content)

### Run the translation loop

In [16]:
locales = df["Locale"].unique().tolist()
articles = df["Article name"].unique().tolist()

max_retries = 3  # Maximum translate + proofread attempts per (article, locale)
output_path = Path("../data/to_import_filled_translations.csv")

# If the export's "Translated content" column is empty (as the input file name
# suggests), pandas reads it as float64 NaN. Writing strings into a float
# column warns on pandas >= 2.1 and is slated to become an error, so make it
# an object column up front.
if "Translated content" in df.columns:
    df["Translated content"] = df["Translated content"].astype(object)


def _translate_with_retries(source_strings, lang, article, retries):
    """Translate and proofread ``source_strings``, retrying on length mismatch.

    Each attempt calls ``translate_json`` and, only if that call kept the list
    length, ``correct_grammar``. An attempt succeeds when the final list has
    exactly as many items as ``source_strings``.

    Args:
        source_strings: Strings to translate, in row order.
        lang: Target locale, passed through to both API helpers.
        article: Article name; used only in progress and error messages.
        retries: Maximum number of attempts.

    Returns:
        list: The proofread translations, same length as ``source_strings``.

    Raises:
        ValueError: If no attempt produced a list of the expected length.
            ``send_mac_notification`` is called before raising.
    """
    expected = len(source_strings)
    candidate = []
    for attempt in range(1, retries + 1):
        print(f"Now translating {article} to {lang}, attempt {attempt}")
        candidate = translate_json(source_strings, lang)

        # Skip the proofreading API call when the translation already added or
        # dropped items -- the attempt is unusable either way.
        if len(candidate) == expected:
            print(f"Now correcting grammar for {article} in {lang}")
            candidate = correct_grammar(candidate, lang)
            if len(candidate) == expected:
                return candidate

        print(f"Retry {attempt} for {article} in {lang} due to length mismatch.")

    send_mac_notification("Oh, no!", "Your translation has failed")
    raise ValueError(
        f"Length mismatch for article {article} and language {lang}. "
        f"Expected {expected}, but got {len(candidate)}."
    )


for article in articles:
    is_article = df["Article name"] == article

    for lang in locales:
        target_rows = is_article & (df["Locale"] == lang)

        # Read the source strings from the very rows that will be written, so
        # the translations always line up with them (no reliance on a fixed
        # reference locale having the same rows in the same order).
        source_strings = df.loc[target_rows, "Default content"].to_list()
        if not source_strings:
            continue  # this article has no rows for this locale

        df.loc[target_rows, "Translated content"] = _translate_with_retries(
            source_strings, lang, article, max_retries
        )
        print(f"{article} to {lang} done")

        # Save after every locale so finished work survives a crash or an
        # interrupt. index=False keeps the pandas row index out of the file,
        # so its columns match the export that was read in.
        df.to_csv(output_path, index=False)

send_mac_notification("Hello, Philip!", "Your translation is done")

Now translating what-is-blue-light to da, attempt 1


KeyboardInterrupt: 