# Automating Product Listing Translation Using Large Language Models and Structured Outputs

## Importing the Necessary Libraries

In [25]:
import json
from enum import Enum

import pandas as pd

from google import genai
from google.genai.types import HttpOptions

from pydantic import BaseModel

## Reading the Dataset

In [26]:
# Load the tab-separated source listing straight from the project repository.
# dtype=str stops pandas from guessing numeric types, so identifiers and sizes
# keep their exact text (leading zeros, no ".0" suffix) and stay comparable to
# the all-string fields of the response schema.
df_source = pd.read_csv(
    "https://raw.githubusercontent.com/muw78/automating-product-listing-translation-using-llms-and-structured-outputs/refs/heads/main/source_listing.tsv",
    sep="\t",
    dtype=str,
).fillna("")  # Replace empty cells with empty strings
# One dict per row, keyed by column name; this is what gets serialized into the prompt.
source_listing = df_source.to_dict(orient="records")

## Defining the Structured Output Format

In [27]:
# Closed set of values accepted for the `parent_child` field of AmazonSKU.
# Mixing in `str` makes each member compare equal to its plain string value.
# (Documented with comments rather than a docstring so the JSON schema that
# pydantic derives from this type is left unchanged.)
class ParentChild(str, Enum):
    PARENT = "parent"
    CHILD = "child"


# Structured-output schema for a single entry of the listing.
# Presumably the field names mirror the columns of the source TSV — confirm
# against the file. Documented with comments rather than a docstring so the
# JSON schema that pydantic derives from this class is left unchanged.
class AmazonSKU(BaseModel):
    # Identity fields: the prompt instructs the model NOT to translate these,
    # and they are compared against the source rows after the response is parsed.
    parent_child: ParentChild
    item_sku: str
    brand: str
    # Free-text fields; the prompt requires `item_name` to start with the brand name.
    item_name: str
    bullet_point1: str
    bullet_point2: str
    bullet_point3: str
    bullet_point4: str
    bullet_point5: str
    product_description: str
    # NOTE(review): presumably variation attributes; the prompt does not say
    # whether these should be translated — confirm the intended behavior.
    color_name: str
    size_name: str


# Top-level response schema handed to the API as `response_schema`; the parsed
# response is read back through its single `skus` key.
class AmazonListing(BaseModel):
    skus: list[AmazonSKU]

## Generating the Prompt

In [28]:
# Language the listing is translated into; substituted into the template below.
target_language = "German"

# Template rendered with str.format(): `{target_language}` and `{source_listing}`
# are the only placeholders, so any other literal brace added to this text would
# have to be doubled ({{ / }}) to survive formatting.
prompt_template = """
Translate the following Amazon listing into **{target_language}**.

- CRITICAL: Do NOT translate the values for `parent_child`, `item_sku`, or `brand`.
- The `item_name` should always start with the brand name.
- Use clear, professional, and descriptive language appropriate for the product category.

```json
{source_listing}
```
"""

In [29]:
# Render the final prompt by filling both template placeholders.
# ensure_ascii=False keeps accented letters, umlauts, and symbols in the source
# text as real characters; the json.dumps default would rewrite them as \uXXXX
# escapes, which obscures the very text the model is asked to translate.
prompt = prompt_template.format(
    target_language=target_language,
    source_listing=json.dumps(source_listing, indent=4, ensure_ascii=False),
)

## Initializing the Gemini Client

In [30]:
# Request timeout in milliseconds: 3 min x 60 s x 1000 ms.
GEMINI_TIMEOUT = 180_000

# No API key is passed explicitly, so the client has to obtain credentials on
# its own — presumably from the environment; confirm before running.
genai_client = genai.Client(
    http_options=HttpOptions(timeout=GEMINI_TIMEOUT),
)

## Sending the Request to the Gemini API

In [31]:
# Send the rendered prompt as a single content item. The JSON MIME type together
# with the AmazonListing schema requests structured output instead of free text.
# NOTE(review): temperature 0.2 is presumably chosen for stable, repeatable
# translations — confirm.
response = genai_client.models.generate_content(
    model="gemini-2.5-pro",
    contents=[prompt],
    config=dict(
        response_mime_type="application/json",
        response_schema=AmazonListing,
        temperature=0.2,
    ),
)

## Parsing the Response

In [32]:
# `response.text` may be None when the API returns no text part (for example a
# blocked or empty candidate); fail with a clear message instead of an
# AttributeError on `.strip()` or an opaque JSON decoding error.
result_string = (response.text or "").strip()
if not result_string:
    raise ValueError("The Gemini response contains no text; nothing to parse.")

# The payload follows the AmazonListing schema: a JSON object whose `skus` key
# holds the list of translated entries.
result = json.loads(result_string)
translated_listing = result["skus"]

In [34]:
# Sanity check: the fields the prompt forbids translating must come back unchanged.
# The length comparison is required because zip() stops at the shorter input, so
# a response with missing (or zero) entries would otherwise still yield True.
len(source_listing) == len(translated_listing) and all(
    (source["item_sku"], source["brand"], source["parent_child"])
    == (translated["item_sku"], translated["brand"], translated["parent_child"])
    for source, translated in zip(source_listing, translated_listing)
)  # Should return True

In [None]:
# Tabulate the translated entries: one row per entry, one column per key.
df_translated = pd.DataFrame(data=translated_listing)