# Automating Product Listing Translation Using Large Language Models and Structured Outputs

## Importing the Necessary Libraries

In [1]:
import json
from enum import Enum

import pandas as pd

from google import genai
from google.genai.types import HttpOptions

from pydantic import BaseModel

## Reading the Dataset

In [2]:
# Load the tab-separated source listing from the project's GitHub repository.
# Every column is listing text, so it is read verbatim as strings:
#   - dtype=str stops numeric-looking SKUs or sizes (e.g. "42") from being
#     parsed as numbers, which would not match the all-`str` output schema.
#   - keep_default_na=False stops literal values such as "NA", "None" or
#     "null" from being silently converted to missing values.
df_source = pd.read_csv(
    "https://raw.githubusercontent.com/muw78/automating-product-listing-translation-using-llms-and-structured-outputs/refs/heads/main/source_listing.tsv",
    sep="\t",
    dtype=str,
    keep_default_na=False,
).fillna("")  # Any cell that is still missing becomes an empty string

# One dict per SKU row; this is the structure serialised into the prompt.
source_listing = df_source.to_dict(orient="records")

In [3]:
# Display the loaded source listing (one row per SKU).
df_source

Unnamed: 0,parent_child,item_sku,brand,item_name,bullet_point1,bullet_point2,bullet_point3,bullet_point4,bullet_point5,product_description,color_name,size_name
0,parent,TSHIRT-CLASSIC,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,,This men's t-shirt is a staple item for any wa...,,
1,child,TSHIRT-CLASSIC-RED-S,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Red,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Red, Size: S",This men's t-shirt is a staple item for any wa...,Red,S
2,child,TSHIRT-CLASSIC-RED-M,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Red,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Red, Size: M",This men's t-shirt is a staple item for any wa...,Red,M
3,child,TSHIRT-CLASSIC-RED-L,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Red,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Red, Size: L",This men's t-shirt is a staple item for any wa...,Red,L
4,child,TSHIRT-CLASSIC-ORN-S,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Orange,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Orange, Size: S",This men's t-shirt is a staple item for any wa...,Orange,S
5,child,TSHIRT-CLASSIC-ORN-M,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Orange,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Orange, Size: M",This men's t-shirt is a staple item for any wa...,Orange,M
6,child,TSHIRT-CLASSIC-ORN-L,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Orange,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Orange, Size: L",This men's t-shirt is a staple item for any wa...,Orange,L
7,child,TSHIRT-CLASSIC-YLW-S,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Yellow,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Yellow, Size: S",This men's t-shirt is a staple item for any wa...,Yellow,S
8,child,TSHIRT-CLASSIC-YLW-M,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Yellow,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Yellow, Size: M",This men's t-shirt is a staple item for any wa...,Yellow,M
9,child,TSHIRT-CLASSIC-YLW-L,My Cool Brand,My Cool Brand Men's Crewneck T-Shirt - Yellow,Material: 100% Cotton,Fit: Classic Regular Fit,Comfort: Tagless Neck Label,Quality: Durable Double Stitching,"Color: Yellow, Size: L",This men's t-shirt is a staple item for any wa...,Yellow,L


In [4]:
# Preview the first two records to check the dict structure that is serialised into the prompt.
source_listing[:2]

[{'parent_child': 'parent',
  'item_sku': 'TSHIRT-CLASSIC',
  'brand': 'My Cool Brand',
  'item_name': "My Cool Brand Men's Crewneck T-Shirt",
  'bullet_point1': 'Material: 100% Cotton',
  'bullet_point2': 'Fit: Classic Regular Fit',
  'bullet_point3': 'Comfort: Tagless Neck Label',
  'bullet_point4': 'Quality: Durable Double Stitching',
  'bullet_point5': '',
  'product_description': "This men's t-shirt is a staple item for any wardrobe. It offers a comfortable fit and is made from soft, breathable fabric.",
  'color_name': '',
  'size_name': ''},
 {'parent_child': 'child',
  'item_sku': 'TSHIRT-CLASSIC-RED-S',
  'brand': 'My Cool Brand',
  'item_name': "My Cool Brand Men's Crewneck T-Shirt - Red",
  'bullet_point1': 'Material: 100% Cotton',
  'bullet_point2': 'Fit: Classic Regular Fit',
  'bullet_point3': 'Comfort: Tagless Neck Label',
  'bullet_point4': 'Quality: Durable Double Stitching',
  'bullet_point5': 'Color: Red, Size: S',
  'product_description': "This men's t-shirt is a st

## Defining the Structured Output Format

In [5]:
class ParentChild(str, Enum):
    # Allowed values for the `parent_child` column of a listing row.
    # Mixing in `str` makes each member a real string, so it compares equal
    # to the plain "parent" / "child" values found in the source data.
    PARENT = "parent"
    CHILD = "child"


class AmazonSKU(BaseModel):
    # One row of the listing. The field names mirror the columns of the
    # source TSV; missing values are represented as empty strings.
    parent_child: ParentChild  # "parent" or "child"; the prompt forbids translating it
    item_sku: str  # SKU identifier; the prompt forbids translating it
    brand: str  # brand name; the prompt forbids translating it
    item_name: str  # product title; the prompt asks for it to start with the brand name
    bullet_point1: str  # bullet points 1-5: short feature lines, "" when unused
    bullet_point2: str
    bullet_point3: str
    bullet_point4: str
    bullet_point5: str
    product_description: str  # long-form description text
    color_name: str  # variation colour, "" on rows without one
    size_name: str  # variation size, "" on rows without one


class AmazonListing(BaseModel):
    # Top-level response object: the complete listing as a list of SKU rows.
    # This is the model passed to the API as the response schema.
    skus: list[AmazonSKU]

## Generating the Prompt

In [6]:
# Language the listing is translated into; substituted into the prompt below.
target_language = "German"

# Prompt sent to the model. It is filled in with `str.format`, so
# `{target_language}` and `{source_listing}` are placeholders and any other
# literal curly brace added to this text would have to be doubled (`{{` / `}}`).
prompt_template = """
Translate the following Amazon listing into **{target_language}**.

- CRITICAL: Do NOT translate the values for `parent_child`, `item_sku`, or `brand`.
- The `item_name` should always start with the brand name.
- Use clear, professional, and descriptive language appropriate for the product category.

```json
{source_listing}
```
"""

In [7]:
# Fill in the two placeholders of the template.
# `ensure_ascii=False` keeps non-ASCII characters (umlauts, accents, symbols
# such as "™") as readable text in the prompt. With the default, they would be
# written as `\uXXXX` escapes, which hides the actual text from the model.
prompt = prompt_template.format(
    target_language=target_language,
    source_listing=json.dumps(source_listing, indent=4, ensure_ascii=False),
)

## Initializing the Gemini Client

In [8]:
# Request timeout for the Gemini API calls made through this client.
GEMINI_TIMEOUT = 3 * 60 * 1000  # 3 minutes in milliseconds
# No API key is passed explicitly, so the client has to obtain its credentials
# from the environment (NOTE(review): confirm the expected variable name in the SDK docs).
genai_client = genai.Client(http_options=HttpOptions(timeout=GEMINI_TIMEOUT))

## Sending the Request to the Gemini API

In [9]:
# Send the whole listing in a single request and ask for structured output.
response = genai_client.models.generate_content(
    model="gemini-2.5-pro",
    contents=[prompt],
    config={
        # Request a JSON reply shaped like the `AmazonListing` model.
        "response_mime_type": "application/json",
        "response_schema": AmazonListing,
        # Low temperature; presumably chosen for more consistent translations.
        "temperature": 0.2,
    },
)

## Parsing the Response

In [10]:
# `response.text` can be empty (e.g. when no candidate text was returned);
# fail with a clear message instead of an AttributeError / JSON decode error.
result_string = (response.text or "").strip()
if not result_string:
    raise ValueError("The Gemini response contains no text to parse.")

# Validate the reply against the schema that was requested instead of trusting
# it blindly: a missing field or an unexpected `parent_child` value raises a
# pydantic validation error here rather than surfacing later as bad data.
# `mode="json"` converts the result back to plain JSON types, so
# `parent_child` is a plain string again and `result` is an ordinary dict.
result = AmazonListing.model_validate_json(result_string).model_dump(mode="json")
translated_listing = result["skus"]

In [11]:
# Show the first two translated SKUs for a quick visual check against the source records.
translated_listing[:2]

[{'parent_child': 'parent',
  'item_sku': 'TSHIRT-CLASSIC',
  'brand': 'My Cool Brand',
  'item_name': 'My Cool Brand Herren T-Shirt mit Rundhalsausschnitt',
  'bullet_point1': 'Material: 100% Baumwolle',
  'bullet_point2': 'Passform: Klassischer Regular Fit',
  'bullet_point3': 'Komfort: Etikettenloses Nackenlabel',
  'bullet_point4': 'Qualität: Strapazierfähige Doppelnähte',
  'bullet_point5': '',
  'product_description': 'Dieses Herren-T-Shirt ist ein unverzichtbarer Bestandteil jeder Garderobe. Es bietet eine bequeme Passform und ist aus weichem, atmungsaktivem Stoff gefertigt.',
  'color_name': '',
  'size_name': ''},
 {'parent_child': 'child',
  'item_sku': 'TSHIRT-CLASSIC-RED-S',
  'brand': 'My Cool Brand',
  'item_name': 'My Cool Brand Herren T-Shirt mit Rundhalsausschnitt - Rot',
  'bullet_point1': 'Material: 100% Baumwolle',
  'bullet_point2': 'Passform: Klassischer Regular Fit',
  'bullet_point3': 'Komfort: Etikettenloses Nackenlabel',
  'bullet_point4': 'Qualität: Strapazie

In [12]:
# Verify that the `item_sku`, `brand`, and `parent_child` fields are unchanged.
# `zip` stops at the shorter list, so the lengths are compared first: without
# that, a response with missing (or zero) SKUs would still pass the check.

len(source_listing) == len(translated_listing) and all(
    (source["item_sku"], source["brand"], source["parent_child"])
    == (translated["item_sku"], translated["brand"], translated["parent_child"])
    for source, translated in zip(source_listing, translated_listing)
)  # Should return True

True

In [13]:
# Turn the translated records back into a DataFrame (one row per SKU).
df_translated = pd.DataFrame(translated_listing)

In [14]:
# Display the translated listing for comparison with `df_source`.
df_translated

Unnamed: 0,parent_child,item_sku,brand,item_name,bullet_point1,bullet_point2,bullet_point3,bullet_point4,bullet_point5,product_description,color_name,size_name
0,parent,TSHIRT-CLASSIC,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,,Dieses Herren-T-Shirt ist ein unverzichtbarer ...,,
1,child,TSHIRT-CLASSIC-RED-S,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Rot, Größe: S",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Rot,S
2,child,TSHIRT-CLASSIC-RED-M,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Rot, Größe: M",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Rot,M
3,child,TSHIRT-CLASSIC-RED-L,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Rot, Größe: L",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Rot,L
4,child,TSHIRT-CLASSIC-ORN-S,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Orange, Größe: S",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Orange,S
5,child,TSHIRT-CLASSIC-ORN-M,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Orange, Größe: M",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Orange,M
6,child,TSHIRT-CLASSIC-ORN-L,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Orange, Größe: L",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Orange,L
7,child,TSHIRT-CLASSIC-YLW-S,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Gelb, Größe: S",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Gelb,S
8,child,TSHIRT-CLASSIC-YLW-M,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Gelb, Größe: M",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Gelb,M
9,child,TSHIRT-CLASSIC-YLW-L,My Cool Brand,My Cool Brand Herren T-Shirt mit Rundhalsaussc...,Material: 100% Baumwolle,Passform: Klassischer Regular Fit,Komfort: Etikettenloses Nackenlabel,Qualität: Strapazierfähige Doppelnähte,"Farbe: Gelb, Größe: L",Dieses Herren-T-Shirt ist ein unverzichtbarer ...,Gelb,L
