In [None]:
%pip install google google.genai --q

In [None]:
from google.colab import userdata
from google.colab import drive
from os import chdir

# Mount Google Drive inside the Colab runtime, then make the project folder the
# working directory for the rest of the notebook.
drive.mount("/content/drive")
chdir("/content/drive/MyDrive/Google AI Studio")

In [None]:
from google.genai import Client
from google.genai.types import (
    Content,
    Part,
    SafetySetting,
    GenerateContentConfig,
    HarmCategory,
    HarmBlockThreshold,
    FinishReason,
)
from json import dump, load
from os.path import exists
from pydantic import BaseModel
from typing import Sequence


# Schema of one joke record. `list[JokeEntry]` is passed as `response_schema`
# in the generation config, and the parsed responses are later serialized
# with `model_dump()`.
class JokeEntry(BaseModel):
    body: str  # joke text; the only key the prompt asks the model to translate
    id: int  # presumably the joke's identifier in the source data — TODO confirm
    rating: float  # presumably a score carried over from the source data — TODO confirm


def iter_batches(array: Sequence, batch_size=32):
    """Yield consecutive slices of *array*, each at most *batch_size* long.

    The last slice is shorter when ``len(array)`` is not a multiple of
    *batch_size*; an empty *array* yields nothing.
    """
    total = len(array)
    starts = range(0, total, batch_size)
    yield from (array[start : start + batch_size] for start in starts)


# Prompt template (written in Spanish) used for every batch. It asks the model
# to translate the text under each object's `body` key into Spanish, leave the
# other keys untouched, and prefer wording familiar in a Latin American
# context. `{jokes_batch}` is filled in with `str.format` inside `translate`.
prompt = """Traduce estos chistes al español, para cada objeto del array traduce el texto en la llave body, las demás llaves déjalas intactas.
Procura usar un lenguaje que resulte familiar en el contexto latinoamericano.

```json
{jokes_batch}
```
"""


def translate(
    batch: Sequence[JokeEntry],
    config: GenerateContentConfig,
    client: Client,
):
    """Ask the model to translate one batch of jokes and return its response.

    The batch is embedded in the module-level ``prompt`` as JSON text and sent
    as a single user message to the module-level ``model``.

    Args:
        batch: Jokes to translate; plain dicts (as loaded from JSON) or
            objects exposing ``model_dump()`` such as ``JokeEntry``.
        config: Generation settings forwarded unchanged to the API call.
        client: Client used to issue the request.

    Returns:
        Whatever ``client.models.generate_content`` returns; callers inspect
        its ``parsed`` and ``candidates`` attributes.
    """
    # Local import: the file's top-level import only brings in dump/load.
    from json import dumps

    # The prompt wraps the payload in a ```json fence, so send real JSON rather
    # than Python's repr (single quotes, True/None). ensure_ascii=False keeps
    # accented characters as-is instead of \uXXXX escapes.
    payload = [
        joke.model_dump() if hasattr(joke, "model_dump") else joke for joke in batch
    ]
    jokes_json = dumps(payload, ensure_ascii=False, indent=2)

    response = client.models.generate_content(
        model=model,
        contents=[
            Content(
                role="user",
                parts=[Part.from_text(text=prompt.format(jokes_batch=jokes_json))],
            ),
        ],
        config=config,
    )
    return response


# Load the source jokes that will be split into batches further down.
with open("/content/drive/MyDrive/Google AI Studio/stupid_stuff.json") as file:
    raw_jokes = load(file)

# API client (key read from Colab user data) and the model name that
# `translate` picks up as a global.
client = Client(api_key=userdata.get("GEMINI_API_KEY"))
model = "gemini-2.0-flash"

# Request JSON output shaped as a list of JokeEntry, with the BLOCK_NONE
# threshold applied to each listed harm category.
generate_content_config = GenerateContentConfig(
    temperature=1,
    response_mime_type="application/json",
    response_schema=list[JokeEntry],
    safety_settings=[
        SafetySetting(category=category, threshold=HarmBlockThreshold.BLOCK_NONE)
        for category in (
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
        )
    ],
)

# Translate every batch and collect the parsed entries in order.
translated = []
# Finish reasons for which a failed batch is retried one joke at a time.
retryable_reasons = (FinishReason.RECITATION, FinishReason.MAX_TOKENS)

for i, batch in enumerate(iter_batches(raw_jokes)):
    response = translate(batch, generate_content_config, client)
    if response.parsed is not None:
        translated += response.parsed
        continue

    # Nothing was parsed. `candidates` can be None (and is guarded against
    # being empty) before the first candidate is inspected.
    finish_reason = (
        response.candidates[0].finish_reason if response.candidates else None
    )
    if finish_reason not in retryable_reasons:
        # Previously this fell through to `translated += None` and crashed the
        # whole run; report the batch and keep going instead.
        print(f"Batch {i}: no parsed output (finish_reason={finish_reason}); skipped")
        continue

    # Retry joke by joke so a single problematic joke does not sink the batch.
    for j, joke in enumerate(batch):
        single = translate([joke], generate_content_config, client)
        if single.parsed is None:
            print(f"Batch {i}, joke {j}: no parsed output; skipped")
            continue
        translated += single.parsed
    # Deliberately no append here: the per-joke results were already added
    # above, and appending again would duplicate the last joke.

In [None]:
# Convert the parsed entries to plain dicts so they can be written as JSON.
translated_raw = [joke.model_dump() for joke in translated]
translated_path = "/content/drive/MyDrive/Google AI Studio/stupid_stuff_translated.json"

# Prepend whatever an earlier run already saved so the file accumulates results.
# NOTE: re-running this cell without re-running the translation appends the
# same `translated` entries to the file again.
if exists(translated_path):
    with open(translated_path, encoding="utf-8") as file:
        translated_raw = load(file) + translated_raw

# Mode "w" is required: with the default read mode, dump() cannot write and a
# first run fails because the file does not exist yet. ensure_ascii=False
# keeps the Spanish text readable in the saved file.
with open(translated_path, "w", encoding="utf-8") as file:
    dump(translated_raw, file, ensure_ascii=False, indent=2)