# Setup

Please ensure you have imported a Gemini API key from AI Studio.
You can do this directly in the Secrets tab on the left.

After doing so, please run the setup cell below.

In [None]:
# Install/upgrade the packages this notebook relies on (-U: upgrade, -q: quiet).
# NOTE(review): the generated-code cell names its dependency `google-genai`;
# confirm that the two package names below are the intended ones.
!pip install -U -q "google"
!pip install -U -q "google.genai"

import os
from google.colab import userdata
from google.colab import drive
# Copy the Gemini API key from the Colab user secrets into the process
# environment so that later cells can read it through os.environ.
os.environ["GEMINI_API_KEY"] = userdata.get("GEMINI_API_KEY")

# Mount Google Drive under /content/drive so the notebook can read/write files there.
drive.mount("/content/drive")
# Please ensure that uploaded files are available in the AI Studio folder or change the working folder.
os.chdir("/content/drive/MyDrive/Google AI Studio")

# Generated Code

In [None]:
# To run this code you need to install the following dependencies:
# pip install google-genai

import base64
import os
import json
from typing import Sequence, Generator

from google.genai import Client as GeminiClient
from google.genai.types import (
    Content,
    Part,
    SafetySetting,
    GenerateContentConfig,
    HarmCategory,
    HarmBlockThreshold,
    FinishReason,
)
from pydantic import BaseModel


class JokeEntry(BaseModel):
    # Schema of a single joke record. ``list[JokeEntry]`` is handed to the
    # Gemini request config as ``response_schema``, and the parsed items are
    # later serialised with ``model_dump()``.
    # Documented with comments (not a docstring) on purpose: pydantic exposes a
    # class docstring as the schema description, which would change the schema.
    body: str  # joke text; the prompt asks the model to translate this field
    id: int  # joke identifier; the prompt asks for non-body keys to be left intact
    rating: float  # presumably a score carried over from the source data — TODO confirm


def iter_batches(array: Sequence, batch_size: int = 32, start_from: int = 0) -> Generator[Sequence, None, None]:
    """Yield consecutive slices of ``array``, each at most ``batch_size`` long.

    Slicing starts at index ``start_from`` and advances by ``batch_size``;
    the final slice may be shorter. Nothing is yielded when ``start_from``
    is at or beyond the end of ``array``.
    """
    offsets = range(start_from, len(array), batch_size)
    yield from (array[lo:lo + batch_size] for lo in offsets)

def translate(batch: Sequence, prompt: str, generate_content_config: GenerateContentConfig, client: GeminiClient):
    """Send one batch of jokes to Gemini and return the raw response.

    Args:
        batch: Jokes to translate. Items may be plain dicts (as loaded from
            the JSON file) or ``JokeEntry`` instances.
        prompt: Template containing a ``{jokes_batch}`` placeholder.
        generate_content_config: Request configuration forwarded unchanged.
        client: Gemini client used to issue the request.

    Returns:
        The object returned by ``client.models.generate_content``.

    Note:
        The model name is read from the module-level ``model`` variable.
    """
    # The prompt embeds the batch inside a ```json fence, so serialise it as
    # real JSON; str(list) would produce a Python repr (single quotes, None,
    # True/False), which is not valid JSON.
    plain_jokes = [
        joke.model_dump() if isinstance(joke, BaseModel) else joke
        for joke in batch
    ]
    jokes_json = json.dumps(plain_jokes, ensure_ascii=False)

    contents = [
        Content(
            role="user",
            parts=[
                Part.from_text(text=prompt.format(jokes_batch=jokes_json)),
            ],
        ),
    ]
    response = client.models.generate_content(
        model=model,
        contents=contents,
        config=generate_content_config,
    )
    return response


# Load the source jokes. The encoding is given explicitly so the read does not
# depend on the runtime's locale default.
# NOTE(review): later code slices this value and reads joke['id'], so it is
# presumably a list of dicts — confirm against the data file.
with open("/content/drive/MyDrive/Google AI Studio/stupid_stuff.json", encoding="utf-8") as f:
    raw_jokes = json.load(f)

# Prompt template (in Spanish). In English: "Translate these jokes into
# Spanish; for each object in the array translate the text under the `body`
# key and leave the other keys intact. Try to use language that feels familiar
# in a Latin American context." ``{jokes_batch}`` is filled in per request.
prompt = """Traduce estos chistes al español, para cada objeto del array traduce el texto en la llave body, las demás llaves déjalas intactas.
Procura usar un lenguaje que resulte familiar en el contexto latinoamericano.

```json
{jokes_batch}
```
"""

# Gemini client authenticated with the key exported by the setup cell.
client = GeminiClient(api_key=os.environ.get("GEMINI_API_KEY"))
model = "gemini-2.0-flash"

# Request configuration: JSON output parsed as a list of JokeEntry, with the
# block threshold set to BLOCK_NONE for every harm category listed below.
generate_content_config = GenerateContentConfig(
    temperature=1,
    response_mime_type="application/json",
    response_schema=list[JokeEntry],
    safety_settings=[
        SafetySetting(category=harm_category, threshold=HarmBlockThreshold.BLOCK_NONE)
        for harm_category in (
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
        )
    ],
)

# Translate the jokes batch by batch, accumulating the parsed entries.
translated = []
for i, batch in enumerate(iter_batches(raw_jokes)):
    response = translate(batch, prompt, generate_content_config, client)
    if response.parsed is not None:
        translated += response.parsed
        print(f"batch {i=}: {len(batch)=} {len(translated)=}")
        continue

    # The batch produced no parseable output (e.g. RECITATION or MAX_TOKENS).
    # Guard against a response without candidates before reading the reason.
    batch_reason = response.candidates[0].finish_reason if response.candidates else None
    print(f"batch {i=} with {batch_reason} issues")

    # Reprocess each joke of the batch separately and isolate the ones that
    # still cannot be translated, instead of losing the whole batch.
    for j, joke in enumerate(batch):
        response = translate([joke], prompt, generate_content_config, client)
        if response.parsed is None:
            joke_reason = response.candidates[0].finish_reason if response.candidates else None
            print(f"sample with {joke_reason} issues, skip and translate later with an alternate approach {joke['id']}")
            continue

        translated += response.parsed
        print(f"retried joke {j=}: {len(translated)=}")

    # Retried results were already appended above; do not add the last
    # single-joke response a second time.
    print(f"batch {i=}: {len(batch)=} {len(translated)=}")

In [None]:
# Convert the parsed entries to plain dicts and append them to whatever was
# saved by a previous run. Re-running this cell without re-running the
# translation appends the same entries again.
translated_raw = [joke.model_dump() for joke in translated]
translaated_path = "/content/drive/MyDrive/Google AI Studio/stupid_stuff_translated.json"
if os.path.exists(translaated_path):
    with open(translaated_path, encoding="utf-8") as f:
        translated_prior = json.load(f)
    translated_raw = translated_prior + translated_raw

# The file must be opened for writing; the default read mode makes json.dump
# fail with io.UnsupportedOperation.
with open(translaated_path, "w", encoding="utf-8") as f:
    json.dump(translated_raw, f, indent=2)

# Report the total count and the id of the last entry (None when empty).
last_id = translated_raw[-1]["id"] if translated_raw else None
print(len(translated_raw), last_id)