In [6]:
import os
import json

import openai
from openai import OpenAI

In [7]:
# Read the OpenAI API key from the environment so it is never hardcoded here.
api_key = os.getenv("OPENAI_API_KEY_MA")
# Fail fast on a missing key. An empty string is treated like a missing value,
# and the message names the variable that is actually read above.
if not api_key:
    raise ValueError(
        "API key not found. Please set the OPENAI_API_KEY_MA environment variable."
    )

In [8]:
# Model identifier passed to every rephrasing request in the cells below.
gpt_model = "gpt-4o-mini-2024-07-18"
# OpenAI client constructed with the key read from the environment.
oai_client = OpenAI(api_key=api_key)

In [32]:
# Prompt templates for paraphrasing requests, keyed by language code.
# Each language entry holds:
#   "system": a system prompt containing a {num} placeholder
#             (the number of rephrasings to generate).
#   "user":   one user prompt per divergence level ("low", "medium", "high"),
#             each containing {num} and {text} placeholders.
# rephrase_text fills the placeholders with str.format.
REPHRASE_PROMPT_TEMPLATES = {
    "en": {
        "system": (
            "You are an expert in paraphrasing text. Your task is to generate {num} distinct rephrasings "
            "of the provided text while maintaining the original meaning. Ensure that each rephrasing is "
            "unique and not a simple repetition of the original."
        ),
        "user": {
            "low": (
                "Generate {num} paraphrases of the following text:\n\n{text}\n\n"
                "Make only minor lexical substitutions. Keep sentence structure and phrasing nearly identical."
            ),
            "medium": (
                "Generate {num} paraphrases of the following text:\n\n{text}\n\n"
                "Change vocabulary and some sentence structures, while keeping the same overall message."
            ),
            "high": (
                "Generate {num} paraphrases of the following text:\n\n{text}\n\n"
                "Drastically rephrase and restructure the content. Be creative in how the message is conveyed, but ensure the core meaning is preserved."
            ),
        },
    },
    "de": {
        "system": (
            "Du bist ein Experte für das Paraphrasieren von Texten. Deine Aufgabe ist es, {num} verschiedene Umschreibungen "
            "des bereitgestellten Textes zu erstellen, wobei die ursprüngliche Bedeutung beibehalten werden soll. Jede Umschreibung "
            "muss einzigartig sein und darf den Originaltext nicht einfach wiederholen."
        ),
        "user": {
            "low": (
                "Erzeuge {num} Umschreibungen des folgenden Textes:\n\n{text}\n\n"
                "Nur minimale Wortänderungen. Satzstruktur und Formulierung bleiben fast unverändert."
            ),
            "medium": (
                "Erzeuge {num} Umschreibungen des folgenden Textes:\n\n{text}\n\n"
                "Verändere Wortwahl und teilweise die Satzstruktur, aber behalte die Hauptaussage bei."
            ),
            "high": (
                "Erzeuge {num} Umschreibungen des folgenden Textes:\n\n{text}\n\n"
                "Formuliere stark um und strukturiere den Inhalt kreativ um. Die Kernbedeutung muss erhalten bleiben."
            ),
        },
    },
    "es": {
        "system": (
            "Eres un experto en parafrasear textos. Tu tarea es generar {num} reformulaciones distintas del texto proporcionado, "
            "manteniendo el significado original. Cada reformulación debe ser única y no simplemente repetir el texto original."
        ),
        "user": {
            "low": (
                "Genera {num} paráfrasis del siguiente texto:\n\n{text}\n\n"
                "Realiza solo pequeños cambios léxicos. Mantén la estructura de las oraciones casi igual."
            ),
            "medium": (
                "Genera {num} paráfrasis del siguiente texto:\n\n{text}\n\n"
                "Cambia vocabulario y algunas estructuras de oración, pero conserva el mensaje general."
            ),
            "high": (
                "Genera {num} paráfrasis del siguiente texto:\n\n{text}\n\n"
                "Reformula y reestructura completamente el contenido. Sé creativo sin alterar el significado esencial."
            ),
        },
    },
    "fr": {
        "system": (
            "Vous êtes un expert en reformulation de texte. Votre tâche est de générer {num} reformulations distinctes "
            "du texte fourni tout en maintenant le sens original. Chaque reformulation doit être unique et ne pas simplement "
            "répéter le texte original."
        ),
        "user": {
            "low": (
                "Générez {num} reformulations du texte suivant :\n\n{text}\n\n"
                "Effectuez uniquement des substitutions lexicales mineures. Gardez une structure similaire."
            ),
            "medium": (
                "Générez {num} reformulations du texte suivant :\n\n{text}\n\n"
                "Changez le vocabulaire et certaines structures tout en conservant le message principal."
            ),
            "high": (
                "Générez {num} reformulations du texte suivant :\n\n{text}\n\n"
                "Reformulez et réorganisez le texte en profondeur, en préservant le sens global."
            ),
        },
    },
    "it": {
        "system": (
            "Sei un esperto nella parafrasi dei testi. Il tuo compito è generare {num} riformulazioni distinte del testo fornito "
            "mantenendo il significato originale. Ogni riformulazione deve essere unica e non una semplice ripetizione."
        ),
        "user": {
            "low": (
                "Genera {num} parafrasi del seguente testo:\n\n{text}\n\n"
                "Modifica solo poche parole. Mantieni struttura e sintassi quasi invariate."
            ),
            "medium": (
                "Genera {num} parafrasi del seguente testo:\n\n{text}\n\n"
                "Cambia parole e alcune strutture, mantenendo il significato centrale."
            ),
            "high": (
                "Genera {num} parafrasi del seguente testo:\n\n{text}\n\n"
                "Riformula in modo significativo. Ristruttura il contenuto con creatività senza alterare il significato."
            ),
        },
    },
    "ko": {
        "system": (
            "당신은 텍스트를 바꾸는 전문가입니다. 당신의 임무는 제공된 텍스트의 원래 의미를 유지하면서 {num}가지 독특한 바꾸기를 생성하는 것입니다. 각 바꾸기가 독특하고 원본 텍스트를 단순히 반복하지 않도록 하십시오."
        ),
        "user": {
            "low": (
                "다음 텍스트의 {num}가지 바꾸기를 생성하십시오:\n\n{text}\n\n"
                "단어 수준에서 약간의 변경만 하십시오. 문장 구조는 거의 그대로 유지하십시오."
            ),
            "medium": (
                "다음 텍스트의 {num}가지 바꾸기를 생성하십시오:\n\n{text}\n\n"
                "다양한 단어와 문장 구조를 사용하되 의미는 유지하십시오."
            ),
            "high": (
                "다음 텍스트의 {num}가지 바꾸기를 생성하십시오:\n\n{text}\n\n"
                "창의적으로 재구성하고 표현을 완전히 바꾸되, 핵심 의미는 그대로 유지하십시오."
            ),
        },
    },
    "pt": {
        "system": (
            "Você é um especialista em parafrasear textos. Sua tarefa é gerar {num} reformulações distintas do texto fornecido, mantendo o significado original. Cada reformulação deve ser única e não apenas repetir o texto original."
        ),
        "user": {
            "low": (
                "Gere {num} paráfrases do seguinte texto:\n\n{text}\n\n"
                "Faça apenas substituições leves de palavras. Mantenha a estrutura original."
            ),
            "medium": (
                "Gere {num} paráfrases do seguinte texto:\n\n{text}\n\n"
                "Altere o vocabulário e parte da estrutura, preservando o sentido."
            ),
            "high": (
                "Gere {num} paráfrases do seguinte texto:\n\n{text}\n\n"
                "Reestruture e reformule de forma criativa. Preserve o significado essencial."
            ),
        },
    },
    "ru": {
        "system": (
            "Вы являетесь экспертом в перефразировании текста. Ваша задача — создать {num} различных перефразировок предоставленного текста, сохраняя оригинальное значение. Каждая перефразировка должна быть уникальной и не дублировать исходный текст."
        ),
        "user": {
            "low": (
                "Создайте {num} перефразировок следующего текста:\n\n{text}\n\n"
                "Измените только отдельные слова. Структура предложений должна остаться почти такой же."
            ),
            "medium": (
                "Создайте {num} перефразировок следующего текста:\n\n{text}\n\n"
                "Измените лексику и частично структуру, сохранив общий смысл."
            ),
            "high": (
                "Создайте {num} перефразировок следующего текста:\n\n{text}\n\n"
                "Кардинально переформулируйте и перестройте текст, не изменяя его суть."
            ),
        },
    },
    "zh": {
        "system": (
            "你是一个文本改写的专家。你的任务是生成提供的文本的{num}种不同的改写，同时保持原意。确保每个改写都是独特的，而不是简单地重复原始文本。"
        ),
        "user": {
            "low": (
                "生成以下文本的{num}种改写：\n\n{text}\n\n"
                "仅做轻微的词语替换，语序和句式基本保持一致。"
            ),
            "medium": (
                "生成以下文本的{num}种改写：\n\n{text}\n\n"
                "更换部分词语和语句结构，保留原始含义。"
            ),
            "high": (
                "生成以下文本的{num}种改写：\n\n{text}\n\n"
                "可大幅度重构和重新表达内容，只要保留主要意思即可。"
            ),
        },
    },
}

In [33]:
def rephrase_text(
    text_to_rephrase,
    client,
    model,
    language="en",
    number_of_rephrasings=3,
    divergence="medium",
):
    """Request several paraphrases of a text and return the decoded JSON reply.

    The system and user prompts are taken from ``REPHRASE_PROMPT_TEMPLATES`` for
    the chosen language and divergence level, and the request asks for a JSON
    object matching a strict schema with a single ``"rephrasings"`` string array.

    Args:
        text_to_rephrase: Text to paraphrase; must be a non-blank string.
        client: Object exposing ``responses.create(...)`` (this notebook passes
            an ``OpenAI`` client).
        model: Model name forwarded unchanged to ``client.responses.create``.
        language: Key of ``REPHRASE_PROMPT_TEMPLATES`` selecting the prompt language.
        number_of_rephrasings: Number of paraphrases to ask for; a positive
            ``int``. ``bool`` values are rejected even though ``bool`` is a
            subclass of ``int``.
        divergence: ``"low"``, ``"medium"`` or ``"high"``; selects the user prompt.

    Returns:
        The decoded JSON reply; the requested schema shapes it as
        ``{"rephrasings": [str, ...]}``. The schema does not constrain the
        array length, so the count is requested through the prompt only.
        If the API rejects the request (``openai.BadRequestError``) or the
        reply is not valid JSON, ``{"error": <message>}`` is returned instead,
        so callers should check for the ``"error"`` key.

    Raises:
        ValueError: If ``language``, ``number_of_rephrasings``, ``divergence``
            or ``text_to_rephrase`` fails validation.
    """
    supported_languages = REPHRASE_PROMPT_TEMPLATES.keys()

    if language not in supported_languages:
        raise ValueError(
            f"Language '{language}' is not supported. Supported languages are: {list(supported_languages)}"
        )

    # bool is a subclass of int, so True would otherwise pass the check and be
    # rendered into the prompt as "True".
    if (
        isinstance(number_of_rephrasings, bool)
        or not isinstance(number_of_rephrasings, int)
        or number_of_rephrasings < 1
    ):
        raise ValueError("number_of_rephrasings must be a positive integer")

    if divergence not in ("low", "medium", "high"):
        raise ValueError("divergence must be 'low', 'medium', or 'high'")

    # Reject blank input before spending an API call on it.
    if not isinstance(text_to_rephrase, str) or not text_to_rephrase.strip():
        raise ValueError("text_to_rephrase must be a non-empty string")

    templates = REPHRASE_PROMPT_TEMPLATES[language]
    system_prompt = templates["system"].format(num=number_of_rephrasings)
    user_prompt = templates["user"][divergence].format(
        text=text_to_rephrase, num=number_of_rephrasings
    )

    # Structured-output format: one object with a required "rephrasings" array.
    response_format = {
        "format": {
            "type": "json_schema",
            "name": "rephrasings_list",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "rephrasings": {
                        "type": "array",
                        "description": f"A list of {number_of_rephrasings} rephrased versions of the original input.",
                        "items": {
                            "type": "string",
                            "description": "A single rephrased sentence.",
                        },
                    },
                },
                "required": ["rephrasings"],
                "additionalProperties": False,
            },
        }
    }

    try:
        response = client.responses.create(
            model=model,
            input=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            text=response_format,
        )
        try:
            return json.loads(response.output_text)
        except json.JSONDecodeError as json_error:
            print(f"JSONDecodeError: {json_error}")
            return {"error": "Invalid JSON response from API"}
    except openai.BadRequestError as e:
        # Only bad-request errors are converted to an error dict; any other
        # exception raised by the call propagates to the caller.
        print(f"Error: {str(e)}")
        return {"error": str(e)}

In [34]:
# One demo sentence per supported language code (same pangram in each language).
sentence = dict(
    en="The quick brown fox jumps over the lazy dog",
    de="Der schnelle braune Fuchs springt über den faulen Hund",
    es="El rápido zorro marrón salta sobre el perro perezoso",
    fr="Le renard brun rapide saute par-dessus le chien paresseux",
    it="La veloce volpe marrone salta sopra il cane pigro",
    ko="빠른 갈색 여우가 게으른 개를 뛰어넘습니다",
    pt="A rápida raposa marrom pula sobre o cão preguiçoso",
    ru="Быстрая коричневая лиса прыгает через ленивую собаку",
    zh="快速的棕色狐狸跳过懒狗",
)

In [35]:
# Language code used by the demo cell below: it indexes `sentence` and is
# passed to rephrase_text as `language`.
choice = "de"

In [36]:
# Rephrase the selected demo sentence once per divergence level and print the results.
print(f"Original sentence in {choice}:\n{sentence[choice]}")

for divergence in ["low", "medium", "high"]:
    rephrase = rephrase_text(
        sentence[choice],
        client=oai_client,
        model=gpt_model,
        language=choice,
        number_of_rephrasings=7,
        divergence=divergence,
    )
    print(f"\nDivergence: {divergence}")
    # rephrase_text returns {"error": ...} without a "rephrasings" key when the
    # request is rejected or the reply is not valid JSON; report that instead
    # of failing with a KeyError.
    if "error" in rephrase:
        print(f"Rephrasing failed: {rephrase['error']}")
        continue
    for phrasing in rephrase.get("rephrasings", []):
        print(phrasing)

Original sentence in de:
Der schnelle braune Fuchs springt über den faulen Hund

Divergence: low
Der flinke braune Fuchs hüpft über den träge daliegenden Hund.
Der zügige braune Fuchs springt über den lethargischen Hund.
Der rasante braune Fuchs springt über den schläfrigen Hund.
Der schnelle braune Fuchs springt über den müden Hund.
Der hurtige braune Fuchs springt über den faulen Hund.
Der eilende braune Fuchs springt über den bequemen Hund.
Der flinke braune Fuchs springt über den inaktiven Hund.

Divergence: medium
Der flinke braune Fuchs hüpft über den träge daliegenden Hund.
Ein rasanter brauner Fuchs springt über den lethargischen Hund hinweg.
Der zügige braune Fuchs springt über den gelangweilten Hund.
Ein schneller, brauner Fuchs springt über den faulen Hund hinweg.
Der agile braune Fuchs springt über den untätigen Hund.
Der flotte Fuchs in brauner Farbe springt über den schläfrigen Hund.
Ein wendiger brauner Fuchs hüpft über den faulen Hund.

Divergence: high
Ein flinkes, bra

In [37]:
# Longer English passage used to compare the three divergence levels.
real_example = "Chili peppers are known for their spicy flavor, but the spiciness can vary depending on the type of pepper. The spiciest part of a chili pepper is typically the placenta, which is the white, spongy tissue that surrounds the seeds inside the pepper. The placenta contains most of the pepper's capsaicin"
print("Real Example - Original Text:")
print(real_example)

for divergence in ["low", "medium", "high"]:
    rephrase = rephrase_text(
        real_example,
        client=oai_client,
        model=gpt_model,
        language="en",
        number_of_rephrasings=3,
        divergence=divergence,
    )
    print(f"\nReal Example - Divergence: {divergence}")
    # rephrase_text returns {"error": ...} without a "rephrasings" key when the
    # request is rejected or the reply is not valid JSON; report that instead
    # of failing with a KeyError.
    if "error" in rephrase:
        print(f"Rephrasing failed: {rephrase['error']}")
        continue
    for phrasing in rephrase.get("rephrasings", []):
        print(phrasing)

Real Example - Original Text:
Chili peppers are known for their spicy flavor, but the spiciness can vary depending on the type of pepper. The spiciest part of a chili pepper is typically the placenta, which is the white, spongy tissue that surrounds the seeds inside the pepper. The placenta contains most of the pepper's capsaicin

Real Example - Divergence: low
Chili peppers are recognized for their zesty flavor, but the heat level can differ based on the variety of pepper. The hottest section of a chili pepper is usually the placenta, which is the pale, sponge-like tissue that encases the seeds within the pepper. The placenta holds the majority of the pepper's capsaicin.
Chili peppers are famous for their fiery taste, yet the heat can fluctuate depending on the kind of pepper. The most intense area of a chili pepper is generally the placenta, which is the light-colored, porous tissue surrounding the seeds inside the pepper. Most of the pepper's capsaicin is found in the placenta.
Chil