In [28]:
import os
from dotenv import load_dotenv
import pandas as pd
from tqdm import tqdm
import time
import openai
from google import genai
from google.genai import types
import anthropic
from openai import OpenAI

# Load environment variables
# DOTENV_PATH may point at a different .env file; when it is unset the original
# location is used, so existing setups keep working unchanged.
dotenv_path = os.getenv(
    "DOTENV_PATH",
    "/mnt/4d4f90e5-f220-481e-8701-f0a546491c35/arquivos/projetos/.env",
)
load_dotenv(dotenv_path=dotenv_path)

# Define the original Chinese text
# Replace with your actual Chinese text
original_ZH = "红豆生南国，春来发几枝？愿君多采撷，此物最相思。"

# Models and API keys
openai_api_key = os.getenv("OPENAI_API_KEY")
xai_api_key = os.getenv("XAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
claude_api_key = os.getenv("ANTHROPIC_API_KEY")
deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")

# Each entry maps a model name to the callable that performs the request
# ("client") and the calling convention it follows ("type").
models = {
    "gpt-4.5-preview-2025-02-27": {"api_key": openai_api_key, "client": openai.chat.completions.create, "type": "openai"},
    "grok-beta": {"api_key": xai_api_key, "client": OpenAI(api_key=xai_api_key, base_url="https://api.x.ai/v1").chat.completions.create, "type": "openai"},
    "gemini-2.0-flash-thinking-exp": {"api_key": google_api_key, "client": genai.Client(api_key=google_api_key).models.generate_content, "type": "google"},
    "claude-3-7-sonnet-20250219": {"api_key": claude_api_key, "client": anthropic.Anthropic(api_key=claude_api_key).messages.create, "type": "anthropic"},
    "deepseek-chat": {"api_key": deepseek_api_key, "client": OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com").chat.completions.create, "type": "openai"}
}

# Token limit passed to the Anthropic call (the only branch that sets one).
ANTHROPIC_MAX_TOKENS = 1000


def query_model(model_name, model_config, prompt):
    """Send ``prompt`` to one model and return ``(text, ok)``.

    Args:
        model_name: Key of ``models``; passed to the API as the model id.
        model_config: The matching ``models`` entry; its ``"client"`` callable
            and ``"type"`` calling convention are used.
        prompt: Full user prompt to send.

    Returns:
        A ``(text, ok)`` tuple. ``ok`` is False when ``text`` is only a
        placeholder ("No response generated" / "Unsupported model type") or is
        empty, i.e. when it must not be fed into a follow-up translation.

    Raises:
        Whatever the underlying client raises; callers decide how to report it.
    """
    send = model_config["client"]
    kind = model_config["type"]

    if kind == "openai":
        response = send(model=model_name, messages=[{"role": "user", "content": prompt}])
        text = response.choices[0].message.content
    elif kind == "google":
        response = send(model=model_name, contents=prompt)
        # Check the value, not just the attribute: the attribute can be present
        # yet hold None/"" when the response carries no text.
        text = getattr(response, "text", None)
        if not text:
            return "No response generated", False
    elif kind == "anthropic":
        response = send(model=model_name, max_tokens=ANTHROPIC_MAX_TOKENS, messages=[{"role": "user", "content": prompt}])
        text = response.content[0].text
    else:
        return "Unsupported model type", False

    return text, bool(text)


def translate(model_name, model_config, text, source_lang, target_lang, label):
    """Translate ``text`` with one model, reporting failures instead of raising.

    Args:
        model_name: Key of ``models``.
        model_config: The matching ``models`` entry.
        text: Text to translate.
        source_lang: Language name used in the prompt (e.g. "Chinese").
        target_lang: Language name used in the prompt (e.g. "English").
        label: Direction tag used in the error message (e.g. "ZH->EN").

    Returns:
        ``(result, ok)`` where ``result`` is the translation, a placeholder, or
        ``"Error: ..."`` and ``ok`` tells whether ``result`` is a usable
        translation.
    """
    prompt = f"Translate the following {source_lang} text to {target_lang}, providing only the translated text without any additional explanations or context: {text}"
    try:
        return query_model(model_name, model_config, prompt)
    except Exception as e:
        # Best-effort by design: one failing provider must not abort the run.
        print(f"Error processing {label} for {model_name}: {e}")
        return f"Error: {e}", False


# Create a list to store the results
results = []

# Translation loop
for model_name, model_config in models.items():
    print(f"Translating with {model_name}...")

    # Translate ZH -> EN
    generated_text_zh_en, forward_ok = translate(
        model_name, model_config, original_ZH, "Chinese", "English", "ZH->EN"
    )

    # Translate EN -> ZH, but only from a real translation: back-translating an
    # error message or placeholder would store meaningless text in EN_ZH.
    generated_text_en_zh = None
    if forward_ok:
        generated_text_en_zh, _ = translate(
            model_name, model_config, generated_text_zh_en, "English", "Chinese", "EN->ZH"
        )

    # Store the results for this model
    results.append({
        "model": model_name,
        "original_ZH": original_ZH,
        "ZH_EN": generated_text_zh_en,
        "EN_ZH": generated_text_en_zh
    })

# Create the DataFrame from the results list
df = pd.DataFrame(results)


Translating with gpt-4.5-preview-2025-02-27...
Translating with grok-beta...
Translating with gemini-2.0-flash-thinking-exp...
Translating with claude-3-7-sonnet-20250219...
Translating with deepseek-chat...


In [29]:
# Print results
print(df)

# Save results
output_filename = "results_data/manual_translation_results.csv"
# to_csv does not create missing parent directories, so make sure the output
# folder exists first (a no-op when it is already there).
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
df.to_csv(output_filename, index=False)
print(f"Results saved to {output_filename}")


                           model               original_ZH  \
0     gpt-4.5-preview-2025-02-27  红豆生南国，春来发几枝？愿君多采撷，此物最相思。   
1                      grok-beta  红豆生南国，春来发几枝？愿君多采撷，此物最相思。   
2  gemini-2.0-flash-thinking-exp  红豆生南国，春来发几枝？愿君多采撷，此物最相思。   
3     claude-3-7-sonnet-20250219  红豆生南国，春来发几枝？愿君多采撷，此物最相思。   
4                  deepseek-chat  红豆生南国，春来发几枝？愿君多采撷，此物最相思。   

                                               ZH_EN  \
0  Red beans grow in southern lands,\nHow many br...   
1  The red bean grows in the southern land, how m...   
2  Red beans grow in the southern land.\nHow many...   
3  Red beans grow in the southern land,\nHow many...   
4  Red berries grow in southern land.  \nHow many...   

                                  EN_ZH  
0  红豆生南国，  \n春来发几枝？  \n愿君多采撷，  \n此物最相思。  
1              红豆生南国，春来发几枝？愿君多采撷，此物最相思。  
2            红豆生南国\n春来发几枝\n愿君多采撷\n此物最相思  
3        红豆生南国，\n春来发几枝？\n愿君多采撷，\n此物最相思。  
4  红豆生南国，  \n春来发几枝？  \n愿君多采撷，  \n此物最相思。  
Results saved to results_data/manual_t

In [31]:
import pandas as pd

# List of filenames and their corresponding sources
# NOTE(review): the "a"/"b" variants of C0 and C1 read the same CSV, so their
# model rows are identical and only the human rows below differ — presumably
# intentional (two human references per poem); confirm.
file_info = [
    {"filename": "results_data/manual_translation_results_ZHx-C0.csv", "source": "ZHx-C0a"},
    {"filename": "results_data/manual_translation_results_ZHx-C0.csv", "source": "ZHx-C0b"},
    {"filename": "results_data/manual_translation_results_ZHx-C1.csv", "source": "ZHx-C1a"},
    {"filename": "results_data/manual_translation_results_ZHx-C1.csv", "source": "ZHx-C1b"},
    {"filename": "results_data/manual_translation_results_ZHx-C2.csv", "source": "ZHx-C2"},
    {"filename": "results_data/manual_translation_results_ZHx-C3.csv", "source": "ZHx-C3"}
]

# Manual data to add
manual_data = [
    {"source": "ZHx-C0a", "model": "human", "original_ZH": "红豆生南国，春来发几枝？愿君多采撷，此物最相思。", "ZH_EN": "Red beans grow in the southern land, in spring, how many branches sprout? I wish you would gather them often, For they most evoke longing thoughts.", "EN_ZH": ""},
    {"source": "ZHx-C0b", "model": "human", "original_ZH": "红豆生南国，春来发几枝？愿君多采撷，此物最相思。", "ZH_EN": "Red beans grow in the south, sprouting many branches in spring. Pick them often, as they hold deep feelings of longing.", "EN_ZH": ""},
    {"source": "ZHx-C1a", "model": "human", "original_ZH": "水纹珍簟思悠悠，千里佳期一夕休。从此无心爱良夜，任他明月下西楼。", "ZH_EN": "Ripples on the jade mat, thoughts drift far and wide, A thousand miles of promise, in one night, all denied. From now on, I’ll have no heart for such fine nights, Let the bright moon set where it will, beyond my sight.", "EN_ZH": ""},
    {"source": "ZHx-C1b", "model": "human", "original_ZH": "水纹珍簟思悠悠，千里佳期一夕休。从此无心爱良夜，任他明月下西楼。", "ZH_EN": "Ripples stir the jade mat as thoughts drift endlessly afar, A thousand miles of vows dissolve beneath the evening star. No longer shall fine nights find a place within my heart, Let the moonlight wane and vanish, where its path departs.", "EN_ZH": ""},
    {"source": "ZHx-C2", "model": "human", "original_ZH": "林暗草惊风，将军夜引弓。平明寻白羽，没在石棱中。", "ZH_EN": "Amid the dark woods and startled grasses, the general draws his bow at night. At dawn he seeks the white fletching, Embedded in the stone’s edge, out of sight.", "EN_ZH": ""},
    {"source": "ZHx-C3", "model": "human", "original_ZH": "慈母手中线，游子身上衣。临行密密缝，意恐迟迟归。谁言寸草心，报得三春晖。", "ZH_EN": "From the threads a mother’s hand weaves, a gown for parting son is made. Sown stitch by stitch before he leaves, For fear his return be delayed. Such kindness as young grass receives from the warm sun can’t be repaid.", "EN_ZH": ""}
]


def clean_text_columns(frame):
    """Return a copy of ``frame`` with every object-dtype column flattened to one line.

    Line breaks become a single space (rather than being deleted) so that
    adjacent verse lines are not glued together, e.g. "lands,\\nHow" becomes
    "lands, How" instead of "lands,How". Leading/trailing whitespace is then
    stripped and runs of spaces are collapsed to one.

    Args:
        frame: DataFrame as returned by ``pd.read_csv``.

    Returns:
        A new DataFrame; ``frame`` itself is not modified.
    """
    cleaned = frame.copy()
    for col in cleaned.select_dtypes(include='object'):
        cleaned[col] = (
            cleaned[col]
            .str.replace('\n', ' ', regex=False)
            .str.strip()
            .str.replace(' +', ' ', regex=True)
        )
    return cleaned


# Read each file into a DataFrame, add a source column, and store in a list
# NOTE: `df` is rebound here on every iteration, replacing the results frame
# that the translation cell stored under the same notebook-level name.
dfs = []
for info in file_info:
    df = clean_text_columns(pd.read_csv(info["filename"]))
    df.insert(0, "source", info["source"])  # Insert source column at the beginning
    dfs.append(df)

# Create a DataFrame from the manual data
manual_df = pd.DataFrame(manual_data)

# Concatenate all DataFrames in the list with the manual data
combined_df = pd.concat(dfs + [manual_df], ignore_index=True)

# Sort the combined DataFrame by the 'source' and 'model' column
combined_df = combined_df.sort_values(by=['source', 'model'])

# Print the combined DataFrame
print(combined_df)

# Save the combined DataFrame to a new Excel file
output_filename = "results_data/combined_manual_translation_results.xlsx"
combined_df.to_excel(output_filename, index=False)
print(f"Combined results saved to {output_filename}")


     source                          model  \
3   ZHx-C0a     claude-3-7-sonnet-20250219   
4   ZHx-C0a                  deepseek-chat   
2   ZHx-C0a  gemini-2.0-flash-thinking-exp   
0   ZHx-C0a     gpt-4.5-preview-2025-02-27   
1   ZHx-C0a                      grok-beta   
30  ZHx-C0a                          human   
8   ZHx-C0b     claude-3-7-sonnet-20250219   
9   ZHx-C0b                  deepseek-chat   
7   ZHx-C0b  gemini-2.0-flash-thinking-exp   
5   ZHx-C0b     gpt-4.5-preview-2025-02-27   
6   ZHx-C0b                      grok-beta   
31  ZHx-C0b                          human   
13  ZHx-C1a     claude-3-7-sonnet-20250219   
14  ZHx-C1a                  deepseek-chat   
12  ZHx-C1a  gemini-2.0-flash-thinking-exp   
10  ZHx-C1a     gpt-4.5-preview-2025-02-27   
11  ZHx-C1a                      grok-beta   
32  ZHx-C1a                          human   
18  ZHx-C1b     claude-3-7-sonnet-20250219   
19  ZHx-C1b                  deepseek-chat   
17  ZHx-C1b  gemini-2.0-flash-thin