<a href="https://colab.research.google.com/github/lulu0o6/Google-translator-colab/blob/main/any%20language%20to%20CN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# DOC Auto to CN，谷歌翻译
# 安装依赖（适用于Colab）
!pip install python-docx googletrans==4.0.0-rc1 tqdm -q

from googletrans import Translator
from docx import Document
from docx.shared import Pt
from google.colab import files
from tqdm import tqdm
import time

# Shared googletrans client, configured with translate.google.com as its only service URL.
translator = Translator(service_urls=['translate.google.com'])

def translate_text(text):
    """Translate *text* to Simplified Chinese, auto-detecting the source language.

    Blank or whitespace-only input is returned unchanged without contacting
    the service. Any error raised while translating is printed and the
    original text is returned, so one bad paragraph never aborts the run.
    """
    if text.strip():
        try:
            # Source language is auto-detected; target is Simplified Chinese.
            return translator.translate(text, src='auto', dest='zh-cn').text
        except Exception as err:
            print("翻译错误:", err)
    return text

# Ask for a Word file through the Colab upload widget. The result is a dict
# keyed by file name; it is empty when the dialog is cancelled, so fail with a
# clear message instead of an IndexError.
uploaded = files.upload()
if not uploaded:
    raise ValueError("未上传任何文件，请重新运行并选择一个 .docx 文件")
# Only the first uploaded file is processed.
input_path = next(iter(uploaded))

# Open the document and report how many paragraphs will be visited.
doc = Document(input_path)
print(f"开始翻译 {len(doc.paragraphs)} 个段落...")

# Translate the document paragraph by paragraph, in place.
for para in tqdm(doc.paragraphs):
    if not para.text.strip():
        continue
    translated_text = translate_text(para.text)
    time.sleep(0.5)  # throttle requests to avoid Google rate limiting

    # Drop the original runs so the translation replaces the source text.
    # Runs that carry an inline picture, drawing or embedded object are kept
    # and only their text nodes are removed; deleting those runs would
    # silently strip images out of any paragraph that also contains text.
    for run in list(para.runs):
        r = run._element
        if r.xpath('.//w:drawing | .//w:pict | .//w:object'):
            for t in r.xpath('./w:t'):
                r.remove(t)
        else:
            r.getparent().remove(r)

    # Append the translation as a single new run at 11 pt.
    new_run = para.add_run(translated_text)
    new_run.font.size = Pt(11)

# Save the translated document under a fixed name.
output_path = "translated_output.docx"
doc.save(output_path)
print(f"翻译完成，保存为 {output_path}")

# Hand the file to the browser for download.
files.download(output_path)

In [None]:
# PDF Auto to CN，谷歌翻译
# 安装依赖
!apt-get install -y libreoffice ocrmypdf tesseract-ocr tesseract-ocr-chi-sim > /dev/null
!pip install deep-translator python-docx tqdm PyMuPDF -q

from deep_translator import GoogleTranslator
from docx import Document
from docx.shared import Pt
from google.colab import files
from tqdm import tqdm
import fitz  # PyMuPDF
import time
import os

def translate_text(text):
    """Translate *text* to Simplified Chinese with deep-translator's Google backend.

    Blank or whitespace-only input is returned as-is. If the translation call
    raises, the error is printed and the original text is returned.
    """
    if text.strip():
        try:
            return GoogleTranslator(source='auto', target='zh-CN').translate(text)
        except Exception as err:
            print("翻译错误:", err)
    return text

# 上传 PDF 文件
uploaded = files.upload()
input_pdf_path = list(uploaded.keys())[0]

# 尝试读取文字，判断是否需要 OCR
doc = fitz.open(input_pdf_path)
has_text = any(page.get_text("text").strip() for page in doc)

# 如果没有可选文字，执行 OCR 预处理
if not has_text:
    print("⚠️ 原始 PDF 无可选文字，正在执行 OCR 识别...")
    ocr_output_path = "ocr_output.pdf"
    !ocrmypdf --skip-text --language eng+nld+chi_sim "{input_pdf_path}" "{ocr_output_path}"
    pdf_path_to_use = ocr_output_path
    print("✅ OCR 完成，已生成含文字的 PDF")
else:
    print("✅ 检测到可选文字，跳过 OCR")
    pdf_path_to_use = input_pdf_path

# Target Word document that collects the translated text.
translated_doc = Document()

# Re-open whichever PDF carries the text layer (the OCR output when OCR ran).
# The context manager closes the PDF once extraction is finished, even if a
# page fails part-way through.
with fitz.open(pdf_path_to_use) as pdf_doc:
    total_pages = len(pdf_doc)
    print(f"\n📖 开始翻译共 {total_pages} 页的 PDF 内容...\n")

    for page_num, page in enumerate(tqdm(pdf_doc, desc="📄 翻译中"), start=1):
        # Page heading: bold, 13 pt.
        page_header = f"📄 第 {page_num} 页"
        header_para = translated_doc.add_paragraph()
        header_run = header_para.add_run(page_header)
        header_run.font.size = Pt(13)
        header_run.bold = True

        # Every non-blank extracted line is translated on its own and written
        # as a separate 11 pt paragraph.
        text = page.get_text("text")
        lines = [line.strip() for line in text.split('\n') if line.strip()]
        for para_text in tqdm(lines, desc=f"↳ 第 {page_num} 页段落翻译", leave=False):
            translated = translate_text(para_text)
            time.sleep(0.5)  # throttle requests between translations
            para = translated_doc.add_paragraph()
            run = para.add_run(translated)
            run.font.size = Pt(11)

# 保存 Word 文件
docx_output = "translated_output.docx"
translated_doc.save(docx_output)

# 转换为 PDF
!libreoffice --headless --convert-to pdf "{docx_output}" > /dev/null

# 下载 PDF
pdf_output = "translated_output.pdf"
print("\n✅ 翻译完成，生成 PDF 文件 ✅")
files.download(pdf_output)


In [None]:
# PPT auto to CN，可翻译表格，表格字号12
# 安装依赖
!pip install googletrans==4.0.0-rc1 python-pptx tqdm -q

# 导入库
from google.colab import files
from pptx import Presentation
from pptx.util import Pt
from googletrans import Translator
from tqdm import tqdm
import time

# Initialise the shared googletrans client with translate.google.com as its only service URL.
translator = Translator(service_urls=['translate.google.com'])

def translate_text(text, max_retries=3):
    """Translate *text* to Simplified Chinese with throttling and retries.

    Blank or whitespace-only input is returned unchanged. Each attempt that
    succeeds is followed by a 0.5 s pause before the result is returned; each
    attempt that raises is reported and followed by a 1 s pause. When all
    *max_retries* attempts fail, a notice is printed and the original text is
    returned.
    """
    if text.strip():
        for attempt in range(max_retries):
            try:
                result = translator.translate(text, src='auto', dest='zh-cn')
                time.sleep(0.5)  # rate limit between successful requests
                return result.text
            except Exception as e:
                print(f"翻译失败，重试 {attempt+1}/{max_retries}：{e}")
                time.sleep(1)  # back off before the next attempt
        print("多次翻译失败，返回原文。")
    return text

# Ask for a PowerPoint file through the Colab upload widget. The returned
# dict is empty when the dialog is cancelled, so fail with a clear message
# instead of an IndexError.
uploaded = files.upload()
if not uploaded:
    raise ValueError("未上传任何文件，请重新运行并选择一个 .pptx 文件")
# Only the first uploaded file is processed.
input_ppt_path = next(iter(uploaded))

# Load the presentation and report its slide count.
prs = Presentation(input_ppt_path)
total_slides = len(prs.slides)
print("📄 共加载幻灯片页数：", total_slides)

from pptx.shapes.group import GroupShape


def _iter_shapes(shapes):
    """Yield every leaf shape in *shapes*, descending into group shapes."""
    for shape in shapes:
        if isinstance(shape, GroupShape):
            # Grouped shapes are not listed in slide.shapes directly; without
            # this recursion their text would never be translated.
            yield from _iter_shapes(shape.shapes)
        else:
            yield shape


# Walk every slide and translate its text frames and tables in place.
for slide_idx, slide in enumerate(tqdm(prs.slides, desc="翻译幻灯片", unit="页"), start=1):
    print(f"\n➡️ 正在翻译第 {slide_idx}/{total_slides} 页")

    # Materialised into a list so tqdm can display a total.
    for shape in tqdm(list(_iter_shapes(slide.shapes)), desc="处理形状", leave=False):
        # Shapes that own a text frame (text boxes, titles, auto shapes).
        if shape.has_text_frame:
            for para in shape.text_frame.paragraphs:
                para.text = translate_text(para.text)
                for run in para.runs:
                    run.font.size = Pt(12)  # force 12 pt on translated text

        # Tables: has_table replaces the magic number 19 (MSO_SHAPE_TYPE.TABLE).
        elif shape.has_table:
            for row in shape.table.rows:
                for cell in row.cells:
                    cell.text = translate_text(cell.text)
                    # Table text is also forced to 12 pt.
                    for para in cell.text_frame.paragraphs:
                        for run in para.runs:
                            run.font.size = Pt(12)

# Save the translated deck and hand it to the browser for download.
output_ppt = "translated_output.pptx"
prs.save(output_ppt)
print("\n✅ 翻译完成，准备下载...")
files.download(output_ppt)
