From 663d678a3299937a4a715672f60f375730c51fd0 Mon Sep 17 00:00:00 2001 From: mrf0rtuna4 Date: Sat, 4 May 2024 19:40:06 +0300 Subject: [PATCH] add a chunk splitter, html/link content out --- .github/workflows/translate-readme.yml | 2 +- core/translator.py | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/translate-readme.yml b/.github/workflows/translate-readme.yml index f049d7f..24236ef 100644 --- a/.github/workflows/translate-readme.yml +++ b/.github/workflows/translate-readme.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v2 - name: Run translation - uses: mrf0rtuna4/Github-Readme-AutoTranslator@master + uses: mrf0rtuna4/Github-Readme-AutoTranslator@v1.1.0 env: GITHUB_TOKEN: ${{ secrets.GTK }} LANGS: 'serbian,italian,english' diff --git a/core/translator.py b/core/translator.py index 75c607a..7746e1e 100644 --- a/core/translator.py +++ b/core/translator.py @@ -2,22 +2,21 @@ from deep_translator import GoogleTranslator import re -print("Current working directory:", os.getcwd()) - def read_readme(): - print("Прочитал файлик ого") with open("README.md", "r", encoding="utf-8") as file: return file.read() def update_localizations(): - print("Starting update_localizations...") readme_content = read_readme() selected_langs = os.getenv("LANGS") - print("Selected langs:", selected_langs) + no_html_content = re.sub(r"<.*?>", "", readme_content) + no_links_content = re.sub(r"\[([^]]+)]\(([^)]+)\)", r"\1", no_html_content) - no_links_content = re.sub(r"\[([^]]+)]\(([^)]+)\)", r"\1", readme_content) + chunk_size = 5000 + chunks = [no_links_content[i:i+chunk_size] + for i in range(0, len(no_links_content), chunk_size)] languages = [lang.strip() for lang in selected_langs.split(",")] files = [] @@ -27,8 +26,9 @@ def update_localizations(): for lang in languages: try: - translated_content = GoogleTranslator( - source='auto', target=lang).translate(text=no_links_content) + translated_chunks = [GoogleTranslator( + source='auto', target=lang).translate(text=chunk) for chunk in chunks] + translated_content = " ".join(translated_chunks) with open(f"dist/{lang}.md", "w", encoding="utf-8") as file: file.write(translated_content) @@ -37,8 +37,7 @@ def update_localizations(): except Exception as e: print(f"Failed to translate to {lang}: {str(e)}") - print("update_localizations finished.") return files -update_localizations() +update_localizations() \ No newline at end of file