Skip to content

Commit

Permalink
add a chunk splitter, html/link content out
Browse files Browse the repository at this point in the history
  • Loading branch information
mrf0rtuna4 committed May 4, 2024
1 parent 6aba6bd commit 663d678
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/translate-readme.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
uses: actions/checkout@v2

- name: Run translation
uses: mrf0rtuna4/Github-Readme-AutoTranslator@master
uses: mrf0rtuna4/Github-Readme-AutoTranslator@v1.1.0
env:
GITHUB_TOKEN: ${{ secrets.GTK }}
LANGS: 'serbian,italian,english'
Expand Down
19 changes: 9 additions & 10 deletions core/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,21 @@
from deep_translator import GoogleTranslator
import re

print("Current working directory:", os.getcwd())


def read_readme(path="README.md"):
    """Return the full text of the README file.

    Args:
        path: Location of the file to read. Defaults to ``README.md``
            relative to the current working directory.

    Returns:
        The file's contents as a single string, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        return file.read()


def update_localizations():
print("Starting update_localizations...")
readme_content = read_readme()
selected_langs = os.getenv("LANGS")
print("Selected langs:", selected_langs)
no_html_content = re.sub(r"<.*?>", "", readme_content)
no_links_content = re.sub(r"\[([^]]+)]\(([^)]+)\)", r"\1", no_html_content)

no_links_content = re.sub(r"\[([^]]+)]\(([^)]+)\)", r"\1", readme_content)
chunk_size = 5000
chunks = [no_links_content[i:i+chunk_size]
for i in range(0, len(no_links_content), chunk_size)]

languages = [lang.strip() for lang in selected_langs.split(",")]
files = []
Expand All @@ -27,8 +26,9 @@ def update_localizations():

for lang in languages:
try:
translated_content = GoogleTranslator(
source='auto', target=lang).translate(text=no_links_content)
translated_chunks = [GoogleTranslator(
source='auto', target=lang).translate(text=chunk) for chunk in chunks]
translated_content = " ".join(translated_chunks)

with open(f"dist/{lang}.md", "w", encoding="utf-8") as file:
file.write(translated_content)
Expand All @@ -37,8 +37,7 @@ def update_localizations():
except Exception as e:
print(f"Failed to translate to {lang}: {str(e)}")

print("update_localizations finished.")
return files


update_localizations()
update_localizations()

0 comments on commit 663d678

Please sign in to comment.