Skip to content
This repository has been archived by the owner on Mar 18, 2023. It is now read-only.

Refactor #3

Merged
merged 2 commits into from
Feb 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions deepl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import os
import time
import logging

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from srt import SRT


class translator:
    """Translate .srt subtitle files by driving deepl.com in Firefox via Selenium.

    Creating an instance opens a browser window on the DeepL translator page;
    call :meth:`close` when finished to shut the browser down.
    """

    # Language names accepted by ``choose_language`` mapped to the suffix that
    # the matching language button's ``dl-test`` attribute must end with.
    languages = {
        'auto': 'auto',
        'chinese': 'zh',
        'dutch': 'nl',
        'english': 'en',
        'english-us': 'en-us',
        'english-uk': 'en-gb',
        'french': 'fr',
        'german': 'de',
        'italian': 'it',
        'japanese': 'ja',
        'polish': 'pl',
        'portuguese': 'pt',
        'portuguese-br': 'pt-br',
        'russian': 'ru',
        'spanish': 'es'
    }

    def __init__(self):
        """Open Firefox, load the DeepL translator and locate its text areas.

        The ``geckodriver`` executable is expected to sit next to this module.
        If any step after launching the browser fails, the browser is quit
        before the exception is re-raised so no Firefox process is left behind.
        """
        logging.info('Opening browser')
        geckodriver_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'geckodriver')
        )
        self.browser = webdriver.Firefox(executable_path=geckodriver_path)

        try:
            self.browser.maximize_window()

            logging.info('Going to deepl.com')
            self.browser.get('https://www.deepl.com/translator')

            logging.info('Getting source and target inputs')
            self.inputElement = self.browser.find_element_by_class_name('lmt__source_textarea')
            self.targetElement = self.browser.find_element_by_class_name('lmt__target_textarea')

            # Hide the cookie banner so it does not obscure the language
            # options. The plural finder returns an empty list instead of
            # raising, so a page without the banner is simply left alone.
            for cookieBanner in self.browser.find_elements_by_id('dl_cookieBanner'):
                self.browser.execute_script(
                    "arguments[0].style.visibility='hidden'", cookieBanner
                )
        except Exception:
            self.browser.quit()
            raise

    def is_translated(self, text, traslation):
        """Return True when ``traslation`` looks like a finished translation.

        The translation is considered unfinished when it is empty (or None),
        when it contains ``'[...]'`` (presumably DeepL's in-progress
        placeholder -- confirm), or when its number of lines differs from
        that of ``text``.
        """
        if not traslation:
            return False
        if '[...]' in traslation:
            return False
        return len(text.splitlines()) == len(traslation.splitlines())

    def choose_language(self, languageSelect, lang):
        """Open a language selector and click the button for ``lang``.

        ``languageSelect`` is the source or target selector element and
        ``lang`` is a key of :attr:`languages`. When ``lang`` is unknown or no
        matching button is present, an error is printed, the browser is
        closed and ``SystemExit(1)`` is raised.
        """
        # Unknown names resolve to None and take the not-found path below
        # instead of surfacing as a bare KeyError.
        lang_code = self.languages.get(lang)

        languageSelect.click()

        languages_menu = languageSelect.find_element_by_class_name('lmt__language_select__menu')
        languagesButtons = languages_menu.find_elements_by_tag_name('button')

        languageButton = None
        if lang_code is not None:
            for button in languagesButtons:
                # get_attribute returns None when the attribute is absent.
                dl_test = (button.get_attribute('dl-test') or '').lower()
                if dl_test.endswith(lang_code):
                    languageButton = button
                    break

        if languageButton is None:
            print(f'ERROR: Language {lang} was not found. Maybe you choose the same idiom on both')
            self.close()
            # Exit with a failure status through the normal interpreter
            # shutdown so the message above is flushed.
            raise SystemExit(1)

        languageButton.click()

    def translate_srt(self, file, lang_from, lang_to):
        """Translate one .srt file and save it as ``<name>_<lang_to>.srt``.

        The file is sent to DeepL portion by portion; each portion is written
        back into the SRT object once :meth:`is_translated` accepts the
        content of the target text area. ``lang_from`` is accepted for
        symmetry with :meth:`translate` but is not used here; the languages
        must already be selected on the page.
        """
        logging.info(f'Traslating file {file}')
        srt_file = SRT(file)

        sub_id = 0  # ID of the first subtitle of the current portion
        while sub_id < srt_file.n_subtitles:

            logging.info('Clearing input')
            self.inputElement.clear()

            # Wait for the previous translation to disappear so it cannot be
            # mistaken for the translation of the next portion.
            while self.targetElement.get_attribute('value'):
                time.sleep(1)

            logging.info('Getting portion of srt to translate')
            text, sub_id_f = srt_file.extract_portion(sub_id)

            logging.info('Writing portion on input')
            self.inputElement.send_keys(text)

            logging.info('Traslating portion')
            translated_text = self.targetElement.get_attribute('value')
            while not self.is_translated(text, translated_text):
                time.sleep(1)
                translated_text = self.targetElement.get_attribute('value')

            # Store the exact snapshot that passed the check rather than
            # re-reading the text area, which may have changed in between.
            logging.info('Saving portion')
            srt_file.update_text(sub_id, translated_text.splitlines())

            sub_id = sub_id_f

        logging.info('Wraping lines')
        srt_file.wrap_lines()

        filename = os.path.splitext(file)[0]
        file_out = f'{filename}_{lang_to}.srt'

        logging.info(f'saving {file_out}')
        srt_file.save(file_out)

    def translate(self, file_paths, lang_from, lang_to):
        """Select the languages on the page and translate every existing path.

        Paths that do not exist are reported and skipped.
        """
        languageToSelect = self.browser.find_element_by_class_name('lmt__language_select--target')
        languageFromSelect = self.browser.find_element_by_class_name('lmt__language_select--source')

        self.choose_language(languageFromSelect, lang_from)
        self.choose_language(languageToSelect, lang_to)

        for path in file_paths:

            if not os.path.exists(path):
                print(f"INFO: File {path} doesn't exist, skipping...")
                continue

            # NOTE(review): directories pass the existence check and are handed
            # to translate_srt unchanged; confirm whether they should be
            # expanded into the files they contain.
            self.translate_srt(path, lang_from, lang_to)

    def close(self):
        """Quit the browser."""
        self.browser.quit()
166 changes: 50 additions & 116 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import os
import time
import argparse
import logging

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from srt import SRT
import deepl


parser = argparse.ArgumentParser(
description='Converts plain text files to docx and viceversa'
description='Translates .STR files using DeepL.com'
)

parser.add_argument(
Expand All @@ -19,123 +16,60 @@
nargs='+',
help='File to convert'
)

parser.add_argument(
"-v", "--verbose",
help="increase output verbosity",
action="store_true"
'-i',
'--input-lang',
type=str,
default='auto',
choices=('auto', 'chinese', 'dutch', 'english', 'french', 'german',
'italian', 'japanese', 'polish', 'portuguese', 'russian', 'spanish'),
help='Language to translate from'
)

parser.add_argument(
'-o',
'--output-lang',
type=str,
default='spanish',
choices=('chinese', 'dutch', 'english-us', 'english-uk', 'french', 'german',
'italian', 'japanese', 'polish', 'portuguese', 'portuguese-br', 'russian', 'spanish'),
help='Language to translate to'
)

args = parser.parse_args()

if args.verbose:
def verboseprint(*args):
for arg in args:
print(arg)
else:
verboseprint = lambda *a: None # do-nothing function


verboseprint("Opening browser")
geckodriver_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "geckodriver"))

os.environ['MOZ_HEADLESS'] = '1'
browser = webdriver.Firefox(executable_path=geckodriver_path)

verboseprint("Going to deepl.com")
browser.get("https://www.deepl.com/translator")

verboseprint("Getting source and target inputs")
inputElement = browser.find_element_by_class_name("lmt__source_textarea")
targetElement = browser.find_element_by_class_name("lmt__target_textarea")

def traslate_srt(file_in, file_out):
    """Translate the subtitles of ``file_in`` through DeepL and save to ``file_out``.

    Relies on the module-level ``inputElement`` / ``targetElement`` page
    elements and reports progress through ``verboseprint``.
    """
    separator = "=" * 50
    verboseprint("\n" + separator)
    verboseprint(f"File to traslate {file_in}")
    verboseprint(separator)

    subtitles = SRT(file_in)

    current = 0  # ID of the first subtitle of the portion being processed
    while current < subtitles.n_subtitles:

        verboseprint("Clearing input")
        inputElement.clear()

        # Poll once per second until the target text area has emptied.
        while True:
            leftover = targetElement.get_attribute('value')
            if len(leftover) == 0:
                break
            time.sleep(1)

        verboseprint("Getting next portion")
        chunk, following = subtitles.extract_portion(current)

        verboseprint("Writing portion on input")
        inputElement.send_keys(chunk)

        verboseprint("Traslating portion")

        # Poll once per second until the target text passes the check.
        while True:
            candidate = targetElement.get_attribute('value')
            if traslated(chunk, candidate):
                break
            time.sleep(1)

        translated_lines = targetElement.get_attribute('value').splitlines()

        verboseprint("Saving portion")
        subtitles.update_text(current, translated_lines)

        # Continue with the portion that follows the one just stored.
        current = following

    verboseprint("Wraping lines")
    subtitles.wrap_lines()
    verboseprint(f"Saving {file_out}")
    subtitles.save(file_out)


def traslated(text, traslation):
    """Tell whether ``traslation`` is a complete translation of ``text``.

    It is complete when it is non-empty, has as many lines as ``text`` and
    does not contain the ``"[...]"`` marker.
    """
    return (
        len(traslation) != 0
        and len(text.splitlines()) == len(traslation.splitlines())
        and "[...]" not in traslation
    )
parser.add_argument(
'-v',
'--verbose',
action="store_const",
dest="loglevel",
const=logging.INFO,
help="Increase output verbosity",
)

parser.add_argument(
'-vv',
'--debug',
action="store_const",
dest="loglevel",
const=logging.DEBUG,
default=logging.WARNING,
help="Increase output verbosity for debugging",
)

def traslate_all(file_paths):
parser.add_argument(
'-s',
dest="show_gui",
action='store_false',
help='Show browser window'
)

for path in file_paths:

if not os.path.exists(path):
print(f"File '{path}' doesn't exist, skipping...")
continue

if os.path.isdir(path):
for file_path in [os.path.join(path, file_path) for file_path in os.listdir(path)]:
if not os.path.isdir(file_path):
traslate_srt(file_path, os.path.splitext(file_path)[0] + "_traslated.srt")
else:
traslate_srt(path, os.path.splitext(path)[0] + ".srt")
args = parser.parse_args()
logging.basicConfig(level=args.loglevel)

traslate_all(args.filepath)
if args.show_gui:
os.environ['MOZ_HEADLESS'] = '1'

browser.quit()
translator = deepl.translator()
translator.translate(args.filepath, args.input_lang, args.output_lang)
translator.close()
18 changes: 12 additions & 6 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
# SRT-DeepL translator using selenium
# SRT-DeepL translator using Selenium

## Usage
```
main.py [-h] [-v] path [path ...]
main.py [-h] [-v] [-vv] [-s] [-i {language}] [-o {language}] path [path ...]

Translate a .SRT file using DeepL and Selenium

positional arguments:
path: File to convert

optional arguments:
-h, --help: show this help message and exit
-v, --verbose: increase output verbosity
-h, --help: Show this help message and exit
-v, --verbose: Increase output verbosity
-vv, --debug: Increase output verbosity for debugging
-s: Show browser window
-i, --input-lang: Language to translate from
choices: { auto, chinese, dutch, english, french, german, italian, japanese, polish, portuguese, russian, spanish }
-o, --output-lang: Language to translate to
choices: { chinese, dutch, english-us, english-uk, french, german, italian, japanese, polish, portuguese, portuguese-br, russian, spanish}
```

## Setup
Expand All @@ -22,5 +28,5 @@ use ```pip install -r requirements.txt``` or ```execute setup.sh```
Geckodriver -v0.24.0

## Features to add
- Set the language to translate to before starting the translation
- Use voice recognition and generate the subtitles
- Use voice recognition to generate the subtitles
- Use OCR to extract forced subtitles
Loading