From cbbfadeea86badce4c69f00f70cc6432ffa66a7d Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Tue, 13 Sep 2022 16:35:38 -0400 Subject: [PATCH 01/14] adding Argos Translate component --- python/ArgosTranslation/Dockerfile | 0 .../argos_translation_component/__init__.py | 27 ++ .../argos_translation_component.py | 253 ++++++++++++++++++ .../plugin-files/descriptor.json | 91 +++++++ python/ArgosTranslation/pyproject.toml | 29 ++ .../sample_argos_translator.py | 53 ++++ python/ArgosTranslation/setup.cfg | 39 +++ .../tests/data/spanish_short.txt | 1 + .../tests/test_argos_translate.py | 139 ++++++++++ 9 files changed, 632 insertions(+) create mode 100644 python/ArgosTranslation/Dockerfile create mode 100644 python/ArgosTranslation/argos_translation_component/__init__.py create mode 100644 python/ArgosTranslation/argos_translation_component/argos_translation_component.py create mode 100644 python/ArgosTranslation/plugin-files/descriptor.json create mode 100644 python/ArgosTranslation/pyproject.toml create mode 100644 python/ArgosTranslation/sample_argos_translator.py create mode 100644 python/ArgosTranslation/setup.cfg create mode 100644 python/ArgosTranslation/tests/data/spanish_short.txt create mode 100644 python/ArgosTranslation/tests/test_argos_translate.py diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile new file mode 100644 index 00000000..e69de29b diff --git a/python/ArgosTranslation/argos_translation_component/__init__.py b/python/ArgosTranslation/argos_translation_component/__init__.py new file mode 100644 index 00000000..2966364e --- /dev/null +++ b/python/ArgosTranslation/argos_translation_component/__init__.py @@ -0,0 +1,27 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). 
# +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +from .argos_translation_component import ArgosTranslationComponent, TranslationWrapper diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py new file mode 100644 index 00000000..6b4d593e --- /dev/null +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -0,0 +1,253 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. 
# +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +from argostranslate import package, translate +from typing import Sequence, Dict +import pathlib + +import logging + +import mpf_component_api as mpf +import mpf_component_util as mpf_util + + +logger = logging.getLogger('ArgosTranslationComponent') + + +class ArgosTranslationComponent: + detection_type = 'TRANSLATION' + + def __init__(self): + logger.extra = {} + + @staticmethod + def get_detections_from_video(job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]: + logger.info(f'Received video job') + + try: + if job.feed_forward_track is None: + raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( + f'Component can only process feed forward ' + ' jobs, but no feed forward track provided. 
') + + tw = TranslationWrapper(job.job_properties) + tw.add_translations(job.feed_forward_track.detection_properties) + + for ff_location in job.feed_forward_track.frame_locations.values(): + tw.add_translations(ff_location.detection_properties) + + return [job.feed_forward_track] + + except Exception: + logger.exception( + f'Failed to complete job due to the following exception:') + raise + + @staticmethod + def get_detections_from_image(job: mpf.ImageJob) -> Sequence[mpf.ImageLocation]: + logger.info(f'Received image job') + + try: + if job.feed_forward_location is None: + raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( + f'Component can only process feed forward ' + ' jobs, but no feed forward track provided. ') + + tw = TranslationWrapper(job.job_properties) + tw.add_translations(job.feed_forward_location.detection_properties) + + return [job.feed_forward_location] + + except Exception: + logger.exception( + f'Failed to complete job due to the following exception:') + raise + + @staticmethod + def get_detections_from_audio(job: mpf.AudioJob) -> Sequence[mpf.AudioTrack]: + logger.info(f'Received audio job') + + try: + if job.feed_forward_track is None: + raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( + f'Component can only process feed forward ' + ' jobs, but no feed forward track provided. ') + + tw = TranslationWrapper(job.job_properties) + tw.add_translations(job.feed_forward_track.detection_properties) + + return [job.feed_forward_track] + + except Exception: + logger.exception( + f'Failed to complete job due to the following exception:') + raise + + @staticmethod + def get_detections_from_generic(job: mpf.GenericJob) -> Sequence[mpf.GenericTrack]: + if job.feed_forward_track: + tw = TranslationWrapper(job.job_properties) + tw.add_translations(job.feed_forward_track.detection_properties) + return [job.feed_forward_track] + else: + logger.info('Job did not contain a feed forward track. 
Assuming ' + 'media file is a plain text file containing the text to ' + 'be translated.') + text = pathlib.Path(job.data_uri).read_text().strip() + + new_job_props = { + **job.job_properties, + 'FEED_FORWARD_PROP_TO_PROCESS': 'TEXT' + } + new_ff_props = dict(TEXT=text) + ff_track = mpf.GenericTrack(detection_properties=new_ff_props) + + tw = TranslationWrapper(new_job_props) + tw.add_translations(new_ff_props) + + return[ff_track] + + +class TranslationWrapper: + def __init__(self, job_props): + self.supported_languages = self.get_supported_languages() + + self._installed_languages = translate.get_installed_languages() + self.installed_lang_codes = [lang.code for lang in self._installed_languages] + + self._props_to_translate = [ + prop.strip() for prop in + mpf_util.get_property( + properties=job_props, + key='FEED_FORWARD_PROP_TO_PROCESS', + default_value='TEXT,TRANSCRIPT', + prop_type=str + ).split(',') + ] + + self._lang_prop_names = [ + prop.strip() for prop in + mpf_util.get_property( + properties=job_props, + key='LANGUAGE_FEED_FORWARD_PROP', + default_value='DECODED_LANGUAGE,LANGUAGE', + prop_type=str + ).split(',') + ] + + self._from_lang = mpf_util.get_property( + properties=job_props, + key='DEFAULT_FROM_LANGUAGE', + default_value='es', + prop_type=str + ).lower().strip() + + self._to_lang = "en" + + @staticmethod + def get_supported_languages(): + try: + available_packages = package.get_available_packages() + except Exception: + # TODO log downloading package index + package.update_package_index() + available_packages = package.get_available_packages() + + available_packages = [y.from_code for y in list( + filter( + lambda x: x.to_code == "en", available_packages + ) + )] + return available_packages + + def add_translations(self, ff_props: Dict[str, str]): + for prop_to_translate in self._props_to_translate: + input_text = ff_props.get(prop_to_translate, None) + if input_text: + break + else: + logger.warning("No text to translate found in track") + 
return + + for lang_prop_name in self._lang_prop_names: + if lang_prop_name in ff_props: + lang = ff_props.get(lang_prop_name).lower().strip() + if lang in self.supported_languages: + self._from_lang = lang + break + if lang == 'en': + self._from_lang = lang + else: + if self._from_lang == 'en': + ff_props['SKIPPED_TRANSLATION'] = 'TRUE' + logger.info(f'Skipped translation of the "{prop_to_translate}" ' + f'property because it was already in the target language.') + return + if self._from_lang not in self.supported_languages: + raise mpf.DetectionError.DETECTION_FAILED.exception( + f"Source language, {self._from_lang}, is not " + "supported." + ) + + # print(self._from_lang) + print(self.supported_languages) + # TODO use valid translations to determine if package needs to be downloaded + # valid_translations = list( + # filter(lambda x: x.to_lang.code == self._to_lang, from_lang.translations_from) + # ) + if self._from_lang not in self.installed_lang_codes: + # print("installing language", self._from_lang) + available_packages = package.get_available_packages() + # print(available_packages) + available_package = list( + filter( + lambda x: x.from_code == self._from_lang and x.to_code == self._to_lang, available_packages + ) + )[0] + # print(available_package) + download_path = available_package.download() + # print(download_path) + package.install_from_path(download_path) + + self.installed_lang_codes = [lang.code for lang in translate.get_installed_languages()] + self._installed_languages = translate.get_installed_languages() + + from_lang = list(filter( + lambda x: x.code == self._from_lang, + self._installed_languages))[0] + to_lang = list(filter( + lambda x: x.code == self._to_lang, + self._installed_languages))[0] + + # TODO this will be none if no valid translation model for from_lang->to_lang + translation = from_lang.get_translation(to_lang) + + # print(translation) + translated_text = translation.translate(input_text) + # print(translated_text) + + 
ff_props['TRANSLATION_SOURCE_LANGUAGE'] = self._from_lang + ff_props['TRANSLATION'] = translated_text diff --git a/python/ArgosTranslation/plugin-files/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor.json new file mode 100644 index 00000000..eef745a7 --- /dev/null +++ b/python/ArgosTranslation/plugin-files/descriptor.json @@ -0,0 +1,91 @@ +{ + "componentName": "AzureTranslation", + "componentVersion": "7.0", + "middlewareVersion": "7.0", + "sourceLanguage": "python", + "batchLibrary": "AzureTranslation", + "environmentVariables": [], + "algorithm": { + "name": "AZURETRANSLATION", + "description": "Uses Azure Cognitive Services to perform translation.", + "actionType": "DETECTION", + "requiresCollection": { + "states": [] + }, + "providesCollection": { + "states": [ + "DETECTION", + "DETECTION_TRANSLATION", + "DETECTION_TRANSLATION_AZURE" + ], + "properties": [ + { + "name": "FEED_FORWARD_PROP_TO_PROCESS", + "description": "Comma-separated list of property names indicating which properties in the feed-forward track or detection to consider translating. If the first property listed is present, then that property will be translated. If it's not, then the next property in the list is considered. At most, one property will be translated.", + "type": "STRING", + "defaultValue": "TEXT,TRANSCRIPT" + }, + { + "name": "LANGUAGE_FEED_FORWARD_PROP", + "description": "Comma-separated list of property names indicating which properties in the feed-forward track or detection determine the language from which to translate. If the first property listed is present, then that property will be used. If it's not, then the next property in the list is considered. 
If none are present, fall back to FROM_LANGUAGE.", + "type": "STRING", + "defaultValue": "DECODED_LANGUAGE,LANGUAGE" + }, + { + "name": "TO_LANGUAGE", + "description": "The ISO 639 language code for language that the properties should be translated to.", + "type": "STRING", + "defaultValue": "en" + }, + { + "name": "DEFAULT_FROM_LANGUAGE", + "description": "Optional property that indicates the source language of the text.", + "type": "STRING", + "defaultValue": "es" + } + ] + } + }, + "actions": [ + { + "name": "", + "description": "", + "algorithm": "", + "properties": [ + { + "name": "FEED_FORWARD_TYPE", + "value": "REGION" + }, + { + "name": "OUTPUT_MERGE_WITH_PREVIOUS_TASK", + "value": "TRUE" + } + ] + }, + { + "name": "", + "description": "Uses Argos Translate to perform translation on a plain text file.", + "algorithm": "", + "properties": [ + ] + } + ], + "tasks": [ + { + "name": "", + "description": "", + "actions": [ + "" + ] + } + ], + "pipelines": [ + { + "name": "", + "description": "", + "tasks": [ + "" + ] + } + ] +} \ No newline at end of file diff --git a/python/ArgosTranslation/pyproject.toml b/python/ArgosTranslation/pyproject.toml new file mode 100644 index 00000000..8717e0c8 --- /dev/null +++ b/python/ArgosTranslation/pyproject.toml @@ -0,0 +1,29 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/python/ArgosTranslation/sample_argos_translator.py b/python/ArgosTranslation/sample_argos_translator.py new file mode 100644 index 00000000..f2a8e474 --- /dev/null +++ b/python/ArgosTranslation/sample_argos_translator.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +import sys + +from argos_translation_component import TranslationWrapper + + +def main(): + if len(sys.argv) != 3: + sys.exit(f'Usage {sys.argv[0]} ') + + _, from_lang, text = sys.argv + + detection_props = dict(TEXT="Bonjour") + job_props = dict(FROM_LANGUAGE=from_lang) + TranslationWrapper(job_props).add_translations(detection_props) + + """ + print('TRANSLATION SOURCE LANGUAGE:', detection_props['TRANSLATION SOURCE LANGUAGE']) + print('TRANSLATION SOURCE LANGUAGE CONFIDENCE:', detection_props['TRANSLATION SOURCE LANGUAGE CONFIDENCE']) + print('TRANSLATION:') + print(detection_props['TRANSLATION']) + """ + + +if __name__ == '__main__': + main() diff --git a/python/ArgosTranslation/setup.cfg b/python/ArgosTranslation/setup.cfg new file mode 100644 index 00000000..6eb952ea --- /dev/null +++ b/python/ArgosTranslation/setup.cfg @@ -0,0 +1,39 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +[metadata] +name = ArgosTranslation +version = 0.1 + +[options] +packages = argos_translation_component +install_requires = + mpf_component_api>=7.0 + mpf_component_util>=7.0 + +[options.entry_points] +mpf.exported_component = + component = argos_translation_component.argos_translation_component:ArgosTranslationComponent \ No newline at end of file diff --git a/python/ArgosTranslation/tests/data/spanish_short.txt b/python/ArgosTranslation/tests/data/spanish_short.txt new file mode 100644 index 00000000..acd078c9 --- /dev/null +++ b/python/ArgosTranslation/tests/data/spanish_short.txt @@ -0,0 +1 @@ +¿Dónde está la biblioteca? \ No newline at end of file diff --git a/python/ArgosTranslation/tests/test_argos_translate.py b/python/ArgosTranslation/tests/test_argos_translate.py new file mode 100644 index 00000000..edb38f3f --- /dev/null +++ b/python/ArgosTranslation/tests/test_argos_translate.py @@ -0,0 +1,139 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. 
# +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +from pathlib import Path +import sys +import unittest + +import mpf_component_api as mpf + +from argos_translation_component import ArgosTranslationComponent + +LOCAL_PATH = Path(__file__).parent +sys.path.insert(0, str(LOCAL_PATH.parent)) +TEST_DATA = LOCAL_PATH / 'data' + +SPANISH_SHORT_SAMPLE = '¿Dónde está la biblioteca?' +RUSSIAN_SHORT_SAMPLE = "Где библиотека?" +CHINESE_SHORT_SAMPLE = "谢谢。" +SHORT_OUTPUT = "Where's the library?" +SHORT_OUTPUT_CHINESE = "Thanks." 
+ + +class TestArgosTranslation(unittest.TestCase): + + def test_generic_job(self): + ff_track = mpf.GenericTrack(-1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + job = mpf.GenericJob('Test Generic', 'test.pdf', dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_track) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_generic(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) + + def test_plaintext_job(self): + job = mpf.GenericJob('Test Plaintext', str(TEST_DATA / 'spanish_short.txt'), + dict(DEFAULT_SOURCE_LANGUAGE='ES'), {}) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_generic(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) + + def test_audio_job(self): + ff_track = mpf.AudioTrack(0, 1, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + job = mpf.AudioJob('Test Audio', 'test.wav', 0, 1, dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_track) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) + + def test_image_job(self): + ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_loc) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_image(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, 
result[0].detection_properties['TRANSLATION']) + + def test_video_job(self): + ff_track = mpf.VideoTrack( + 0, 1, -1, + { + 0: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')), + 1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=RUSSIAN_SHORT_SAMPLE, LANGUAGE='RU')) + }, + dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + job = mpf.VideoJob('Test Video', 'test.mp4', 0, 1, dict(DEFAULT_SOURCE_LANGUAGE=''), {}, ff_track) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_video(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual('es', result[0].frame_locations[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual('ru', result[0].frame_locations[1].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) + self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[0].detection_properties['TRANSLATION']) + self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[1].detection_properties['TRANSLATION']) + + def test_language_behavior(self): + ff_track = mpf.VideoTrack( + 0, 1, -1, + { + 0: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=RUSSIAN_SHORT_SAMPLE, LANGUAGE='RU')), + 1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')), + 2: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=CHINESE_SHORT_SAMPLE, LANGUAGE='ZH')), + 3: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SHORT_OUTPUT, LANGUAGE='EN')) + }, + dict(LANGUAGE='ES')) + job = mpf.VideoJob('Test Language', 'test.mp4', 0, 1, {}, {}, ff_track) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_video(job) + + self.assertEqual(1, len(result)) + + # Should skip English tracks + self.assertEqual('TRUE', result[0].frame_locations[3].detection_properties['SKIPPED_TRANSLATION']) + + # Should automatically select the correct 
language for other tracks + self.assertEqual('ru', result[0].frame_locations[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual('es', result[0].frame_locations[1].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual('zh', result[0].frame_locations[2].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[0].detection_properties['TRANSLATION']) + self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[1].detection_properties['TRANSLATION']) + self.assertEqual(SHORT_OUTPUT_CHINESE, result[0].frame_locations[2].detection_properties['TRANSLATION']) + + + + + + From 081d9d8f36654a8bdfece5e2fe1f4eff85ca056d Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Fri, 16 Sep 2022 14:31:39 -0400 Subject: [PATCH 02/14] added tests, implemented all get_detections --- python/ArgosTranslation/Dockerfile | 53 +++++++++++++++++ .../argos_translation_component.py | 51 ++++++++-------- .../{ => descriptor}/descriptor.json | 49 +++++++-------- .../sample_argos_translator.py | 9 +-- .../tests/data/spanish_long.txt | 1 + ...translate.py => test_argos_translation.py} | 59 ++++++++++++++++++- 6 files changed, 166 insertions(+), 56 deletions(-) rename python/ArgosTranslation/plugin-files/{ => descriptor}/descriptor.json (61%) create mode 100644 python/ArgosTranslation/tests/data/spanish_long.txt rename python/ArgosTranslation/tests/{test_argos_translate.py => test_argos_translation.py} (68%) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index e69de29b..9bc338d9 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -0,0 +1,53 @@ +# syntax=docker/dockerfile:1.2 + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. 
IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +ARG BUILD_REGISTRY +ARG BUILD_TAG=latest +FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} + +RUN pip3 install --no-cache-dir 'argostranslate' \ + argospm install translate-zh_en \ + argospm install translate-de_en \ + argospm install translate-fr_en \ + argospm install translate-ru_en \ + argospm install translate-es_en + +ARG RUN_TESTS=false + +RUN --mount=target=.,readwrite \ + install-component.sh; \ + if [ "${RUN_TESTS,,}" == true ]; then python tests/test_argos_translation.py; fi + +LABEL org.label-schema.license="Apache 2.0" \ + org.label-schema.name="OpenMPF Argos Translation" \ + org.label-schema.schema-version="1.0" \ + org.label-schema.url="https://openmpf.github.io" \ + org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \ + org.label-schema.vendor="MITRE" + + diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py index 6b4d593e..8a5c1661 100644 --- 
a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -40,9 +40,6 @@ class ArgosTranslationComponent: detection_type = 'TRANSLATION' - def __init__(self): - logger.extra = {} - @staticmethod def get_detections_from_video(job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]: logger.info(f'Received video job') @@ -74,7 +71,7 @@ def get_detections_from_image(job: mpf.ImageJob) -> Sequence[mpf.ImageLocation]: if job.feed_forward_location is None: raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( f'Component can only process feed forward ' - ' jobs, but no feed forward track provided. ') + 'jobs, but no feed forward track provided. ') tw = TranslationWrapper(job.job_properties) tw.add_translations(job.feed_forward_location.detection_properties) @@ -94,7 +91,7 @@ def get_detections_from_audio(job: mpf.AudioJob) -> Sequence[mpf.AudioTrack]: if job.feed_forward_track is None: raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( f'Component can only process feed forward ' - ' jobs, but no feed forward track provided. ') + 'jobs, but no feed forward track provided. 
') tw = TranslationWrapper(job.job_properties) tw.add_translations(job.feed_forward_track.detection_properties) @@ -133,7 +130,7 @@ def get_detections_from_generic(job: mpf.GenericJob) -> Sequence[mpf.GenericTrac class TranslationWrapper: def __init__(self, job_props): - self.supported_languages = self.get_supported_languages() + self.supported_languages = self.get_supported_languages_codes() self._installed_languages = translate.get_installed_languages() self.installed_lang_codes = [lang.code for lang in self._installed_languages] @@ -160,7 +157,7 @@ def __init__(self, job_props): self._from_lang = mpf_util.get_property( properties=job_props, - key='DEFAULT_FROM_LANGUAGE', + key='DEFAULT_SOURCE_LANGUAGE', default_value='es', prop_type=str ).lower().strip() @@ -168,11 +165,11 @@ def __init__(self, job_props): self._to_lang = "en" @staticmethod - def get_supported_languages(): + def get_supported_languages_codes(): try: available_packages = package.get_available_packages() except Exception: - # TODO log downloading package index + logger.info("Downloading package index.") package.update_package_index() available_packages = package.get_available_packages() @@ -198,8 +195,12 @@ def add_translations(self, ff_props: Dict[str, str]): if lang in self.supported_languages: self._from_lang = lang break - if lang == 'en': + elif lang == 'en': self._from_lang = lang + else: + raise mpf.DetectionError.DETECTION_FAILED.exception( + f"Source language, {lang}, is not supported." + ) else: if self._from_lang == 'en': ff_props['SKIPPED_TRANSLATION'] = 'TRUE' @@ -208,30 +209,23 @@ def add_translations(self, ff_props: Dict[str, str]): return if self._from_lang not in self.supported_languages: raise mpf.DetectionError.DETECTION_FAILED.exception( - f"Source language, {self._from_lang}, is not " - "supported." + f"Default source language, {self._from_lang}, is not supported." 
) - # print(self._from_lang) - print(self.supported_languages) - # TODO use valid translations to determine if package needs to be downloaded - # valid_translations = list( - # filter(lambda x: x.to_lang.code == self._to_lang, from_lang.translations_from) - # ) if self._from_lang not in self.installed_lang_codes: - # print("installing language", self._from_lang) + logger.info(f"Language {self._from_lang} is not installed. Installing package.") available_packages = package.get_available_packages() - # print(available_packages) available_package = list( filter( lambda x: x.from_code == self._from_lang and x.to_code == self._to_lang, available_packages ) )[0] - # print(available_package) + download_path = available_package.download() - # print(download_path) package.install_from_path(download_path) + logger.info(f"Successfully installed {self._from_lang}") + self.installed_lang_codes = [lang.code for lang in translate.get_installed_languages()] self._installed_languages = translate.get_installed_languages() @@ -242,12 +236,19 @@ def add_translations(self, ff_props: Dict[str, str]): lambda x: x.code == self._to_lang, self._installed_languages))[0] - # TODO this will be none if no valid translation model for from_lang->to_lang translation = from_lang.get_translation(to_lang) - # print(translation) + if translation is None: + raise mpf.DetectionError.DETECTION_FAILED.exception( + f"No valid translation model from {self._from_lang} to {self._to_lang}, " + f"check if any packages are missing." 
+ ) + + logger.info(f"Translating the {prop_to_translate} property") + translated_text = translation.translate(input_text) - # print(translated_text) + + logger.info("Translation complete.") ff_props['TRANSLATION_SOURCE_LANGUAGE'] = self._from_lang ff_props['TRANSLATION'] = translated_text diff --git a/python/ArgosTranslation/plugin-files/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json similarity index 61% rename from python/ArgosTranslation/plugin-files/descriptor.json rename to python/ArgosTranslation/plugin-files/descriptor/descriptor.json index eef745a7..ec0b65aa 100644 --- a/python/ArgosTranslation/plugin-files/descriptor.json +++ b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json @@ -1,13 +1,13 @@ { - "componentName": "AzureTranslation", + "componentName": "ArgosTranslation", "componentVersion": "7.0", "middlewareVersion": "7.0", "sourceLanguage": "python", - "batchLibrary": "AzureTranslation", + "batchLibrary": "ArgosTranslation", "environmentVariables": [], "algorithm": { - "name": "AZURETRANSLATION", - "description": "Uses Azure Cognitive Services to perform translation.", + "name": "ARGOSTRANSLATION", + "description": "Uses Argos Translate to perform translation.", "actionType": "DETECTION", "requiresCollection": { "states": [] @@ -16,7 +16,7 @@ "states": [ "DETECTION", "DETECTION_TRANSLATION", - "DETECTION_TRANSLATION_AZURE" + "DETECTION_TRANSLATION_ARGOS" ], "properties": [ { @@ -32,13 +32,7 @@ "defaultValue": "DECODED_LANGUAGE,LANGUAGE" }, { - "name": "TO_LANGUAGE", - "description": "The ISO 639 language code for language that the properties should be translated to.", - "type": "STRING", - "defaultValue": "en" - }, - { - "name": "DEFAULT_FROM_LANGUAGE", + "name": "DEFAULT_SOURCE_LANGUAGE", "description": "Optional property that indicates the source language of the text.", "type": "STRING", "defaultValue": "es" @@ -48,9 +42,9 @@ }, "actions": [ { - "name": "", - "description": "", - "algorithm": "", + 
"name": "ARGOS TRANSLATION (WITH FF REGION) ACTION", + "description": "Uses Argos Translation to perform translation on feed-forward tracks and detections.", + "algorithm": "ARGOSTRANSLATION", "properties": [ { "name": "FEED_FORWARD_TYPE", @@ -63,28 +57,35 @@ ] }, { - "name": "", - "description": "Uses Argos Translate to perform translation on a plain text file.", - "algorithm": "", + "name": "ARGOS TRANSLATION TEXT FILE ACTION", + "description": "Uses Argos Translation to perform translation on a plain text file.", + "algorithm": "ARGOSTRANSLATION", "properties": [ ] } ], "tasks": [ { - "name": "", - "description": "", + "name": "ARGOS TRANSLATION (WITH FF REGION) TASK", + "description": "Uses Argos Translate to perform translation on feed-forward tracks and detections.", + "actions": [ + "ARGOS TRANSLATION (WITH FF REGION) ACTION" + ] + }, + { + "name": "ARGOS TRANSLATION TEXT FILE TASK", + "description": "Uses Argos Translate to perform translation on a plain text file.", "actions": [ - "" + "ARGOS TRANSLATION TEXT FILE ACTION" ] } ], "pipelines": [ { - "name": "", - "description": "", + "name": "ARGOS TRANSLATION TEXT FILE PIPELINE", + "description": "Uses Argos Translate to perform translation on a plain text file.", "tasks": [ - "" + "ARGOS TRANSLATION TEXT FILE TASK" ] } ] diff --git a/python/ArgosTranslation/sample_argos_translator.py b/python/ArgosTranslation/sample_argos_translator.py index f2a8e474..2accc53a 100644 --- a/python/ArgosTranslation/sample_argos_translator.py +++ b/python/ArgosTranslation/sample_argos_translator.py @@ -37,16 +37,13 @@ def main(): _, from_lang, text = sys.argv - detection_props = dict(TEXT="Bonjour") - job_props = dict(FROM_LANGUAGE=from_lang) + detection_props = dict(TEXT=text) + job_props = dict(DEFAULT_SOURCE_LANGUAGE=from_lang) TranslationWrapper(job_props).add_translations(detection_props) - """ - print('TRANSLATION SOURCE LANGUAGE:', detection_props['TRANSLATION SOURCE LANGUAGE']) - print('TRANSLATION SOURCE LANGUAGE 
CONFIDENCE:', detection_props['TRANSLATION SOURCE LANGUAGE CONFIDENCE']) + print('TRANSLATION SOURCE LANGUAGE:', detection_props['TRANSLATION_SOURCE_LANGUAGE']) print('TRANSLATION:') print(detection_props['TRANSLATION']) - """ if __name__ == '__main__': diff --git a/python/ArgosTranslation/tests/data/spanish_long.txt b/python/ArgosTranslation/tests/data/spanish_long.txt new file mode 100644 index 00000000..c1287f81 --- /dev/null +++ b/python/ArgosTranslation/tests/data/spanish_long.txt @@ -0,0 +1 @@ +Sostenemos como evidentes estas verdades: que todos los hombres son creados iguales, que son dotados por su Creador de ciertos derechos inalienables, que entre éstos están la vida, la libertad y la búsqueda de la felicidad. Que para gara ntizar estos derechos se instituyen entre los hombres los gobiernos, que derivan sus poderes legítimos del consentimiento de los gobernados. Que cuando quiera que una forma de gobierno se haga destructora de estos principios, el pueblo tiene el derec ho a reformarla o abolirla e instituir un nuevo gobierno que se funde en dichos principios, y a organizar sus poderes en la forma que a su juicio ofrecerá las mayores probabilidades de alcanzar su seguridad y felicidad. \ No newline at end of file diff --git a/python/ArgosTranslation/tests/test_argos_translate.py b/python/ArgosTranslation/tests/test_argos_translation.py similarity index 68% rename from python/ArgosTranslation/tests/test_argos_translate.py rename to python/ArgosTranslation/tests/test_argos_translation.py index edb38f3f..5ed0a2cc 100644 --- a/python/ArgosTranslation/tests/test_argos_translate.py +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -42,6 +42,18 @@ SHORT_OUTPUT = "Where's the library?" SHORT_OUTPUT_CHINESE = "Thanks." +LONG_OUTPUT = ( + "We hold as evident these truths: that all men are created equal, " + "that they are endowed by their Creator with certain inalienable rights, " + "which among them are life, liberty and the pursuit of happiness. 
" + "That in order to nurture these rights, governments are instituted among men, " + "which derive their legitimate powers from the consent of the governed. " + "Whenever a form of government becomes destroyer of these principles, " + "the people have the right to reform or abolish it and to institute a new government " + "that is founded on these principles, and to organize their powers in the way that in " + "their opinion will offer the greatest chance of achieving their security and happiness." +) + class TestArgosTranslation(unittest.TestCase): @@ -124,7 +136,6 @@ def test_language_behavior(self): # Should skip English tracks self.assertEqual('TRUE', result[0].frame_locations[3].detection_properties['SKIPPED_TRANSLATION']) - # Should automatically select the correct language for other tracks self.assertEqual('ru', result[0].frame_locations[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) self.assertEqual('es', result[0].frame_locations[1].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) self.assertEqual('zh', result[0].frame_locations[2].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) @@ -132,8 +143,54 @@ def test_language_behavior(self): self.assertEqual(SHORT_OUTPUT, result[0].frame_locations[1].detection_properties['TRANSLATION']) self.assertEqual(SHORT_OUTPUT_CHINESE, result[0].frame_locations[2].detection_properties['TRANSLATION']) + def test_large_text(self): + comp = ArgosTranslationComponent() + job = mpf.GenericJob( + job_name='Test Sentence Length', + data_uri=str(TEST_DATA / 'spanish_long.txt'), + job_properties=dict(DEFAULT_SOURCE_LANGUAGE='ES'), + media_properties={}, + feed_forward_track=None + ) + + result = comp.get_detections_from_generic(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(LONG_OUTPUT, result[0].detection_properties['TRANSLATION']) + + def test_no_feed_forward_location(self): + comp = ArgosTranslationComponent() + 
job = mpf.ImageJob('Test', 'test.jpg', {}, {}) + with self.assertRaises(mpf.DetectionException) as cm: + list(comp.get_detections_from_image(job)) + self.assertEqual(mpf.DetectionError.UNSUPPORTED_DATA_TYPE, cm.exception.error_code) + def test_no_feed_forward_track(self): + comp = ArgosTranslationComponent() + job = mpf.VideoJob('test', 'test.mp4', 0, 1, {}, {}) + with self.assertRaises(mpf.DetectionException) as cm: + list(comp.get_detections_from_video(job)) + self.assertEqual(mpf.DetectionError.UNSUPPORTED_DATA_TYPE, cm.exception.error_code) + job = mpf.AudioJob('Test Audio', 'test.wav', 0, 1, {}, {}) + with self.assertRaises(mpf.DetectionException) as cm: + list(comp.get_detections_from_audio(job)) + self.assertEqual(mpf.DetectionError.UNSUPPORTED_DATA_TYPE, cm.exception.error_code) + def test_unsupported_language(self): + ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA')) + job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='es'), {}, ff_loc) + comp = ArgosTranslationComponent() + + with self.assertRaises(mpf.DetectionException) as cm: + list(comp.get_detections_from_image(job)) + self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) + + job = mpf.GenericJob('Test Plaintext', str(TEST_DATA / 'spanish_short.txt'), + dict(DEFAULT_SOURCE_LANGUAGE='SPA'), {}) + with self.assertRaises(mpf.DetectionException) as cm: + list(comp.get_detections_from_generic(job)) + self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) \ No newline at end of file From adb229e72aef9491c82a985cd9d0d6d1fdb0f7e8 Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Thu, 6 Oct 2022 14:28:38 -0400 Subject: [PATCH 03/14] added readme, fixed bug in Dockerfile --- python/ArgosTranslation/Dockerfile | 3 +- python/ArgosTranslation/README.md | 58 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 python/ArgosTranslation/README.md diff --git 
a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index 9bc338d9..4053a870 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -30,8 +30,9 @@ ARG BUILD_REGISTRY ARG BUILD_TAG=latest FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} +argospm update + RUN pip3 install --no-cache-dir 'argostranslate' \ - argospm install translate-zh_en \ argospm install translate-de_en \ argospm install translate-fr_en \ argospm install translate-ru_en \ diff --git a/python/ArgosTranslation/README.md b/python/ArgosTranslation/README.md new file mode 100644 index 00000000..35e20246 --- /dev/null +++ b/python/ArgosTranslation/README.md @@ -0,0 +1,58 @@ +# Overview + +This repository contains source code for the OpenMPF Argos Translation Component. + +This component translates the input text from a given source language to English. The source language can be provided as a job property, or be indicated in the detection properties from a feed-forward track. + + +# Job Properties +The below properties can be optionally provided to alter the behavior of the component. + +- `FEED_FORWARD_PROP_TO_PROCESS`: Controls which properties of the feed-forward track or detection are considered for translation. This should be a comma-separated list of property names (default: `"TEXT,TRANSCRIPT"`). The first named property in the list that is present is translated. At most, one property will be translated. + +- `LANGUAGE_FEED_FORWARD_PROP`: As above, a comma-separated list of property names (default: `"DECODED_LANGUAGE,LANGUAGE"`), which indicate which property of the feed-forward track or detection may be used to determine the source language of the text to be translated. This language is expected to be provided as an ISO 639-1 language code. + +- `DEFAULT_SOURCE_LANGUAGE`: The default source language to use if none of the property names listed in `LANGUAGE_FEED_FORWARD_PROP` are present in a feed-forward track or detection. 
This language is used when running a generic job with a raw text file (hence no feed-forward tracks). + + +# Language Identifiers +The following are the ISO 639-1 codes and their corresponding languages which Argos Translate version 1.7.0 can translate to English. + +All translations are either to English or from English. When trying to translate from one non-English language to another, Argos will automatically pivot between languages using the currently installed packages. For example, for Spanish->French Argos would pivot from Spanish->English to English->French. This is associated with a drop in accuracy and increase in runtime. + +Language packages are downloaded dynamically as needed. In addition, when building a Docker image the Dockerfile pre-installs German, French, Russian, and Spanish. + +Note: Argos underperforms when translating to and from Chinese + +| ISO | Language | +| --- |------------------| +| `ar` | Arabic | +| `az` | Azerbaijani | +| `zh` | Chinese | +| `cs` | Czech | +| `da` | Danish | +| `nl` | Dutch | +| `eo` | Esperanto | +| `fi` | Finnish | +| `fr` | French | +| `de` | German | +| `el` | Greek | +| `he` | Hebrew | +| `hi` | Hindi | +| `hu` | Hungarian | +| `id` | Indonesian | +| `ga` | Irish | +| `it` | Italian | +| `ja` | Japanese | +| `ko` | Korean | +| `fa` | Persian | +| `pl` | Polish | +| `pt` | Portuguese | +| `ru` | Russian | +| `sk` | Slovak | +| `es` | Spanish | +| `sv` | Swedish | +| `tr` | Turkish | +| `uk` | Ukrainian | + + From 41e0cc4e900c3d1bbb9cc846dfc7733f283850cb Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Thu, 6 Oct 2022 14:32:12 -0400 Subject: [PATCH 04/14] fixed bug in Dockerfile --- python/ArgosTranslation/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index 4053a870..f3907574 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -30,7 +30,7 @@ ARG BUILD_REGISTRY ARG
BUILD_TAG=latest FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} -argospm update +RUN argospm update RUN pip3 install --no-cache-dir 'argostranslate' \ argospm install translate-de_en \ From 3c5520f0915b9e00917a307f13b7825b8620a924 Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Thu, 6 Oct 2022 14:33:39 -0400 Subject: [PATCH 05/14] fixed bug in Dockerfile --- python/ArgosTranslation/Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index f3907574..9655b8ed 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -30,10 +30,12 @@ ARG BUILD_REGISTRY ARG BUILD_TAG=latest FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} -RUN argospm update -RUN pip3 install --no-cache-dir 'argostranslate' \ - argospm install translate-de_en \ +RUN pip3 install --no-cache-dir 'argostranslate' + +RUN argospm update + +RUN argospm install translate-de_en \ argospm install translate-fr_en \ argospm install translate-ru_en \ argospm install translate-es_en From ad30c858d5d6c7d1bfc64f21c7f2877d520b2c5d Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Thu, 5 Jan 2023 17:02:56 -0500 Subject: [PATCH 06/14] added NOTICE, added ISO 639-2 mappings, refactored get_detections methods --- python/ArgosTranslation/Dockerfile | 4 +- python/ArgosTranslation/README.md | 64 ++++----- .../argos_translation_component.py | 133 ++++++++++-------- .../plugin-files/descriptor/descriptor.json | 2 +- python/ArgosTranslation/tests/data/NOTICE | 4 + .../tests/test_argos_translation.py | 14 +- 6 files changed, 123 insertions(+), 98 deletions(-) create mode 100644 python/ArgosTranslation/tests/data/NOTICE diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index 9655b8ed..cff6c19e 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -34,7 +34,9 @@ FROM 
${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} RUN pip3 install --no-cache-dir 'argostranslate' RUN argospm update - + +# To download a language model use argospm install translate__en +# Refer to README for list of supported languages RUN argospm install translate-de_en \ argospm install translate-fr_en \ argospm install translate-ru_en \ diff --git a/python/ArgosTranslation/README.md b/python/ArgosTranslation/README.md index 35e20246..d6f5b598 100644 --- a/python/ArgosTranslation/README.md +++ b/python/ArgosTranslation/README.md @@ -10,13 +10,13 @@ The below properties can be optionally provided to alter the behavior of the com - `FEED_FORWARD_PROP_TO_PROCESS`: Controls which properties of the feed-forward track or detection are considered for translation. This should be a comma-separated list of property names (default: `"TEXT,TRANSCRIPT"`). The first named property in the list that is present is translated. At most, one property will be translated. -- `LANGUAGE_FEED_FORWARD_PROP`: As above, a comma-separated list of property names (default: `"DECODED_LANGUAGE,LANGUAGE"`), which indicate which property of the feed-forward track or detection may be used to determine the source language of the text to be translated. This language is expected to be provided as an ISO 639-1 language code. +- `LANGUAGE_FEED_FORWARD_PROP`: As above, a comma-separated list of property names (default: `"DECODED_LANGUAGE,LANGUAGE"`), which indicate which property of the feed-forward track or detection may be used to determine the source language of the text to be translated. This language is expected to be provided as an ISO 639-1 or an ISO 639-2 language code. - `DEFAULT_SOURCE_LANGUAGE`: The default source language to use if none of the property names listed in `LANGUAGE_FEED_FORWARD_PROP` are present in a feed-forward track or detection. This language is used when running a generic job with a raw text file (hence no feed-forward tracks). 
# Language Identifiers -The following are the ISO 639-1 codes and their corresponding languages which Argos Translate version 1.7.0 can translate to English. +The following are the ISO 639-1 codes, the ISO 639-2 codes, and their corresponding languages which Argos Translate version 1.7.0 can translate to English. All translations are either to English or from English. When trying to translate from one non-English language to another, Argos will automatically pivot between languages using the currently installed packages. For example, for Spanish->French Argos would pivot from Spanish->English to English->French. This is associated with a drop in accuracy and increase in runtime. @@ -24,35 +24,35 @@ Lanuage packages are downloaded dynamically as needed. In addition, when buildin Note: Argos underperforms when translating to and from Chinese -| ISO | Language | -| --- |------------------| -| `ar` | Arabic | -| `az` | Azerbaijani | -| `zh` | Chinese | -| `cs` | Czech | -| `da` | Danish | -| `nl` | Dutch | -| `eo` | Esperanto | -| `fi` | Finnish | -| `fr` | French | -| `de` | German | -| `el` | Greek | -| `he` | Hebrew | -| `hi` | Hindi | -| `hu` | Hungarian | -| `id` | Indonesian | -| `ga` | Irish | -| `it` | Italian | -| `ja` | Japanese | -| `ko` | Korean | -| `fa` | Persian | -| `pl` | Polish | -| `pt` | Portuguese | -| `ru` | Russian | -| `sk` | Slovak | -| `es` | Spanish | -| `sv` | Swedish | -| `tr` | Turkish | -| `uk` | Ukrainian | +| ISO-639-1 | ISO-639-2 | Language | +| --- |---|------------------| +| `ar` | `ara` | Arabic | +| `az` | `aze` | Azerbaijani | +| `zh` | `cmn` | Chinese | +| `cs` | `ces` | Czech | +| `da` | `dan` | Danish | +| `nl` | `nld` | Dutch | +| `eo` | `epo` | Esperanto | +| `fi` | `fin` | Finnish | +| `fr` | `fra` | French | +| `de` | `deu` | German | +| `el` | `ell` | Greek | +| `he` | `heb` | Hebrew | +| `hi` | `hin` | Hindi | +| `hu` | `hun` | Hungarian | +| `id` | `ind` | Indonesian | +| `ga` | `gle` | Irish | +| `it` | `ita` | Italian 
| +| `ja` | `jpn` | Japanese | +| `ko` | `kor` | Korean | +| `fa` | `fas` | Persian | +| `pl` | `pol` | Polish | +| `pt` | `por` | Portuguese | +| `ru` | `rus` | Russian | +| `sk` | `slk` | Slovak | +| `es` | `spa` | Spanish | +| `sv` | `swe` | Swedish | +| `tr` | `tur` | Turkish | +| `uk` | `ukr` | Ukrainian | diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py index 8a5c1661..c0041aa3 100644 --- a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -33,78 +33,30 @@ import mpf_component_api as mpf import mpf_component_util as mpf_util - logger = logging.getLogger('ArgosTranslationComponent') class ArgosTranslationComponent: detection_type = 'TRANSLATION' - @staticmethod - def get_detections_from_video(job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]: - logger.info(f'Received video job') - - try: - if job.feed_forward_track is None: - raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( - f'Component can only process feed forward ' - ' jobs, but no feed forward track provided. 
') - - tw = TranslationWrapper(job.job_properties) - tw.add_translations(job.feed_forward_track.detection_properties) - - for ff_location in job.feed_forward_track.frame_locations.values(): - tw.add_translations(ff_location.detection_properties) - - return [job.feed_forward_track] - - except Exception: - logger.exception( - f'Failed to complete job due to the following exception:') - raise + def get_detections_from_video(self, job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]: + logger.info(f'Received video job.') - @staticmethod - def get_detections_from_image(job: mpf.ImageJob) -> Sequence[mpf.ImageLocation]: - logger.info(f'Received image job') + return self.get_feed_forward_detections(job, job.feed_forward_track, video_job=True) - try: - if job.feed_forward_location is None: - raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( - f'Component can only process feed forward ' - 'jobs, but no feed forward track provided. ') + def get_detections_from_image(self, job: mpf.ImageJob) -> Sequence[mpf.ImageLocation]: + logger.info(f'Received image job.') - tw = TranslationWrapper(job.job_properties) - tw.add_translations(job.feed_forward_location.detection_properties) + return self.get_feed_forward_detections(job, job.feed_forward_location) - return [job.feed_forward_location] + def get_detections_from_audio(self, job: mpf.AudioJob) -> Sequence[mpf.AudioTrack]: + logger.info(f'Received audio job.') - except Exception: - logger.exception( - f'Failed to complete job due to the following exception:') - raise - - @staticmethod - def get_detections_from_audio(job: mpf.AudioJob) -> Sequence[mpf.AudioTrack]: - logger.info(f'Received audio job') - - try: - if job.feed_forward_track is None: - raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( - f'Component can only process feed forward ' - 'jobs, but no feed forward track provided. 
') - - tw = TranslationWrapper(job.job_properties) - tw.add_translations(job.feed_forward_track.detection_properties) - - return [job.feed_forward_track] - - except Exception: - logger.exception( - f'Failed to complete job due to the following exception:') - raise + return self.get_feed_forward_detections(job, job.feed_forward_track) @staticmethod def get_detections_from_generic(job: mpf.GenericJob) -> Sequence[mpf.GenericTrack]: + logger.info(f'Received generic job.') if job.feed_forward_track: tw = TranslationWrapper(job.job_properties) tw.add_translations(job.feed_forward_track.detection_properties) @@ -125,7 +77,29 @@ def get_detections_from_generic(job: mpf.GenericJob) -> Sequence[mpf.GenericTrac tw = TranslationWrapper(new_job_props) tw.add_translations(new_ff_props) - return[ff_track] + return [ff_track] + + @staticmethod + def get_feed_forward_detections(job, job_feed_forward, video_job=False): + try: + if job_feed_forward is None: + raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception( + f'Component can only process feed forward ' + ' jobs, but no feed forward track provided. ') + + tw = TranslationWrapper(job. 
job_properties) + tw.add_translations(job_feed_forward.detection_properties) + + if video_job: + for ff_location in job.feed_forward_track.frame_locations.values(): + tw.add_translations(ff_location.detection_properties) + + return [job_feed_forward] + + except Exception: + logger.exception( + f'Failed to complete job due to the following exception:') + raise class TranslationWrapper: @@ -164,6 +138,37 @@ def __init__(self, job_props): self._to_lang = "en" + self.iso_map = { + "ara": "ar", + "aze": "az", + "cmn": "zh", + "ces": "cs", + "dan": "da", + "nld": "nl", + "epo": "eo", + "fin": "fi", + "fra": "fr", + "deu": "de", + "ell": "el", + "heb": "he", + "hin": "hi", + "hun": "hu", + "ind": "id", + "gle": "ga", + "ita": "it", + "jpn": "ja", + "kor": "ko", + "fas": "fa", + "pol": "pl", + "por": "pt", + "rus": "ru", + "slk": "sk", + "spa": "es", + "swe": "sv", + "tur": "tr", + "ukr": "uk" + } + @staticmethod def get_supported_languages_codes(): try: @@ -186,7 +191,7 @@ def add_translations(self, ff_props: Dict[str, str]): if input_text: break else: - logger.warning("No text to translate found in track") + logger.warning("No text to translate found in track.") return for lang_prop_name in self._lang_prop_names: @@ -197,6 +202,8 @@ def add_translations(self, ff_props: Dict[str, str]): break elif lang == 'en': self._from_lang = lang + elif lang in self.iso_map: + self._from_lang = self.iso_map[lang] else: raise mpf.DetectionError.DETECTION_FAILED.exception( f"Source language, {lang}, is not supported." @@ -214,6 +221,8 @@ def add_translations(self, ff_props: Dict[str, str]): if self._from_lang not in self.installed_lang_codes: logger.info(f"Language {self._from_lang} is not installed. Installing package.") + + # From Argos Translate for downloading language models. 
available_packages = package.get_available_packages() available_package = list( filter( @@ -224,7 +233,7 @@ def add_translations(self, ff_props: Dict[str, str]): download_path = available_package.download() package.install_from_path(download_path) - logger.info(f"Successfully installed {self._from_lang}") + logger.info(f"Successfully installed {self._from_lang}.") self.installed_lang_codes = [lang.code for lang in translate.get_installed_languages()] self._installed_languages = translate.get_installed_languages() @@ -244,7 +253,7 @@ def add_translations(self, ff_props: Dict[str, str]): f"check if any packages are missing." ) - logger.info(f"Translating the {prop_to_translate} property") + logger.info(f"Translating the {prop_to_translate} property.") translated_text = translation.translate(input_text) diff --git a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json index ec0b65aa..680bc133 100644 --- a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json +++ b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json @@ -29,7 +29,7 @@ "name": "LANGUAGE_FEED_FORWARD_PROP", "description": "Comma-separated list of property names indicating which properties in the feed-forward track or detection determine the language from which to translate. If the first property listed is present, then that property will be used. If it's not, then the next property in the list is considered. 
If none are present, fall back to FROM_LANGUAGE.", "type": "STRING", - "defaultValue": "DECODED_LANGUAGE,LANGUAGE" + "defaultValue": "DECODED_LANGUAGE,LANGUAGE,ISO_LANGUAGE" }, { "name": "DEFAULT_SOURCE_LANGUAGE", diff --git a/python/ArgosTranslation/tests/data/NOTICE b/python/ArgosTranslation/tests/data/NOTICE new file mode 100644 index 00000000..a7262730 --- /dev/null +++ b/python/ArgosTranslation/tests/data/NOTICE @@ -0,0 +1,4 @@ +# spanish_long.txt +Contains part of The Declaration of Independence translated to Spanish. +Public Domain +https://www.state.gov/wp-content/uploads/2020/02/Spanish-translation-U.S.-Declaration-of-Independence.pdf \ No newline at end of file diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py index 5ed0a2cc..4a5e160f 100644 --- a/python/ArgosTranslation/tests/test_argos_translation.py +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -181,7 +181,7 @@ def test_no_feed_forward_track(self): self.assertEqual(mpf.DetectionError.UNSUPPORTED_DATA_TYPE, cm.exception.error_code) def test_unsupported_language(self): - ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA')) + ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='IS')) job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='es'), {}, ff_loc) comp = ArgosTranslationComponent() @@ -193,4 +193,14 @@ def test_unsupported_language(self): dict(DEFAULT_SOURCE_LANGUAGE='SPA'), {}) with self.assertRaises(mpf.DetectionException) as cm: list(comp.get_detections_from_generic(job)) - self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) \ No newline at end of file + self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) + + def test_iso_map(self): + ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA')) + job = mpf.ImageJob('Test 
Image', 'test.jpg', {}, {}, ff_loc) + comp = ArgosTranslationComponent() + result = comp.get_detections_from_image(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) \ No newline at end of file From 4a70a0999649527b7ec6262f4043f2a10d61881e Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Thu, 16 Mar 2023 11:29:02 -0400 Subject: [PATCH 07/14] added LICENSE, updated copyright, added caching of previous results to minimize processing time --- python/ArgosTranslation/Dockerfile | 4 +-- python/ArgosTranslation/LICENSE | 31 ++++++++++++++++ .../argos_translation_component.py | 13 +++++-- .../plugin-files/descriptor/descriptor.json | 6 ++-- python/ArgosTranslation/pyproject.toml | 6 ++-- .../sample_argos_translator.py | 4 +-- python/ArgosTranslation/setup.cfg | 6 ++-- .../tests/test_argos_translation.py | 35 +++++++++++++++++-- 8 files changed, 87 insertions(+), 18 deletions(-) create mode 100644 python/ArgosTranslation/LICENSE diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index cff6c19e..813f04c8 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -7,11 +7,11 @@ # under contract, and is subject to the Rights in Data-General Clause # # 52.227-14, Alt. IV (DEC 2007). # # # -# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # ############################################################################# ############################################################################# -# Copyright 2022 The MITRE Corporation # +# Copyright 2023 The MITRE Corporation # # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. 
# diff --git a/python/ArgosTranslation/LICENSE b/python/ArgosTranslation/LICENSE new file mode 100644 index 00000000..37cc426e --- /dev/null +++ b/python/ArgosTranslation/LICENSE @@ -0,0 +1,31 @@ +/****************************************************************************** +* Copyright 2023 The MITRE Corporation * +* * +* Licensed under the Apache License, Version 2.0 (the "License"); * +* you may not use this file except in compliance with the License. * +* You may obtain a copy of the License at * +* * +* http://www.apache.org/licenses/LICENSE-2.0 * +* * +* Unless required by applicable law or agreed to in writing, software * +* distributed under the License is distributed on an "AS IS" BASIS, * +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * +* See the License for the specific language governing permissions and * +* limitations under the License. * +******************************************************************************/ + +This project contains content developed by The MITRE Corporation. If this code +is used in a deployment or embedded within another project, it is requested +that you send an email to opensource@mitre.org in order to let us know where +this software is being used. + +This software makes use of source code and data files from third party software: + +-------------------------------------------------------------------------- + +Argos Translate is licensed under the MIT License. 
+Copyright (c) 2020 Argos Open Technologies, LLC + +Project Page: https://github.com/argosopentech/argos-translate + +-------------------------------------------------------------------------- \ No newline at end of file diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py index c0041aa3..de99332a 100644 --- a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -5,11 +5,11 @@ # under contract, and is subject to the Rights in Data-General Clause # # 52.227-14, Alt. IV (DEC 2007). # # # -# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # ############################################################################# ############################################################################# -# Copyright 2022 The MITRE Corporation # +# Copyright 2023 The MITRE Corporation # # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. 
# @@ -169,6 +169,8 @@ def __init__(self, job_props): "ukr": "uk" } + self._translation_cache: Dict[str, (str, str)] = {} + @staticmethod def get_supported_languages_codes(): try: @@ -194,6 +196,11 @@ def add_translations(self, ff_props: Dict[str, str]): logger.warning("No text to translate found in track.") return + if cached_translation := self._translation_cache.get(input_text): + ff_props['TRANSLATION'] = cached_translation[0] + ff_props['TRANSLATION_SOURCE_LANGUAGE'] = cached_translation[1] + return + for lang_prop_name in self._lang_prop_names: if lang_prop_name in ff_props: lang = ff_props.get(lang_prop_name).lower().strip() @@ -257,6 +264,8 @@ def add_translations(self, ff_props: Dict[str, str]): translated_text = translation.translate(input_text) + self._translation_cache[input_text] = (translated_text, self._from_lang) + logger.info("Translation complete.") ff_props['TRANSLATION_SOURCE_LANGUAGE'] = self._from_lang diff --git a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json index 680bc133..51c700ff 100644 --- a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json +++ b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json @@ -1,7 +1,7 @@ { "componentName": "ArgosTranslation", - "componentVersion": "7.0", - "middlewareVersion": "7.0", + "componentVersion": "7.1", + "middlewareVersion": "7.1", "sourceLanguage": "python", "batchLibrary": "ArgosTranslation", "environmentVariables": [], @@ -89,4 +89,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/python/ArgosTranslation/pyproject.toml b/python/ArgosTranslation/pyproject.toml index 8717e0c8..d2aa20ab 100644 --- a/python/ArgosTranslation/pyproject.toml +++ b/python/ArgosTranslation/pyproject.toml @@ -5,11 +5,11 @@ # under contract, and is subject to the Rights in Data-General Clause # # 52.227-14, Alt. IV (DEC 2007). # # # -# Copyright 2022 The MITRE Corporation. All Rights Reserved. 
# +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # ############################################################################# ############################################################################# -# Copyright 2022 The MITRE Corporation # +# Copyright 2023 The MITRE Corporation # # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. # @@ -26,4 +26,4 @@ [build-system] requires = ["setuptools"] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" diff --git a/python/ArgosTranslation/sample_argos_translator.py b/python/ArgosTranslation/sample_argos_translator.py index 2accc53a..f0a299c1 100644 --- a/python/ArgosTranslation/sample_argos_translator.py +++ b/python/ArgosTranslation/sample_argos_translator.py @@ -7,11 +7,11 @@ # under contract, and is subject to the Rights in Data-General Clause # # 52.227-14, Alt. IV (DEC 2007). # # # -# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # ############################################################################# ############################################################################# -# Copyright 2022 The MITRE Corporation # +# Copyright 2023 The MITRE Corporation # # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. # diff --git a/python/ArgosTranslation/setup.cfg b/python/ArgosTranslation/setup.cfg index 6eb952ea..6dbba2c2 100644 --- a/python/ArgosTranslation/setup.cfg +++ b/python/ArgosTranslation/setup.cfg @@ -5,11 +5,11 @@ # under contract, and is subject to the Rights in Data-General Clause # # 52.227-14, Alt. IV (DEC 2007). # # # -# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. 
# ############################################################################# ############################################################################# -# Copyright 2022 The MITRE Corporation # +# Copyright 2023 The MITRE Corporation # # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. # @@ -36,4 +36,4 @@ install_requires = [options.entry_points] mpf.exported_component = - component = argos_translation_component.argos_translation_component:ArgosTranslationComponent \ No newline at end of file + component = argos_translation_component.argos_translation_component:ArgosTranslationComponent diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py index 4a5e160f..90157cde 100644 --- a/python/ArgosTranslation/tests/test_argos_translation.py +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -5,11 +5,11 @@ # under contract, and is subject to the Rights in Data-General Clause # # 52.227-14, Alt. IV (DEC 2007). # # # -# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # ############################################################################# ############################################################################# -# Copyright 2022 The MITRE Corporation # +# Copyright 2023 The MITRE Corporation # # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. 
# @@ -203,4 +203,33 @@ def test_iso_map(self): self.assertEqual(1, len(result)) self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE']) - self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) \ No newline at end of file + self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION']) + + def test_translation_cache(self): + ff_track = mpf.VideoTrack( + 0, 1, -1, + { + 0: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')), + 1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TRANSCRIPT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + }, + dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + + job = mpf.VideoJob('test', 'test.jpg', 0, 1, {}, {}, ff_track) + + comp = ArgosTranslationComponent() + results = comp.get_detections_from_video(job) + + self.assertEqual(1, len(results)) + result = results[0] + + self.assertEqual(SPANISH_SHORT_SAMPLE, result.detection_properties['TEXT']) + self.assertEqual(SHORT_OUTPUT, result.detection_properties['TRANSLATION']) + self.assertEqual('es', result.detection_properties['TRANSLATION_SOURCE_LANGUAGE']) + + detection1 = result.frame_locations[0] + self.assertEqual(SPANISH_SHORT_SAMPLE, detection1.detection_properties['TEXT']) + self.assertEqual(SHORT_OUTPUT, detection1.detection_properties['TRANSLATION']) + + detection2 = result.frame_locations[1] + self.assertEqual(SPANISH_SHORT_SAMPLE, detection2.detection_properties['TRANSCRIPT']) + self.assertEqual(SHORT_OUTPUT, detection2.detection_properties['TRANSLATION']) From 022a41dee05195b85bed33f577cf0450a1bc9d48 Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Sat, 1 Apr 2023 03:58:31 -0400 Subject: [PATCH 08/14] Updating dockerfile. 
--- python/ArgosTranslation/Dockerfile | 9 +++++---- python/ArgosTranslation/setup.cfg | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index 813f04c8..b5725c27 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -30,16 +30,17 @@ ARG BUILD_REGISTRY ARG BUILD_TAG=latest FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} +RUN -RUN pip3 install --no-cache-dir 'argostranslate' +RUN pip3 install --no-cache-dir 'argostranslate>=1.8.0' RUN argospm update # To download a language model use argospm install translate__en # Refer to README for list of supported languages -RUN argospm install translate-de_en \ - argospm install translate-fr_en \ - argospm install translate-ru_en \ +RUN argospm install translate-de_en && \ + argospm install translate-fr_en && \ + argospm install translate-ru_en && \ argospm install translate-es_en ARG RUN_TESTS=false diff --git a/python/ArgosTranslation/setup.cfg b/python/ArgosTranslation/setup.cfg index 6dbba2c2..39083c32 100644 --- a/python/ArgosTranslation/setup.cfg +++ b/python/ArgosTranslation/setup.cfg @@ -33,6 +33,7 @@ packages = argos_translation_component install_requires = mpf_component_api>=7.0 mpf_component_util>=7.0 + argostranslate>=1.8.0 [options.entry_points] mpf.exported_component = From 58dc6c27582b44b237e14e1e0b8611714ed60506 Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Sat, 1 Apr 2023 05:00:40 -0400 Subject: [PATCH 09/14] config update --- python/ArgosTranslation/setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ArgosTranslation/setup.cfg b/python/ArgosTranslation/setup.cfg index 39083c32..38b8b5f5 100644 --- a/python/ArgosTranslation/setup.cfg +++ b/python/ArgosTranslation/setup.cfg @@ -31,8 +31,8 @@ version = 0.1 [options] packages = argos_translation_component install_requires = - mpf_component_api>=7.0 - mpf_component_util>=7.0 + 
mpf_component_api>=7.1 + mpf_component_util>=7.1 argostranslate>=1.8.0 [options.entry_points] From f8b12dd7080685013939aa65eaa633feb4d01b7a Mon Sep 17 00:00:00 2001 From: Howard W Huang <40070840+hhuangMITRE@users.noreply.github.com> Date: Mon, 3 Apr 2023 19:26:44 -0400 Subject: [PATCH 10/14] Update Dockerfile --- python/ArgosTranslation/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index b5725c27..a5dd6ea6 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -30,8 +30,6 @@ ARG BUILD_REGISTRY ARG BUILD_TAG=latest FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} -RUN - RUN pip3 install --no-cache-dir 'argostranslate>=1.8.0' RUN argospm update From 6fb55f5f77811b24e8f135c4b9800e989fd85cce Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Tue, 2 May 2023 10:58:07 -0400 Subject: [PATCH 11/14] Adding model download for Chinese. May want to add other models of interest. 
--- python/ArgosTranslation/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index a5dd6ea6..0510bc0e 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -39,6 +39,7 @@ RUN argospm update RUN argospm install translate-de_en && \ argospm install translate-fr_en && \ argospm install translate-ru_en && \ + argospm install translate-zh_en && \ argospm install translate-es_en ARG RUN_TESTS=false From 9e93c011a7d636342b0eeeabdc98a93bf16e23e3 Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Fri, 19 May 2023 14:01:32 -0400 Subject: [PATCH 12/14] Fixed Chinese test, changed default lang behavior --- python/ArgosTranslation/Dockerfile | 3 +- python/ArgosTranslation/README.md | 2 +- .../argos_translation_component.py | 13 +++++--- .../plugin-files/descriptor/descriptor.json | 4 +-- python/ArgosTranslation/setup.cfg | 6 ++-- .../tests/test_argos_translation.py | 33 +++++++++++++++++-- 6 files changed, 47 insertions(+), 14 deletions(-) diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile index 0510bc0e..705962b8 100644 --- a/python/ArgosTranslation/Dockerfile +++ b/python/ArgosTranslation/Dockerfile @@ -40,7 +40,8 @@ RUN argospm install translate-de_en && \ argospm install translate-fr_en && \ argospm install translate-ru_en && \ argospm install translate-zh_en && \ - argospm install translate-es_en + argospm install translate-es_en && \ + argospm install translate-ar_en ARG RUN_TESTS=false diff --git a/python/ArgosTranslation/README.md b/python/ArgosTranslation/README.md index d6f5b598..36b66392 100644 --- a/python/ArgosTranslation/README.md +++ b/python/ArgosTranslation/README.md @@ -20,7 +20,7 @@ The following are the ISO 639-1 codes, the ISO 639-2 codes, and their correspond All translations are either to English or from English. 
When trying to translate from one non-English language to another, Argos will automatically pivot between languages using the currently installed packages. For example, for Spanish->French Argos would pivot from Spanish->English to English->French. This is associated with a drop in accuracy and increase in runtime. -Lanuage packages are downloaded dynamically as needed. In addition, when building a Docker image the Dockerfile pre-installs German, French, Russian, and Spanish. +Language packages are downloaded dynamically as needed. In addition, when building a Docker image the Dockerfile pre-installs German, French, Russian, and Spanish. Note: Argos underperforms when translating to and from Chinese diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py index de99332a..05ffd752 100644 --- a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -107,6 +107,7 @@ def __init__(self, job_props): self.supported_languages = self.get_supported_languages_codes() self._installed_languages = translate.get_installed_languages() + self.installed_lang_codes = [lang.code for lang in self._installed_languages] self._props_to_translate = [ @@ -124,7 +125,7 @@ def __init__(self, job_props): mpf_util.get_property( properties=job_props, key='LANGUAGE_FEED_FORWARD_PROP', - default_value='DECODED_LANGUAGE,LANGUAGE', + default_value='ISO_LANGUAGE,DECODED_LANGUAGE,LANGUAGE', prop_type=str ).split(',') ] @@ -132,7 +133,7 @@ def __init__(self, job_props): self._from_lang = mpf_util.get_property( properties=job_props, key='DEFAULT_SOURCE_LANGUAGE', - default_value='es', + default_value='', prop_type=str ).lower().strip() @@ -169,7 +170,7 @@ def __init__(self, job_props): "ukr": "uk" } - self._translation_cache: Dict[str, (str, str)] = {} + 
self._translation_cache: Dict[str, Tuple[str, str]] = {} @staticmethod def get_supported_languages_codes(): @@ -221,7 +222,11 @@ def add_translations(self, ff_props: Dict[str, str]): logger.info(f'Skipped translation of the "{prop_to_translate}" ' f'property because it was already in the target language.') return - if self._from_lang not in self.supported_languages: + + if self._from_lang == "": + raise mpf.DetectionError.MISSING_PROPERTY.exception("LANGUAGE_FEED_FORWARD_PROP mismatch and no DEFAULT_SOURCE_LANGUAGE provided.") + + if self._from_lang != "" and self._from_lang not in self.supported_languages: raise mpf.DetectionError.DETECTION_FAILED.exception( f"Default source language, {self._from_lang}, is not supported." ) diff --git a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json index 51c700ff..e636afcb 100644 --- a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json +++ b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json @@ -29,13 +29,13 @@ "name": "LANGUAGE_FEED_FORWARD_PROP", "description": "Comma-separated list of property names indicating which properties in the feed-forward track or detection determine the language from which to translate. If the first property listed is present, then that property will be used. If it's not, then the next property in the list is considered. 
If none are present, fall back to FROM_LANGUAGE.", "type": "STRING", - "defaultValue": "DECODED_LANGUAGE,LANGUAGE,ISO_LANGUAGE" + "defaultValue": "ISO_LANGUAGE,DECODED_LANGUAGE,LANGUAGE" }, { "name": "DEFAULT_SOURCE_LANGUAGE", "description": "Optional property that indicates the source language of the text.", "type": "STRING", - "defaultValue": "es" + "defaultValue": "" } ] } diff --git a/python/ArgosTranslation/setup.cfg b/python/ArgosTranslation/setup.cfg index 38b8b5f5..d259ffd6 100644 --- a/python/ArgosTranslation/setup.cfg +++ b/python/ArgosTranslation/setup.cfg @@ -26,13 +26,13 @@ [metadata] name = ArgosTranslation -version = 0.1 +version = 7.2 [options] packages = argos_translation_component install_requires = - mpf_component_api>=7.1 - mpf_component_util>=7.1 + mpf_component_api>=7.2 + mpf_component_util>=7.2 argostranslate>=1.8.0 [options.entry_points] diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py index 90157cde..42cff5af 100644 --- a/python/ArgosTranslation/tests/test_argos_translation.py +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -40,7 +40,7 @@ RUSSIAN_SHORT_SAMPLE = "Где библиотека?" CHINESE_SHORT_SAMPLE = "谢谢。" SHORT_OUTPUT = "Where's the library?" -SHORT_OUTPUT_CHINESE = "Thanks." +SHORT_OUTPUT_CHINESE = "Thank you." 
LONG_OUTPUT = ( "We hold as evident these truths: that all men are created equal, " @@ -56,7 +56,7 @@ class TestArgosTranslation(unittest.TestCase): - + def test_generic_job(self): ff_track = mpf.GenericTrack(-1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) job = mpf.GenericJob('Test Generic', 'test.pdf', dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_track) @@ -189,12 +189,24 @@ def test_unsupported_language(self): list(comp.get_detections_from_image(job)) self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) + job = mpf.GenericJob('Test Plaintext', str(TEST_DATA / 'spanish_short.txt'), dict(DEFAULT_SOURCE_LANGUAGE='SPA'), {}) with self.assertRaises(mpf.DetectionException) as cm: list(comp.get_detections_from_generic(job)) self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) + + ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANG='ES')) + job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='SPA'), {}, ff_loc) + comp = ArgosTranslationComponent() + + with self.assertRaises(mpf.DetectionException) as cm: + list(comp.get_detections_from_image(job)) + self.assertEqual(mpf.DetectionError.DETECTION_FAILED, cm.exception.error_code) + + + def test_iso_map(self): ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA')) job = mpf.ImageJob('Test Image', 'test.jpg', {}, {}, ff_loc) @@ -210,7 +222,7 @@ def test_translation_cache(self): 0, 1, -1, { 0: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')), - 1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TRANSCRIPT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) + 1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TRANSCRIPT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA')) }, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')) @@ -233,3 +245,18 @@ def test_translation_cache(self): detection2 = result.frame_locations[1] self.assertEqual(SPANISH_SHORT_SAMPLE, 
detection2.detection_properties['TRANSCRIPT']) self.assertEqual(SHORT_OUTPUT, detection2.detection_properties['TRANSLATION']) + + def test_no_feed_forward_prop_no_default_lang(self): + ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANG='ES')) + job = mpf.ImageJob('Test Image', 'test.jpg', {}, {}, ff_loc) + comp = ArgosTranslationComponent() + + with self.assertRaises(mpf.DetectionException) as cm: + comp.get_detections_from_image(job) + self.assertEqual(mpf.DetectionError.MISSING_PROPERTY, cm.exception.error_code) + + + + +if __name__ == '__main__': + unittest.main() From 53a27122e369f4e3d17c3859fd92fb8e602916e0 Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Wed, 7 Jun 2023 15:58:17 -0400 Subject: [PATCH 13/14] updated Chinese output in test --- .../ArgosTranslation/plugin-files/descriptor/descriptor.json | 4 ++-- python/ArgosTranslation/tests/test_argos_translation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json index e636afcb..1cd3f1ef 100644 --- a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json +++ b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json @@ -1,7 +1,7 @@ { "componentName": "ArgosTranslation", - "componentVersion": "7.1", - "middlewareVersion": "7.1", + "componentVersion": "7.2", + "middlewareVersion": "7.2", "sourceLanguage": "python", "batchLibrary": "ArgosTranslation", "environmentVariables": [], diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py index 42cff5af..10eb4302 100644 --- a/python/ArgosTranslation/tests/test_argos_translation.py +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -38,9 +38,9 @@ SPANISH_SHORT_SAMPLE = '¿Dónde está la biblioteca?' RUSSIAN_SHORT_SAMPLE = "Где библиотека?" 
-CHINESE_SHORT_SAMPLE = "谢谢。" +CHINESE_SHORT_SAMPLE = "你好,你叫什么名字?" SHORT_OUTPUT = "Where's the library?" -SHORT_OUTPUT_CHINESE = "Thank you." +SHORT_OUTPUT_CHINESE = "You have good names?" LONG_OUTPUT = ( "We hold as evident these truths: that all men are created equal, " From b7242a1a858323f96940dc28b2f817b649d70e5f Mon Sep 17 00:00:00 2001 From: mcrenshaw Date: Tue, 29 Aug 2023 16:03:57 -0400 Subject: [PATCH 14/14] changed error message and updated expected test output --- .../argos_translation_component.py | 5 ++++- python/ArgosTranslation/tests/test_argos_translation.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py index 05ffd752..ea499be6 100644 --- a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -224,7 +224,10 @@ def add_translations(self, ff_props: Dict[str, str]): return if self._from_lang == "": - raise mpf.DetectionError.MISSING_PROPERTY.exception("LANGUAGE_FEED_FORWARD_PROP mismatch and no DEFAULT_SOURCE_LANGUAGE provided.") + raise mpf.DetectionError.MISSING_PROPERTY.exception( + 'None of the properties from "LANGUAGE_FEED_FORWARD_PROP" ' + f'({self._lang_prop_names}) were found in the feed forward track and no ' + '"DEFAULT_SOURCE_LANGUAGE" was provided.') if self._from_lang != "" and self._from_lang not in self.supported_languages: raise mpf.DetectionError.DETECTION_FAILED.exception( diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py index 10eb4302..c32e9cff 100644 --- a/python/ArgosTranslation/tests/test_argos_translation.py +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -50,7 +50,7 @@ "which derive their legitimate powers from the consent of 
the governed. " "Whenever a form of government becomes destroyer of these principles, " "the people have the right to reform or abolish it and to institute a new government " - "that is founded on these principles, and to organize their powers in the way that in " + "that is founded on those principles, and to organize their powers in the way that in " "their opinion will offer the greatest chance of achieving their security and happiness." )