diff --git a/python/ArgosTranslation/Dockerfile b/python/ArgosTranslation/Dockerfile new file mode 100644 index 00000000..705962b8 --- /dev/null +++ b/python/ArgosTranslation/Dockerfile @@ -0,0 +1,59 @@ +# syntax=docker/dockerfile:1.2 + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
 # +############################################################################# + +ARG BUILD_REGISTRY +ARG BUILD_TAG=latest +FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} + +RUN pip3 install --no-cache-dir 'argostranslate>=1.8.0' + +RUN argospm update + +# To download a language model use: argospm install translate-XX_en (XX is the ISO 639-1 code of the source language) +# Refer to the README for the list of supported languages +RUN argospm install translate-de_en && \ + argospm install translate-fr_en && \ + argospm install translate-ru_en && \ + argospm install translate-zh_en && \ + argospm install translate-es_en && \ + argospm install translate-ar_en + +ARG RUN_TESTS=false + +RUN --mount=target=.,readwrite \ + install-component.sh; \ + if [ "${RUN_TESTS,,}" == true ]; then python tests/test_argos_translation.py; fi + +LABEL org.label-schema.license="Apache 2.0" \ + org.label-schema.name="OpenMPF Argos Translation" \ + org.label-schema.schema-version="1.0" \ + org.label-schema.url="https://openmpf.github.io" \ + org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \ + org.label-schema.vendor="MITRE" + + diff --git a/python/ArgosTranslation/LICENSE b/python/ArgosTranslation/LICENSE new file mode 100644 index 00000000..37cc426e --- /dev/null +++ b/python/ArgosTranslation/LICENSE @@ -0,0 +1,31 @@ +/****************************************************************************** +* Copyright 2023 The MITRE Corporation * +* * +* Licensed under the Apache License, Version 2.0 (the "License"); * +* you may not use this file except in compliance with the License. * +* You may obtain a copy of the License at * +* * +* http://www.apache.org/licenses/LICENSE-2.0 * +* * +* Unless required by applicable law or agreed to in writing, software * +* distributed under the License is distributed on an "AS IS" BASIS, * +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * +* See the License for the specific language governing permissions and * +* limitations under the License. 
* +******************************************************************************/ + +This project contains content developed by The MITRE Corporation. If this code +is used in a deployment or embedded within another project, it is requested +that you send an email to opensource@mitre.org in order to let us know where +this software is being used. + +This software makes use of source code and data files from third party software: + +-------------------------------------------------------------------------- + +Argos Translate is licensed under the MIT License. +Copyright (c) 2020 Argos Open Technologies, LLC + +Project Page: https://github.com/argosopentech/argos-translate + +-------------------------------------------------------------------------- \ No newline at end of file diff --git a/python/ArgosTranslation/README.md b/python/ArgosTranslation/README.md new file mode 100644 index 00000000..36b66392 --- /dev/null +++ b/python/ArgosTranslation/README.md @@ -0,0 +1,58 @@ +# Overview + +This repository contains source code for the OpenMPF Argos Translation Component. + +This component translates the input text from a given source language to English. The source language can be provided as a job property, or be indicated in the detection properties from a feed-forward track. + + +# Job Properties +The below properties can be optionally provided to alter the behavior of the component. + +- `FEED_FORWARD_PROP_TO_PROCESS`: Controls which properties of the feed-forward track or detection are considered for translation. This should be a comma-separated list of property names (default: `"TEXT,TRANSCRIPT"`). The first named property in the list that is present is translated. At most, one property will be translated. 
 + +- `LANGUAGE_FEED_FORWARD_PROP`: As above, a comma-separated list of property names (default: `"ISO_LANGUAGE,DECODED_LANGUAGE,LANGUAGE"`), indicating which property of the feed-forward track or detection may be used to determine the source language of the text to be translated. This language is expected to be provided as an ISO 639-1 or an ISO 639-2 language code. + +- `DEFAULT_SOURCE_LANGUAGE`: The default source language to use if none of the property names listed in `LANGUAGE_FEED_FORWARD_PROP` are present in a feed-forward track or detection. This language is used when running a generic job with a raw text file (hence no feed-forward tracks). + + +# Language Identifiers +The following are the ISO 639-1 codes, the ISO 639-2 codes, and their corresponding languages which Argos Translate version 1.7.0 can translate to English. + +All translations are either to English or from English. When trying to translate from one non-English language to another, Argos will automatically pivot between languages using the currently installed packages. For example, for Spanish->French Argos would pivot from Spanish->English to English->French. This is associated with a drop in accuracy and an increase in runtime. + +Language packages are downloaded dynamically as needed. In addition, when building a Docker image the Dockerfile pre-installs German, French, Russian, Chinese, Spanish, and Arabic. 
+ +Note: Argos underperforms when translating to and from Chinese + +| ISO-639-1 | ISO-639-2 | Language | +| --- |---|------------------| +| `ar` | `ara` | Arabic | +| `az` | `aze` | Azerbaijani | +| `zh` | `cmn` | Chinese | +| `cs` | `ces` | Czech | +| `da` | `dan` | Danish | +| `nl` | `nld` | Dutch | +| `eo` | `epo` | Esperanto | +| `fi` | `fin` | Finnish | +| `fr` | `fra` | French | +| `de` | `deu` | German | +| `el` | `ell` | Greek | +| `he` | `heb` | Hebrew | +| `hi` | `hin` | Hindi | +| `hu` | `hun` | Hungarian | +| `id` | `ind` | Indonesian | +| `ga` | `gle` | Irish | +| `it` | `ita` | Italian | +| `ja` | `jpn` | Japanese | +| `ko` | `kor` | Korean | +| `fa` | `fas` | Persian | +| `pl` | `pol` | Polish | +| `pt` | `por` | Portuguese | +| `ru` | `rus` | Russian | +| `sk` | `slk` | Slovak | +| `es` | `spa` | Spanish | +| `sv` | `swe` | Swedish | +| `tr` | `tur` | Turkish | +| `uk` | `ukr` | Ukrainian | + + diff --git a/python/ArgosTranslation/argos_translation_component/__init__.py b/python/ArgosTranslation/argos_translation_component/__init__.py new file mode 100644 index 00000000..2966364e --- /dev/null +++ b/python/ArgosTranslation/argos_translation_component/__init__.py @@ -0,0 +1,27 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2022 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2022 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +from .argos_translation_component import ArgosTranslationComponent, TranslationWrapper diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py new file mode 100644 index 00000000..ea499be6 --- /dev/null +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -0,0 +1,280 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import logging
import pathlib
from typing import Dict, Optional, Sequence, Tuple

from argostranslate import package, translate

import mpf_component_api as mpf
import mpf_component_util as mpf_util

logger = logging.getLogger('ArgosTranslationComponent')


class ArgosTranslationComponent:
    """OpenMPF component that adds English translations to feed-forward results.

    Every job handler builds a fresh ``TranslationWrapper`` from the job
    properties and lets it add the translation properties to the
    feed-forward track/detections in place.
    """

    detection_type = 'TRANSLATION'

    def get_detections_from_video(self, job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]:
        """Translate the feed-forward video track and each of its frame locations."""
        logger.info('Received video job.')

        return self.get_feed_forward_detections(job, job.feed_forward_track, video_job=True)

    def get_detections_from_image(self, job: mpf.ImageJob) -> Sequence[mpf.ImageLocation]:
        """Translate the feed-forward image location."""
        logger.info('Received image job.')

        return self.get_feed_forward_detections(job, job.feed_forward_location)

    def get_detections_from_audio(self, job: mpf.AudioJob) -> Sequence[mpf.AudioTrack]:
        """Translate the feed-forward audio track."""
        logger.info('Received audio job.')

        return self.get_feed_forward_detections(job, job.feed_forward_track)

    @staticmethod
    def get_detections_from_generic(job: mpf.GenericJob) -> Sequence[mpf.GenericTrack]:
        """Translate a feed-forward generic track, or a plain text media file.

        When the job has no feed-forward track, the file at ``job.data_uri``
        is read as text and translated as the ``TEXT`` property of a newly
        created track.
        """
        logger.info('Received generic job.')
        if job.feed_forward_track:
            tw = TranslationWrapper(job.job_properties)
            tw.add_translations(job.feed_forward_track.detection_properties)
            return [job.feed_forward_track]

        logger.info('Job did not contain a feed forward track. Assuming '
                    'media file is a plain text file containing the text to '
                    'be translated.')
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the process running the component.
        text = pathlib.Path(job.data_uri).read_text(encoding='utf-8').strip()

        # The text read from the file is always stored under TEXT, so that
        # is the only property that should be considered for translation.
        new_job_props = {
            **job.job_properties,
            'FEED_FORWARD_PROP_TO_PROCESS': 'TEXT'
        }
        new_ff_props = dict(TEXT=text)
        ff_track = mpf.GenericTrack(detection_properties=new_ff_props)

        tw = TranslationWrapper(new_job_props)
        # new_ff_props is the same dict object held by ff_track, so the
        # translation properties added here show up on the returned track.
        tw.add_translations(new_ff_props)

        return [ff_track]

    @staticmethod
    def get_feed_forward_detections(job, job_feed_forward, video_job=False):
        """Add translations to a feed-forward track or location.

        :param job: The job being processed; ``job.job_properties`` configures
            the translation.
        :param job_feed_forward: The feed-forward track or location of the job.
        :param video_job: When true, the detection properties of every entry
            in ``job_feed_forward.frame_locations`` are translated as well.
        :return: A single-element list containing ``job_feed_forward``.
        :raises mpf.DetectionException: ``UNSUPPORTED_DATA_TYPE`` when
            ``job_feed_forward`` is ``None``; any error raised while
            translating is logged and re-raised.
        """
        try:
            if job_feed_forward is None:
                raise mpf.DetectionError.UNSUPPORTED_DATA_TYPE.exception(
                    'Component can only process feed forward '
                    ' jobs, but no feed forward track provided. ')

            tw = TranslationWrapper(job.job_properties)
            tw.add_translations(job_feed_forward.detection_properties)

            if video_job:
                for ff_location in job_feed_forward.frame_locations.values():
                    tw.add_translations(ff_location.detection_properties)

            return [job_feed_forward]

        except Exception:
            logger.exception(
                'Failed to complete job due to the following exception:')
            raise


class TranslationWrapper:
    """Translates text stored in detection properties into English.

    The wrapper is configured from job properties:

    - ``FEED_FORWARD_PROP_TO_PROCESS``: candidate properties holding the text.
    - ``LANGUAGE_FEED_FORWARD_PROP``: candidate properties holding the
      source language code.
    - ``DEFAULT_SOURCE_LANGUAGE``: language used when none of the language
      properties is present.

    Translations are cached per wrapper instance, keyed by the input text.
    """

    def __init__(self, job_props):
        # Source language codes for which an Argos package to English exists.
        self.supported_languages = self.get_supported_languages_codes()

        self._installed_languages = translate.get_installed_languages()

        self.installed_lang_codes = [lang.code for lang in self._installed_languages]

        self._props_to_translate = [
            prop.strip() for prop in
            mpf_util.get_property(
                properties=job_props,
                key='FEED_FORWARD_PROP_TO_PROCESS',
                default_value='TEXT,TRANSCRIPT',
                prop_type=str
            ).split(',')
        ]

        self._lang_prop_names = [
            prop.strip() for prop in
            mpf_util.get_property(
                properties=job_props,
                key='LANGUAGE_FEED_FORWARD_PROP',
                default_value='ISO_LANGUAGE,DECODED_LANGUAGE,LANGUAGE',
                prop_type=str
            ).split(',')
        ]

        # Job-level default source language. It is never modified after
        # construction; the language of an individual detection is resolved
        # separately for every add_translations() call.
        self._from_lang = mpf_util.get_property(
            properties=job_props,
            key='DEFAULT_SOURCE_LANGUAGE',
            default_value='',
            prop_type=str
        ).lower().strip()

        self._to_lang = "en"

        # Three-letter language codes accepted as input, mapped to the
        # two-letter codes used by the Argos packages.
        self.iso_map = {
            "ara": "ar",
            "aze": "az",
            "cmn": "zh",
            "ces": "cs",
            "dan": "da",
            "nld": "nl",
            "epo": "eo",
            "fin": "fi",
            "fra": "fr",
            "deu": "de",
            "ell": "el",
            "heb": "he",
            "hin": "hi",
            "hun": "hu",
            "ind": "id",
            "gle": "ga",
            "ita": "it",
            "jpn": "ja",
            "kor": "ko",
            "fas": "fa",
            "pol": "pl",
            "por": "pt",
            "rus": "ru",
            "slk": "sk",
            "spa": "es",
            "swe": "sv",
            "tur": "tr",
            "ukr": "uk"
        }

        # input text -> (translated text, source language code)
        self._translation_cache: Dict[str, Tuple[str, str]] = {}

    @staticmethod
    def get_supported_languages_codes():
        """Return the source language codes of all packages that translate to English.

        If the available packages cannot be listed, the package index is
        downloaded and the listing is retried once.
        """
        try:
            available_packages = package.get_available_packages()
        except Exception:
            logger.info("Downloading package index.")
            package.update_package_index()
            available_packages = package.get_available_packages()

        return [pkg.from_code for pkg in available_packages if pkg.to_code == "en"]

    def add_translations(self, ff_props: Dict[str, str]):
        """Translate one property of ``ff_props`` and store the result in place.

        The first property named in ``FEED_FORWARD_PROP_TO_PROCESS`` that has
        a non-empty value is translated. On success ``TRANSLATION`` and
        ``TRANSLATION_SOURCE_LANGUAGE`` are added to ``ff_props``. If the text
        is already English, ``SKIPPED_TRANSLATION`` is set to ``'TRUE'``
        instead. If no text is found, ``ff_props`` is left untouched.

        :param ff_props: Detection properties of a feed-forward track or
            detection. Modified in place.
        :raises mpf.DetectionException: ``MISSING_PROPERTY`` when no source
            language can be determined; ``DETECTION_FAILED`` when the source
            language is unsupported or no translation model is usable.
        """
        for prop_to_translate in self._props_to_translate:
            input_text = ff_props.get(prop_to_translate, None)
            if input_text:
                break
        else:
            logger.warning("No text to translate found in track.")
            return

        if cached_translation := self._translation_cache.get(input_text):
            ff_props['TRANSLATION'] = cached_translation[0]
            ff_props['TRANSLATION_SOURCE_LANGUAGE'] = cached_translation[1]
            return

        # Resolve the language into a local variable: the job-level default
        # in self._from_lang must not be overwritten by one detection's
        # language, otherwise it would leak into subsequent detections.
        from_lang_code = self._resolve_source_language(ff_props)

        if from_lang_code == self._to_lang:
            ff_props['SKIPPED_TRANSLATION'] = 'TRUE'
            logger.info(f'Skipped translation of the "{prop_to_translate}" '
                        f'property because it was already in the target language.')
            return

        self._ensure_language_installed(from_lang_code)

        from_lang = self._find_installed_language(from_lang_code)
        to_lang = self._find_installed_language(self._to_lang)

        translation = None
        if from_lang is not None and to_lang is not None:
            translation = from_lang.get_translation(to_lang)

        if translation is None:
            raise mpf.DetectionError.DETECTION_FAILED.exception(
                f"No valid translation model from {from_lang_code} to {self._to_lang}, "
                f"check if any packages are missing."
            )

        logger.info(f"Translating the {prop_to_translate} property.")

        translated_text = translation.translate(input_text)

        self._translation_cache[input_text] = (translated_text, from_lang_code)

        logger.info("Translation complete.")

        ff_props['TRANSLATION_SOURCE_LANGUAGE'] = from_lang_code
        ff_props['TRANSLATION'] = translated_text

    def _resolve_source_language(self, ff_props: Dict[str, str]) -> str:
        """Return the two-letter source language code for one detection.

        The first property from ``LANGUAGE_FEED_FORWARD_PROP`` that is present
        with a non-blank value decides the language. If none is present, the
        job's ``DEFAULT_SOURCE_LANGUAGE`` is used.
        """
        for lang_prop_name in self._lang_prop_names:
            raw_lang = ff_props.get(lang_prop_name)
            if raw_lang is None:
                continue
            lang = raw_lang.lower().strip()
            if not lang:
                # A blank value carries no information; treat it as absent.
                continue

            if lang in self.supported_languages or lang == self._to_lang:
                return lang

            mapped_lang = self.iso_map.get(lang)
            if mapped_lang is not None and mapped_lang in self.supported_languages:
                return mapped_lang

            raise mpf.DetectionError.DETECTION_FAILED.exception(
                f"Source language, {lang}, is not supported."
            )

        if self._from_lang == self._to_lang:
            return self._from_lang

        if self._from_lang == "":
            raise mpf.DetectionError.MISSING_PROPERTY.exception(
                'None of the properties from "LANGUAGE_FEED_FORWARD_PROP" '
                f'({self._lang_prop_names}) were found in the feed forward track and no '
                '"DEFAULT_SOURCE_LANGUAGE" was provided.')

        if self._from_lang not in self.supported_languages:
            raise mpf.DetectionError.DETECTION_FAILED.exception(
                f"Default source language, {self._from_lang}, is not supported."
            )

        return self._from_lang

    def _ensure_language_installed(self, from_lang_code: str) -> None:
        """Download and install the package for ``from_lang_code`` if it is missing."""
        if from_lang_code in self.installed_lang_codes:
            return

        logger.info(f"Language {from_lang_code} is not installed. Installing package.")

        # From Argos Translate for downloading language models.
        available_package = next(
            (pkg for pkg in package.get_available_packages()
             if pkg.from_code == from_lang_code and pkg.to_code == self._to_lang),
            None)
        if available_package is None:
            raise mpf.DetectionError.DETECTION_FAILED.exception(
                f"No valid translation model from {from_lang_code} to {self._to_lang}, "
                f"check if any packages are missing."
            )

        download_path = available_package.download()
        package.install_from_path(download_path)

        logger.info(f"Successfully installed {from_lang_code}.")

        self._installed_languages = translate.get_installed_languages()
        self.installed_lang_codes = [lang.code for lang in self._installed_languages]

    def _find_installed_language(self, code: str) -> Optional[object]:
        """Return the installed language object with the given code, or ``None``."""
        return next(
            (lang for lang in self._installed_languages if lang.code == code),
            None)
 If none are present, fall back to DEFAULT_SOURCE_LANGUAGE.", + "type": "STRING", + "defaultValue": "ISO_LANGUAGE,DECODED_LANGUAGE,LANGUAGE" + }, + { + "name": "DEFAULT_SOURCE_LANGUAGE", + "description": "Optional property that indicates the source language of the text.", + "type": "STRING", + "defaultValue": "" + } + ] + } + }, + "actions": [ + { + "name": "ARGOS TRANSLATION (WITH FF REGION) ACTION", + "description": "Uses Argos Translation to perform translation on feed-forward tracks and detections.", + "algorithm": "ARGOSTRANSLATION", + "properties": [ + { + "name": "FEED_FORWARD_TYPE", + "value": "REGION" + }, + { + "name": "OUTPUT_MERGE_WITH_PREVIOUS_TASK", + "value": "TRUE" + } + ] + }, + { + "name": "ARGOS TRANSLATION TEXT FILE ACTION", + "description": "Uses Argos Translation to perform translation on a plain text file.", + "algorithm": "ARGOSTRANSLATION", + "properties": [ + ] + } + ], + "tasks": [ + { + "name": "ARGOS TRANSLATION (WITH FF REGION) TASK", + "description": "Uses Argos Translate to perform translation on feed-forward tracks and detections.", + "actions": [ + "ARGOS TRANSLATION (WITH FF REGION) ACTION" + ] + }, + { + "name": "ARGOS TRANSLATION TEXT FILE TASK", + "description": "Uses Argos Translate to perform translation on a plain text file.", + "actions": [ + "ARGOS TRANSLATION TEXT FILE ACTION" + ] + } + ], + "pipelines": [ + { + "name": "ARGOS TRANSLATION TEXT FILE PIPELINE", + "description": "Uses Argos Translate to perform translation on a plain text file.", + "tasks": [ + "ARGOS TRANSLATION TEXT FILE TASK" + ] + } + ] +} diff --git a/python/ArgosTranslation/pyproject.toml b/python/ArgosTranslation/pyproject.toml new file mode 100644 index 00000000..d2aa20ab --- /dev/null +++ b/python/ArgosTranslation/pyproject.toml @@ -0,0 +1,29 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. 
Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" diff --git a/python/ArgosTranslation/sample_argos_translator.py b/python/ArgosTranslation/sample_argos_translator.py new file mode 100644 index 00000000..f0a299c1 --- /dev/null +++ b/python/ArgosTranslation/sample_argos_translator.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. 
import sys


def main():
    """Translate text given on the command line and print the result.

    Expects exactly two command line arguments: the source language code
    and the text to translate. Exits with a usage message otherwise.
    """
    if len(sys.argv) != 3:
        # Name the expected arguments so the message is actionable.
        sys.exit(f'Usage: {sys.argv[0]} FROM_LANG TEXT')

    # Imported only after the arguments are validated, so a usage error is
    # reported without first loading the translation component.
    from argos_translation_component import TranslationWrapper

    _, from_lang, text = sys.argv

    detection_props = dict(TEXT=text)
    job_props = dict(DEFAULT_SOURCE_LANGUAGE=from_lang)
    TranslationWrapper(job_props).add_translations(detection_props)

    print('TRANSLATION SOURCE LANGUAGE:', detection_props['TRANSLATION_SOURCE_LANGUAGE'])
    print('TRANSLATION:')
    print(detection_props['TRANSLATION'])


if __name__ == '__main__':
    main()
# +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +[metadata] +name = ArgosTranslation +version = 7.2 + +[options] +packages = argos_translation_component +install_requires = + mpf_component_api>=7.2 + mpf_component_util>=7.2 + argostranslate>=1.8.0 + +[options.entry_points] +mpf.exported_component = + component = argos_translation_component.argos_translation_component:ArgosTranslationComponent diff --git a/python/ArgosTranslation/tests/data/NOTICE b/python/ArgosTranslation/tests/data/NOTICE new file mode 100644 index 00000000..a7262730 --- /dev/null +++ b/python/ArgosTranslation/tests/data/NOTICE @@ -0,0 +1,4 @@ +# spanish_long.txt +Contains part of The Declaration of Independence translated to Spanish. 
+Public Domain +https://www.state.gov/wp-content/uploads/2020/02/Spanish-translation-U.S.-Declaration-of-Independence.pdf \ No newline at end of file diff --git a/python/ArgosTranslation/tests/data/spanish_long.txt b/python/ArgosTranslation/tests/data/spanish_long.txt new file mode 100644 index 00000000..c1287f81 --- /dev/null +++ b/python/ArgosTranslation/tests/data/spanish_long.txt @@ -0,0 +1 @@ +Sostenemos como evidentes estas verdades: que todos los hombres son creados iguales, que son dotados por su Creador de ciertos derechos inalienables, que entre éstos están la vida, la libertad y la búsqueda de la felicidad. Que para gara ntizar estos derechos se instituyen entre los hombres los gobiernos, que derivan sus poderes legítimos del consentimiento de los gobernados. Que cuando quiera que una forma de gobierno se haga destructora de estos principios, el pueblo tiene el derec ho a reformarla o abolirla e instituir un nuevo gobierno que se funde en dichos principios, y a organizar sus poderes en la forma que a su juicio ofrecerá las mayores probabilidades de alcanzar su seguridad y felicidad. \ No newline at end of file diff --git a/python/ArgosTranslation/tests/data/spanish_short.txt b/python/ArgosTranslation/tests/data/spanish_short.txt new file mode 100644 index 00000000..acd078c9 --- /dev/null +++ b/python/ArgosTranslation/tests/data/spanish_short.txt @@ -0,0 +1 @@ +¿Dónde está la biblioteca? \ No newline at end of file diff --git a/python/ArgosTranslation/tests/test_argos_translation.py b/python/ArgosTranslation/tests/test_argos_translation.py new file mode 100644 index 00000000..c32e9cff --- /dev/null +++ b/python/ArgosTranslation/tests/test_argos_translation.py @@ -0,0 +1,262 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). 
#
#                                                                           #
# Copyright 2023 The MITRE Corporation. All Rights Reserved.                #
#############################################################################

#############################################################################
# Copyright 2023 The MITRE Corporation                                      #
#                                                                           #
# Licensed under the Apache License, Version 2.0 (the "License");           #
# you may not use this file except in compliance with the License.          #
# You may obtain a copy of the License at                                   #
#                                                                           #
#    http://www.apache.org/licenses/LICENSE-2.0                             #
#                                                                           #
# Unless required by applicable law or agreed to in writing, software       #
# distributed under the License is distributed on an "AS IS" BASIS,         #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
# See the License for the specific language governing permissions and       #
# limitations under the License.                                            #
#############################################################################

from pathlib import Path
import sys
import unittest

import mpf_component_api as mpf

LOCAL_PATH = Path(__file__).parent
# The parent of this test directory is put at the front of sys.path so the
# component can be resolved from the source tree. This has to happen BEFORE
# the component import below, otherwise the entry has no effect on it.
sys.path.insert(0, str(LOCAL_PATH.parent))
TEST_DATA = LOCAL_PATH / 'data'

from argos_translation_component import ArgosTranslationComponent  # noqa: E402

SPANISH_SHORT_SAMPLE = '¿Dónde está la biblioteca?'
RUSSIAN_SHORT_SAMPLE = "Где библиотека?"
CHINESE_SHORT_SAMPLE = "你好,你叫什么名字?"
SHORT_OUTPUT = "Where's the library?"
SHORT_OUTPUT_CHINESE = "You have good names?"

LONG_OUTPUT = (
    "We hold as evident these truths: that all men are created equal, "
    "that they are endowed by their Creator with certain inalienable rights, "
    "which among them are life, liberty and the pursuit of happiness. "
    "That in order to nurture these rights, governments are instituted among men, "
    "which derive their legitimate powers from the consent of the governed. "
    "Whenever a form of government becomes destroyer of these principles, "
    "the people have the right to reform or abolish it and to institute a new government "
    "that is founded on those principles, and to organize their powers in the way that in "
    "their opinion will offer the greatest chance of achieving their security and happiness."
)


class TestArgosTranslation(unittest.TestCase):
    """Tests for ArgosTranslationComponent across the generic, audio, image and video entry points."""

    def _assert_detection_error(self, expected_code, get_detections, job):
        """Assert that running ``job`` raises DetectionException with ``expected_code``.

        The result of ``get_detections(job)`` is passed through ``list`` so that
        a lazily evaluated result is fully consumed inside the ``assertRaises``
        context instead of silently never running.
        """
        with self.assertRaises(mpf.DetectionException) as cm:
            list(get_detections(job))
        self.assertEqual(expected_code, cm.exception.error_code)

    def test_generic_job(self):
        """The feed-forward LANGUAGE property is used instead of DEFAULT_SOURCE_LANGUAGE."""
        ff_track = mpf.GenericTrack(-1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES'))
        job = mpf.GenericJob('Test Generic', 'test.pdf', dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_track)
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_generic(job)

        self.assertEqual(1, len(result))
        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION'])

    def test_plaintext_job(self):
        """A plain-text file with no feed-forward track is translated using DEFAULT_SOURCE_LANGUAGE."""
        job = mpf.GenericJob('Test Plaintext', str(TEST_DATA / 'spanish_short.txt'),
                             dict(DEFAULT_SOURCE_LANGUAGE='ES'), {})
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_generic(job)

        self.assertEqual(1, len(result))
        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION'])

    def test_audio_job(self):
        """A feed-forward audio track is translated using its own LANGUAGE property."""
        ff_track = mpf.AudioTrack(0, 1, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES'))
        job = mpf.AudioJob('Test Audio', 'test.wav', 0, 1, dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_track)
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_audio(job)

        self.assertEqual(1, len(result))
        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION'])

    def test_image_job(self):
        """A feed-forward image location is translated using its own LANGUAGE property."""
        ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES'))
        job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='ZH'), {}, ff_loc)
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_image(job)

        self.assertEqual(1, len(result))
        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION'])

    def test_video_job(self):
        """Both the track and each of its frame locations receive a translation."""
        ff_track = mpf.VideoTrack(
            0, 1, -1,
            {
                0: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')),
                1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=RUSSIAN_SHORT_SAMPLE, LANGUAGE='RU'))
            },
            dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES'))
        job = mpf.VideoJob('Test Video', 'test.mp4', 0, 1, dict(DEFAULT_SOURCE_LANGUAGE=''), {}, ff_track)
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_video(job)

        self.assertEqual(1, len(result))
        track = result[0]
        self.assertEqual('es', track.detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual('es', track.frame_locations[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual('ru', track.frame_locations[1].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, track.detection_properties['TRANSLATION'])
        self.assertEqual(SHORT_OUTPUT, track.frame_locations[0].detection_properties['TRANSLATION'])
        self.assertEqual(SHORT_OUTPUT, track.frame_locations[1].detection_properties['TRANSLATION'])

    def test_language_behavior(self):
        """Each frame location is handled per its own LANGUAGE; English input is skipped."""
        ff_track = mpf.VideoTrack(
            0, 1, -1,
            {
                0: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=RUSSIAN_SHORT_SAMPLE, LANGUAGE='RU')),
                1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')),
                2: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TEXT=CHINESE_SHORT_SAMPLE, LANGUAGE='ZH')),
                3: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SHORT_OUTPUT, LANGUAGE='EN'))
            },
            dict(LANGUAGE='ES'))
        job = mpf.VideoJob('Test Language', 'test.mp4', 0, 1, {}, {}, ff_track)
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_video(job)

        self.assertEqual(1, len(result))
        frames = result[0].frame_locations

        # Should skip English tracks
        self.assertEqual('TRUE', frames[3].detection_properties['SKIPPED_TRANSLATION'])

        self.assertEqual('ru', frames[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual('es', frames[1].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual('zh', frames[2].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, frames[0].detection_properties['TRANSLATION'])
        self.assertEqual(SHORT_OUTPUT, frames[1].detection_properties['TRANSLATION'])
        self.assertEqual(SHORT_OUTPUT_CHINESE, frames[2].detection_properties['TRANSLATION'])

    def test_large_text(self):
        """A long multi-sentence Spanish document is translated to LONG_OUTPUT."""
        comp = ArgosTranslationComponent()
        job = mpf.GenericJob(
            job_name='Test Sentence Length',
            data_uri=str(TEST_DATA / 'spanish_long.txt'),
            job_properties=dict(DEFAULT_SOURCE_LANGUAGE='ES'),
            media_properties={},
            feed_forward_track=None
        )

        result = comp.get_detections_from_generic(job)

        self.assertEqual(1, len(result))
        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(LONG_OUTPUT, result[0].detection_properties['TRANSLATION'])

    def test_no_feed_forward_location(self):
        """An image job without a feed-forward location is rejected as UNSUPPORTED_DATA_TYPE."""
        comp = ArgosTranslationComponent()
        job = mpf.ImageJob('Test', 'test.jpg', {}, {})

        self._assert_detection_error(
            mpf.DetectionError.UNSUPPORTED_DATA_TYPE, comp.get_detections_from_image, job)

    def test_no_feed_forward_track(self):
        """Video and audio jobs without a feed-forward track are rejected as UNSUPPORTED_DATA_TYPE."""
        comp = ArgosTranslationComponent()

        job = mpf.VideoJob('test', 'test.mp4', 0, 1, {}, {})
        self._assert_detection_error(
            mpf.DetectionError.UNSUPPORTED_DATA_TYPE, comp.get_detections_from_video, job)

        job = mpf.AudioJob('Test Audio', 'test.wav', 0, 1, {}, {})
        self._assert_detection_error(
            mpf.DetectionError.UNSUPPORTED_DATA_TYPE, comp.get_detections_from_audio, job)

    def test_unsupported_language(self):
        """Unsupported language codes, feed-forward or default, fail with DETECTION_FAILED."""
        # Unsupported feed-forward LANGUAGE, even though the default is valid.
        ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='IS'))
        job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='es'), {}, ff_loc)
        comp = ArgosTranslationComponent()
        self._assert_detection_error(
            mpf.DetectionError.DETECTION_FAILED, comp.get_detections_from_image, job)

        # Unsupported DEFAULT_SOURCE_LANGUAGE on a plain-text job.
        job = mpf.GenericJob('Test Plaintext', str(TEST_DATA / 'spanish_short.txt'),
                             dict(DEFAULT_SOURCE_LANGUAGE='SPA'), {})
        self._assert_detection_error(
            mpf.DetectionError.DETECTION_FAILED, comp.get_detections_from_generic, job)

        # No LANGUAGE key on the feed-forward location (it uses 'LANG'), and
        # the job's DEFAULT_SOURCE_LANGUAGE is 'SPA' as in the previous case.
        ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANG='ES'))
        job = mpf.ImageJob('Test Image', 'test.jpg', dict(DEFAULT_SOURCE_LANGUAGE='SPA'), {}, ff_loc)
        comp = ArgosTranslationComponent()
        self._assert_detection_error(
            mpf.DetectionError.DETECTION_FAILED, comp.get_detections_from_image, job)

    def test_iso_map(self):
        """A feed-forward LANGUAGE of 'SPA' is reported as source language 'es'."""
        ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA'))
        job = mpf.ImageJob('Test Image', 'test.jpg', {}, {}, ff_loc)
        comp = ArgosTranslationComponent()
        result = comp.get_detections_from_image(job)

        self.assertEqual(1, len(result))
        self.assertEqual('es', result[0].detection_properties['TRANSLATION_SOURCE_LANGUAGE'])
        self.assertEqual(SHORT_OUTPUT, result[0].detection_properties['TRANSLATION'])

    def test_translation_cache(self):
        """The same text under TEXT and TRANSCRIPT keys yields the same translation; inputs are kept."""
        ff_track = mpf.VideoTrack(
            0, 1, -1,
            {
                0: mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES')),
                1: mpf.ImageLocation(0, 10, 10, 10, -1, dict(TRANSCRIPT=SPANISH_SHORT_SAMPLE, LANGUAGE='SPA'))
            },
            dict(TEXT=SPANISH_SHORT_SAMPLE, LANGUAGE='ES'))

        job = mpf.VideoJob('test', 'test.jpg', 0, 1, {}, {}, ff_track)

        comp = ArgosTranslationComponent()
        results = comp.get_detections_from_video(job)

        self.assertEqual(1, len(results))
        result = results[0]

        self.assertEqual(SPANISH_SHORT_SAMPLE, result.detection_properties['TEXT'])
        self.assertEqual(SHORT_OUTPUT, result.detection_properties['TRANSLATION'])
        self.assertEqual('es', result.detection_properties['TRANSLATION_SOURCE_LANGUAGE'])

        detection1 = result.frame_locations[0]
        self.assertEqual(SPANISH_SHORT_SAMPLE, detection1.detection_properties['TEXT'])
        self.assertEqual(SHORT_OUTPUT, detection1.detection_properties['TRANSLATION'])

        detection2 = result.frame_locations[1]
        self.assertEqual(SPANISH_SHORT_SAMPLE, detection2.detection_properties['TRANSCRIPT'])
        self.assertEqual(SHORT_OUTPUT, detection2.detection_properties['TRANSLATION'])

    def test_no_feed_forward_prop_no_default_lang(self):
        """With no LANGUAGE property and no default language, the job fails with MISSING_PROPERTY."""
        ff_loc = mpf.ImageLocation(0, 0, 10, 10, -1, dict(TEXT=SPANISH_SHORT_SAMPLE, LANG='ES'))
        job = mpf.ImageJob('Test Image', 'test.jpg', {}, {}, ff_loc)
        comp = ArgosTranslationComponent()

        # Consumed via list() like every other failure test in this class, so
        # the error surfaces even if the component returns a lazy iterable.
        self._assert_detection_error(
            mpf.DetectionError.MISSING_PROPERTY, comp.get_detections_from_image, job)


if __name__ == '__main__':
    unittest.main()