# Logging
* Showcasing how the Logger is used in combination with the translation task
* We use the MockClient that was created for testing the TranslationTask


In [1]:
!rm -rf tmp_test

In [2]:
from test_task_and_logger import MockClient
from scripts.task import TranslationTask
from scripts.data_management import Opus100Manager
from scripts.logger import MyLogger, RetryLog
from io import StringIO
import json
import pprint

dm = Opus100Manager()
pairs = [('de', 'en'), ('en', 'de'), ('fr', 'en'), ('en', 'fr')]
logfile = StringIO()
logger = MyLogger(logfile=logfile)
# scenario, 1 = failure (rejection of output defined by us), 2 = error (API error, no output)
cli = MockClient(logger=logger, scenario=[0, 1, 0, 1, 1, 1, 2, 1, 2, 2, 0])

task = TranslationTask(
    target_pairs=pairs,
    dm=dm,
    client=cli,
    logger=logger,
    mt_folder='tmp_test',
    num_of_sents=400,
    retry_delay=0 # Default retry_delay is 30s, we set it to 0
)

* We produce logs that we can watch in real-time as the translation happens

In [3]:
task.run()

[🏁]: Starting task 2e1f3094-d2fa-4b48-87c8-353296287291
[✔️]: 400 translated from de to en
[❌]: Output for en-de is not acceptable!
[⏲️]: Retrying en-de...
[✔️]: 400 translated from en to de
[❌]: Output for fr-en is not acceptable!
[⏲️]: Retrying fr-en...
[❌]: Output for fr-en is not acceptable!
[⏲️]: Retrying fr-en...
[❌]: Output for fr-en is not acceptable!
[⏲️]: Retrying fr-en...
[⚠️]: Error MockError
[⏩]: Failed 3 times, skipping fr-en...
[❌]: Output for en-fr is not acceptable!
[⏲️]: Retrying en-fr...
[⚠️]: Error MockError
[⏲️]: Retrying en-fr...
[⚠️]: Error MockError
[⏲️]: Retrying en-fr...
[✔️]: 400 translated from en to fr
[🏁]: Task took 1.35s


* We produce more detailed logs in JSONL files that contain all kinds of information we deemed important for transparancy, including the commit hash
* Compare Notebook Printed Logs for Case Success, Failure and Error with Logger's logs

In [4]:
log_data = [json.loads(ln) for ln in logfile.getvalue().splitlines()]
interest = [log_data[0], log_data[1], log_data[6]]
for log in interest:
    pprint.pprint(log)

{'dataset': {'name': 'Helsinki-NLP/opus-100',
             'num_of_sents': 400,
             'split': 'test[:500]',
             'start_idx': 0},
 'git_hash': 'db084bb',
 'task_id': '2e1f3094-d2fa-4b48-87c8-353296287291',
 'translation': {'end': 1746210182.1003325,
                 'end_timestamp': '2025-05-02 20:23:02.100336+02:00',
                 'id': 'eb0f7864-8bc2-4393-90a9-423350cc34ab',
                 'in_chars': 32731,
                 'in_lines': 400,
                 'in_sents': 444,
                 'in_tokens': 8295,
                 'out_chars': 32731,
                 'out_lines': 400,
                 'out_sents': 478,
                 'out_tokens': 15578,
                 'src_lang': 'de',
                 'start': 1746210182.053792,
                 'start_timestamp': '2025-05-02 20:23:02.091497+02:00',
                 'tgt_lang': 'en',
                 'time': 0.04654049873352051,
                 'translator': 'mock'},
 'verdict': {'success': 'Translation accept

* We distinguish between success, failure and error using the `verdict` field in the logs. 
    * A failure is defined by us, it means the output was deemed insufficient and calling the API again is the only option, this happens if the output is too large or too short.
    * An error is caused by the API, resulting in us getting no output.
* There is no `fr-en.txt` file because it failed 4 times in total, the 1st time and the 3 retries it was given. 
* In cases were an output was received but deemed 'failure', output is stored with `fail` suffix.

In [5]:
import os
os.listdir('tmp_test')

['en-fr.txt',
 'fr-en_fail1.txt',
 'fr-en_fail3.txt',
 'de-en.txt',
 'en-fr_fail1.txt',
 'task',
 'en-de.txt',
 'en-de_fail1.txt',
 'fr-en_fail2.txt']

In [6]:
!cat tmp_test/task

Task Id: 2e1f3094-d2fa-4b48-87c8-353296287291
Task Duration: 1.35


* We store the log_ids of the first and last success, namely pair `de-en` and `en-fr`

In [7]:
log_ids = [log['translation']['id'] for log in [log_data[0], log_data[-1]]]
log_ids

['eb0f7864-8bc2-4393-90a9-423350cc34ab',
 '2083ac5f-4aa6-4f67-8b03-aeb41869cb43']

In [8]:
de_en_log = log_data[0]
en_fr_log = log_data[-1]

## Manual Retry
* In case we still think we need to retry the call, we have to start a new task and specifcy the log ids of the translations we want to compute again 
* This provides an adequate level of 
* This is done in cases that are harder to detect automatically or do not make sense to detect automatically such as unnaturally low BLEU score or perhaps realizing that the model did not translate anything but just returned the source text or malformatted text in general.

In [9]:
retry_pairs = [('de', 'en'), ('en', 'fr')]
retry = RetryLog(pairs=retry_pairs, log_ids=log_ids, reasons=['BLEU score unnaturally low', 'Returned Src text'])
new_logger = MyLogger(logfile=logfile, retry=retry)
cli = MockClient(logger=new_logger)
task = TranslationTask(
    target_pairs=retry_pairs,
    dm=dm,
    client=cli,
    logger=new_logger,
    mt_folder='tmp_test',
    num_of_sents=400,
    manual_retry=True,
)

In [10]:
!rm -rf tmp_test

In [11]:
task.run()

[🏁]: Starting task f37f6180-6ee4-469f-8f28-2f1dfb7ebe1e
[✔️]: 400 translated from de to en
[✔️]: 400 translated from en to fr
[🏁]: Task took 0.29s


In [12]:
log_data = [json.loads(ln) for ln in logfile.getvalue().splitlines()]
interest = [de_en_log, en_fr_log, log_data[-2], log_data[-1]]
for log in interest:
    pprint.pprint(log)

{'dataset': {'name': 'Helsinki-NLP/opus-100',
             'num_of_sents': 400,
             'split': 'test[:500]',
             'start_idx': 0},
 'git_hash': 'db084bb',
 'task_id': '2e1f3094-d2fa-4b48-87c8-353296287291',
 'translation': {'end': 1746210182.1003325,
                 'end_timestamp': '2025-05-02 20:23:02.100336+02:00',
                 'id': 'eb0f7864-8bc2-4393-90a9-423350cc34ab',
                 'in_chars': 32731,
                 'in_lines': 400,
                 'in_sents': 444,
                 'in_tokens': 8295,
                 'out_chars': 32731,
                 'out_lines': 400,
                 'out_sents': 478,
                 'out_tokens': 15578,
                 'src_lang': 'de',
                 'start': 1746210182.053792,
                 'start_timestamp': '2025-05-02 20:23:02.091497+02:00',
                 'tgt_lang': 'en',
                 'time': 0.04654049873352051,
                 'translator': 'mock'},
 'verdict': {'success': 'Translation accept

* The last log entries have a `manual_retry` field, indicating that they were added post translation

In [13]:
de_en_log['translation']['id']

'eb0f7864-8bc2-4393-90a9-423350cc34ab'

In [14]:
log_data[-2]['manual_retry'], (log_data[-2]['translation']['src_lang'], log_data[-2]['translation']['tgt_lang'])

({'log_id': 'eb0f7864-8bc2-4393-90a9-423350cc34ab',
  'reason': 'BLEU score unnaturally low'},
 ('de', 'en'))