# Web managing kleio files

In [4]:
from timelink.kleio import KleioServer

# Ask KleioServer for the local kleio home and print it.
# The value is reused below as the kleio_home of the server that gets started.
timelink_home = KleioServer.find_local_kleio_home()
print(timelink_home)

/Users/jrc/develop/timelink-py/tests/timelink-home/projects/web-tests


In [5]:
from timelink.kleio import KleioServer
from timelink.api.database import TimelinkDatabase
import os

# Start a Kleio server that uses timelink_home as its kleio home.
kserver = KleioServer.start(kleio_home=timelink_home)

# SQLite database used by this notebook; db_dir is relative to the
# directory the notebook is run from.
db_dir = '../database/sqlite/'
db = TimelinkDatabase(db_type='sqlite',
                      db_path=db_dir,
                      db_name='timelink-web')

# link database to kleio server
db.set_kleio_server(kserver)

# Must be the last expression of the cell: Jupyter only displays the value
# of the final expression, so placed earlier it would show nothing.
kserver, db



## Get the list of Kleio files with their translation and import status

In [6]:
from typing import List
from timelink.kleio import KleioFile
import pandas as pd

# One entry per Kleio file, carrying both translation and import information.
files = db.get_import_status()

# Turn every entry into a plain dict so pandas can build one column per field.
file_records = list(map(dict, files))
files_df = pd.DataFrame(file_records)

# Column overview first, then only the columns that matter for status checks.
files_df.info()
status_columns = [
    'path',
    'status',
    'warnings',
    'import_status',
    'import_warnings',
    'modified_string',
]
files_df[status_columns]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype              
---  ------              --------------  -----              
 0   path                4 non-null      object             
 1   name                4 non-null      object             
 2   size                4 non-null      int64              
 3   directory           4 non-null      object             
 4   modified            4 non-null      datetime64[ns, UTC]
 5   modified_iso        4 non-null      datetime64[ns, UTC]
 6   modified_string     4 non-null      object             
 7   qtime               4 non-null      datetime64[ns, UTC]
 8   qtime_string        4 non-null      object             
 9   source_url          4 non-null      object             
 10  status              4 non-null      object             
 11  translated          4 non-null      datetime64[ns, UTC]
 12  translated_string   4 non-null      obje

Unnamed: 0,path,status,warnings,import_status,import_warnings,modified_string
0,sources/b1685.cli,translation_status_enum.W,1,import_status_enum.U,,2025-02-09 03:35:45
1,sources/dehergne-a.cli,translation_status_enum.V,0,import_status_enum.U,,2025-02-09 03:08:53
2,sources/dehergne-locations-1644.cli,translation_status_enum.V,0,import_status_enum.U,,2025-02-09 03:08:50
3,sources/real-entities/real-entities.cli,translation_status_enum.V,0,import_status_enum.U,,2025-02-09 03:08:51


Get files with specific status

* T = needs translation (modified after last translation or never translated)
* E = translated with errors
* W = translated with warnings
* V = valid, translated successfully
* P = currently being processed by Kleio Server
* Q = queued for translation by Kleio Server

In [7]:
# Files translated with warnings (status "W"), searched from the root path ""
# with recursion enabled.
with_warnings = kserver.get_translations(path="", recurse=True, status="W")

# Convert the first match to a dict for a more readable display.
first_with_warnings = with_warnings[0]
dict(first_with_warnings)

{'path': 'sources/b1685.cli',
 'name': 'b1685.cli',
 'size': 17236,
 'directory': 'sources',
 'modified': datetime.datetime(2025, 2, 9, 3, 35, 45, 465041, tzinfo=TzInfo(UTC)),
 'modified_iso': datetime.datetime(2025, 2, 9, 3, 35, 45, tzinfo=TzInfo(UTC)),
 'modified_string': '2025-02-09 03:35:45',
 'qtime': datetime.datetime(1970, 1, 1, 0, 0, tzinfo=TzInfo(UTC)),
 'qtime_string': '1970-01-01 00:00:00',
 'source_url': '/rest/sources/sources/b1685.cli',
 'status': <translation_status_enum.W: 'W'>,
 'translated': datetime.datetime(2025, 2, 9, 3, 35, tzinfo=TzInfo(UTC)),
 'translated_string': '2025-02-09T03:35:00+00:00',
 'errors': 0,
 'version': 'KleioTranslator - server version 12.7 - build 579 2025-01-29 17:45:15',
 'rpt_url': '/rest/reports/sources/b1685.rpt',
 'xml_url': '/rest/exports/sources/b1685.xml',
 'import_status': None,
 'import_errors': None,
 'import_error_rpt': None,
 'imported': None,
 'imported_string': None}

Get a translation report

In [8]:
# Fetch the translation report of the first file that has warnings.
rpt = kserver.get_report(with_warnings[0])

# Only the first 512 characters are shown; "..." marks the cut.
rpt_head = rpt[:512]
print(rpt_head, "...")


KleioTranslator - server version 12.7 - build 579 2025-01-29 17:45:15
9-2-2025 3-35

Processing data file b1685.cli
-------------------------------------------
Generic Act translation module with geoentities (XML).
     Joaquim Ramos de Carvalho (joaquim@uc.pt) 
** New document: kleio
kleio translation started
Structure: gacto2.str
Prefix: 
Autorel: 
Translation count: 81
Obs: 
** Processing source fonte$baptismos 1685



Show the Kleio source file

In [9]:
# Fetch the Kleio source text of the first file that has warnings.
src = kserver.get_source(with_warnings[0])

# Only the first 512 characters are shown.
src_head = src[:512]
print(src_head)

kleio$gacto2.str/translations=82
   fonte$baptismos 1685/tipo=reg paroquiais/localizacao=fol. 30-34/data=16850000/obs=existem baptismos anteriores mas em muito mau estado.#VERSAO ABREVIADA. USAR O TRADUTOR CORRECTO

      bap$b1685.1/8/7/1685/?/manuel cordeiro

         n$maria/f/id=b1685.1-per1

            pai$manuel madeira/m/id=b1685.1-per1-per2
               ls$residencia/alencarce

            mae$domingas joao/f/id=b1685.1-per1-per3/obs=

            pad$antonio jorge/m/id=b1685.1-per4
             


## Import files into the database

The method `db.update_from_sources()` compares modification timestamps with
import timestamps, then translates and imports files as needed.

In [10]:
import logging

# This should run in the background in real life apps.
# As described in the text above, update_from_sources() translates and
# imports the files that need it.
db.update_from_sources()

## Clean translations, reimport

Sometimes a full reload is needed (for instance when there is a 
relevant update of kleio server).

In [11]:
import logging

# Set log level to INFO so the translation requests are shown in the cell
# output. force=True makes basicConfig replace any handlers already attached
# to the root logger; without it the call is a no-op once logging is configured.
logging.basicConfig(level=logging.INFO, force=True)

# Clean translations and update database.
# The empty path is used as the root of the sources; recurse="yes" presumably
# extends the clean to inner directories (confirm against KleioServer docs).
kserver.translation_clean("", recurse="yes")
# translate and reimport
db.update_from_sources()

INFO:root:Request translation of T sources/b1685.cli
INFO:root:Request translation of T sources/dehergne-a.cli
INFO:root:Request translation of T sources/dehergne-locations-1644.cli
INFO:root:Request translation of T sources/real-entities/real-entities.cli


## Translate and import single files

How to trigger the translation of a single file
(or of a directory, optionally recursing into inner directories) and check the progress of the translation with the Kleio server.

In [14]:
import time

# get file list
files = kserver.get_translations("", recurse=True)
# pick one
file = files[0]
path = file.path
print(path, file.translated)

# ask kleio server to translate and wait
kserver.translate(path)

# Wait while the file is either being processed (status "P") or still
# queued (status "Q"). Checking "P" alone would end the wait immediately for
# a file that is queued but not yet picked up, and the import check below
# would then run before the translation had finished.
while True:
    in_process = kserver.get_translations(path=path, status="P")
    queued = kserver.get_translations(path=path, status="Q")
    if not in_process and not queued:
        break
    print("processing:", len(in_process))
    print("queued:", len(queued))
    print("waiting")
    time.sleep(5)  # polling interval, in seconds

# Files the database reports as needing import once translation is over.
need_import = db.get_need_import()
print("need_import", need_import)


sources/b1685.cli 2025-02-09 04:18:00+00:00
processing: 1
queued: 0
waiting


### Importing specific files

In [15]:
# Import the first entry of the need_import list and print the statistics
# returned by the import.
first_pending = need_import[0]
stats = db.import_from_xml(first_pending)
print(stats)


INFO:root:Storing 2 postponed relations


{'datetime': 1739080800.075247, 'machine': 'jrc-air-m2.local', 'database': sqlite:////Users/jrc/develop/timelink-py/tests/timelink-home/projects/web-tests/database/sqlite/timelink-web.sqlite, 'file': '/rest/exports/sources/b1685.xml', 'import_time_seconds': 3.7359778881073, 'entities_processed': 0, 'entity_rate': 0.0, 'person_rate': 0.0, 'nerrors': 0, 'errors': []}


## Check the result of import

In [28]:
# Import status restricted to the path of the file picked earlier; the first
# entry of the result is the one inspected here.
status_for_file = db.get_import_status(path=file.path)
ifile: KleioFile = status_for_file[0]  # just one

# Status code followed by the error and warning counts.
print(
    ifile.import_status.value,
    ifile.import_errors,
    ifile.import_warnings,
)
# Error report first, then warning report.
print(ifile.import_error_rpt)
print(ifile.import_warning_rpt)

I 0 0
No errors
