In [None]:
from typing import Dict, Optional
from sortedcontainers import SortedDict  # type: ignore
import os

import ipysheet as sheet
from IPython.display import display, HTML
import mammoth

from const import IDX_COL, STYLE_COL
from config import FROM_LANG, TO_LANG
from setup import sl_sem, gr_sem

from util import ord_word
from semantics import TableSemantics

from merger import merge
from aggregator import aggregate
from exporter import export_docx
from generator import generate_docx

In [None]:
# Result containers, one per source language; both are SortedDicts created
# with ord_word.
sla = SortedDict(ord_word)
gre = SortedDict(ord_word)

# Every direction handled by the notebook: (original, translation, label,
# container receiving the results).
pairs = [
    TableSemantics(source, target, label=title, result=bucket)
    for source, target, title, bucket in (
        (sl_sem, gr_sem, "от славянски основен към гръцки", sla),
        (sl_sem.var, gr_sem, "от славянски вариант към гръцки", sla),
        (gr_sem, sl_sem, "от гръцки основен към славянски", gre),
        (gr_sem.var, sl_sem, "от гръцки вариант към славянски", gre),
    )
]
# The first direction supplies the column list used when reading the sheet.
sem = pairs[0]

In [None]:
def _style2str(s: Dict[str, str], bgs: Dict[str, Optional[str]]) -> str:
    """take font style from index and presence of background in selected named columns"""
    result = [k for k, v in bgs.items() if v]
    if s and s["fontWeight"] == "bold":
        result.append("bold")
    if s and s["fontStyle"] == "italic":
        result.append("italic")
    return "|".join(result)

In [None]:
# Two sample rows, each 26 cells wide: four leading blanks, the filled cells,
# then blank padding up to the full width.
rows = [
    [
        *[""] * 4,
        "1/W168a14",
        "вьꙁмогл\ue205",
        "мы брьньн\ue205 \ue205 ꙁемⷧ҇ьн\ue205\ue205• вьꙁмогл\ue205",
        "въꙁмощ\ue205",
        *[""] * 3,
        "ἠδυνήθημεν",
        "δύναμαι",
        *[""] * 13,
    ],
    [
        *[""] * 4,
        "1/W168a15",
        "б\ue205хомь",
        "б\ue205хомь стрьпѣтї•",
        "бꙑт\ue205",
        "",
        "gramm.",
        "",
        "",
        "pass.",
        *[""] * 13,
    ],
]


def hilite(r, c):
    """Pick the background colour for the sample cell at row ``r``, column ``c``.

    Column 5 is always "moccasin"; the cell at row 1, column 9 is
    "lightblue"; every other cell gets ``None`` (no background).
    """
    colour = None
    if c == 5:
        colour = "moccasin"
    elif r == 1 and c == 9:
        colour = "lightblue"
    return colour


def bold(r, c):
    """Return "bold" for every cell in column 7 and ``None`` elsewhere.

    The row index ``r`` is accepted for symmetry with ``hilite`` but unused.
    """
    return "bold" if c == 7 else None


# Sheet widget sized to the sample data plus one spare row, STYLE_COL columns
# wide. Named `input_sheet` rather than `input` so the builtin `input()` is
# not shadowed for the rest of the kernel session.
input_sheet = sheet.sheet(rows=len(rows) + 1, columns=STYLE_COL)
# One widget cell per sample value, styled through hilite()/bold(). The nested
# list mirrors `rows`, so cells[r][c] is the widget holding rows[r][c].
cells = [
    [
        sheet.cell(
            r, c, value, background_color=hilite(r, c), font_weight=bold(r, c)
        )
        for c, value in enumerate(row_values)
    ]
    for r, row_values in enumerate(rows)
]
# Last expression of the cell: renders the sheet widget.
input_sheet

In [None]:
# Read the sheet back into plain lists, one per row, with the encoded style
# string appended as the final element.
lines = []
previous_blank = False
for sheet_row in cells:
    values = [c.value.strip() if c.value else c.value for c in sheet_row]
    # Two consecutive blank lines end the input
    if previous_blank and not any(values):
        break
    # Background colours of the columns named by sem.cols(), keyed "hlNN"
    backgrounds = {}
    for col in sem.cols():
        cell_style = sheet_row[col].style
        if "backgroundColor" in cell_style:
            backgrounds[f"hl{col:02d}"] = cell_style["backgroundColor"]
    values.append(_style2str(sheet_row[IDX_COL].style, backgrounds))
    lines.append(values)
    # Blankness is judged after the style string is appended, so a styled but
    # otherwise empty row does not count as blank.
    previous_blank = not any(values)
print(f"{len(lines)} думи")

In [None]:
# Dump the collected rows (values plus trailing style string) for inspection.
print(lines)

In [None]:
# For each direction: merge the collected lines, aggregate them into the
# direction's result container, and report how many entries were added.
for pair in pairs:
    print(f"Събиране на многоредови преводи {pair.label}...")
    merged = merge(lines, pair.orig, pair.trans)
    print(f"{len(merged)} думи")

    print(f"Кондензиране {pair.label}...")
    size_before = len(pair.result)
    pair.result = aggregate(merged, pair.orig, pair.trans, pair.result)
    added = len(pair.result) - size_before
    print(f"{added} леми")

In [None]:
# Scratch .docx path: each export/generate cell below writes to it and reads
# it back for the HTML preview; the final cell deletes it.
export_fname = "temp.docx"

In [None]:
print("Експорт славянски...")
export_docx(sla, FROM_LANG, export_fname)
# Read the freshly written document back and convert it to HTML.
with open(export_fname, "rb") as docx_stream:
    converted = mammoth.convert_to_html(docx_stream)
# Conversion messages (such as warnings) first, then the rendered preview.
print(converted.messages)
display(HTML(converted.value))

In [None]:
print("Експорт гръцки...")
export_docx(gre, TO_LANG, export_fname)
# Read the freshly written document back and convert it to HTML.
with open(export_fname, "rb") as docx_stream:
    converted = mammoth.convert_to_html(docx_stream)
# Conversion messages (such as warnings) first, then the rendered preview.
print(converted.messages)
display(HTML(converted.value))

In [None]:
print("Генериране славянски...")
generate_docx(sla, FROM_LANG, export_fname)
# Read the freshly written document back and convert it to HTML.
with open(export_fname, "rb") as docx_stream:
    converted = mammoth.convert_to_html(docx_stream)
# Conversion messages (such as warnings) first, then the rendered preview.
print(converted.messages)
display(HTML(converted.value))

In [None]:
print("Генериране гръцки...")
generate_docx(gre, TO_LANG, export_fname)
# Read the freshly written document back and convert it to HTML.
with open(export_fname, "rb") as docx_stream:
    converted = mammoth.convert_to_html(docx_stream)
# Conversion messages (such as warnings) first, then the rendered preview.
print(converted.messages)
display(HTML(converted.value))

In [None]:
# Delete the scratch document. Guarded so that re-running this cell (or
# running it before any export cell) does not raise FileNotFoundError.
if os.path.exists(export_fname):
    os.remove(export_fname)