In [None]:
from typing import Dict, List, Optional
from sortedcontainers import SortedDict  # type: ignore
import os

import ipysheet as sheet
from IPython.display import display, HTML
import mammoth

from const import IDX_COL, STYLE_COL
from config import FROM_LANG, TO_LANG
from setup import sl_sem, gr_sem

from util import ord_word
from semantics import TableSemantics
from hiliting import _hilited_col

from merger import merge
from aggregator import aggregate
from exporter import export_docx
from generator import generate_docx

In [None]:
# Accumulators for the aggregated output, one per source language.
# Both are SortedDicts constructed with ord_word as their key function.
sla = SortedDict(ord_word)
gre = SortedDict(ord_word)

# Slavic -> Greek: main text first, then the variant readings; both collect into `sla`.
sl_main_to_gr = TableSemantics(
    sl_sem, gr_sem, label="от славянски основен към гръцки", result=sla
)
sl_var_to_gr = TableSemantics(
    sl_sem.var, gr_sem, label="от славянски вариант към гръцки", result=sla
)

# Greek -> Slavic: main text first, then the variant readings; both collect into `gre`.
gr_main_to_sl = TableSemantics(
    gr_sem, sl_sem, label="от гръцки основен към славянски", result=gre
)
gr_var_to_sl = TableSemantics(
    gr_sem.var, sl_sem, label="от гръцки вариант към славянски", result=gre
)

pairs = [sl_main_to_gr, sl_var_to_gr, gr_main_to_sl, gr_var_to_sl]

# The first pairing is the semantics object used when importing and reading the table.
sem = pairs[0]

In [None]:
def _style2str(s: Dict[str, str], bgs: Dict[str, Optional[str]]) -> str:
    """take font style from index and presence of background in selected named columns"""
    result = [k for k, v in bgs.items() if v]
    if s and s["fontWeight"] == "bold":
        result.append("bold")
    if s and s["fontStyle"] == "italic":
        result.append("italic")
    return "|".join(result)


def hilite(row, c):
    """Return a "#"-prefixed colour for column `c` of `row`, or None.

    The raw value comes from `_hilited_col(row, c)`. A falsy value means no
    highlight. An 8-character value has its first two characters dropped
    (presumably the alpha channel of an ARGB hex string -- TODO confirm).
    """
    raw = _hilited_col(row, c)
    if not raw:
        return None
    rgb = raw[2:] if len(raw) == 8 else raw
    return f"#{rgb}"


def bold(c, style=None):
    """Return "bold" if `c` is the index column and `style` contains "bold".

    Any other column, or a missing/empty `style`, yields None.
    """
    if c == IDX_COL and style and "bold" in style:
        return "bold"
    return None


def italic(c, style=None):
    """Return "italic" if `c` is the index column and `style` contains "italic".

    Any other column, or a missing/empty `style`, yields None.
    """
    if c == IDX_COL and style and "italic" in style:
        return "italic"
    return None


def pp_row(row: List[str]) -> str:
    """Render one table row as an HTML `<tr>` element.

    Args:
        row: cell texts, inserted into the markup as-is (not HTML-escaped).

    Returns:
        A `<tr>...</tr>` string with one inline-styled `<td>` per cell; an
        empty `row` still produces a single empty `<td>`.
    """
    style = "style='font-family: CyrillicaOchrid10U; background: white; border:1px solid lightgrey'"
    glue = f"</td><td {style}>"
    # The row must be terminated with a closing </tr>; emitting a second
    # opening <tr> leaves the table markup malformed.
    return f"<tr><td {style}>{glue.join(row)}</td></tr>"


def pp_group(rows: List[List[str]]) -> str:
    """Render `rows` as a single HTML table, one `pp_row` per entry."""
    # Cell styling is applied inline by pp_row, so no <style> prefix is emitted.
    rendered = [pp_row(cells_in_row) for cells_in_row in rows]
    body = "".join(rendered)
    return "<table>" + body + "</table>"

# Manual data insertion

# Read insertion from file

In [None]:
from importer import import_mapping

# Workbook to import. The default is the author's local copy; set the
# MAPPING_XLSX environment variable to point the notebook at another file
# without editing this cell.
# Alternative dataset kept for reference:
#   /home/mapto/Dropbox/uchitelno-evangelie/paper/sofia2023/adaptation-1-prilezhanie-2x2.xlsx
fname = os.environ.get(
    "MAPPING_XLSX",
    "/home/mapto/Dropbox/uchitelno-evangelie/paper/sofia2023/aggregation-1-pateshestvie-groupvar.xlsx",
)
rows = import_mapping(fname, sem)
# Drop rows without any truthy cell; strip the remaining cells and replace
# falsy cells with "" so every kept cell is a string.
rows = [[c.strip() if c else "" for c in r] for r in rows if any(r)]
print(rows)

Note: the spreadsheet widget below (ipysheet) seems to work in Chrome only.

In [None]:
# Editable grid with one spare row after the imported data and one column per
# data column (the style column at index STYLE_COL is not shown).
# Named `input_sheet` rather than `input` so the builtin input() is not
# shadowed for the rest of the kernel session.
input_sheet = sheet.sheet(rows=len(rows) + 1, columns=STYLE_COL)

# One widget cell per imported value. Background colour comes from hilite();
# bold/italic are derived from the row's style column and only ever apply to
# the index column (see bold()/italic()).
cells = [
    [
        sheet.cell(
            r,
            c,
            vc,
            background_color=hilite(vr, c),
            font_style=italic(c, vr[STYLE_COL]),
            font_weight=bold(c, vr[STYLE_COL]),
        )
        for c, vc in enumerate(vr[:STYLE_COL])
    ]
    for r, vr in enumerate(rows)
]
input_sheet

# Preprocess lines

In [None]:
# Read the (possibly edited) sheet back into plain rows, appending to each row
# a style string built from the index cell's style and the highlighted columns.
lines = []
blank = False
for row in cells:
    line = [cell.value.strip() if cell.value else cell.value for cell in row]
    # Two consecutive blank rows end the input (the first blank row is kept).
    if blank and not any(line):
        break
    # Background colours of the semantic columns, keyed "hlNN" by column index.
    bgs = {}
    for v in sem.cols():
        cell_style = row[v].style
        if "backgroundColor" in cell_style:
            bgs[f"hl{v:02d}"] = cell_style["backgroundColor"]
    line.append(_style2str(row[IDX_COL].style, bgs))
    lines.append(line)
    # Evaluated after the style string is appended, so a styled row is not blank.
    blank = not any(line)
print(f"{len(lines)} думи")


In [None]:
# Per direction: merge the rows, keep a style-free copy for inspection, then
# aggregate the merged rows into that direction's result container.
intermediary = {}
for p in pairs:
    print(f"Събиране на многоредови преводи {p.label}...")
    # Merging consumes the imported `rows`, not the sheet-derived `lines`.
    merged = merge(rows, p.orig, p.trans)
    print(f"{len(merged)} думи")
    # Keep only the first STYLE_COL columns of each merged row.
    intermediary[p.label] = [entry[:STYLE_COL] for entry in merged]

    print(f"Кондензиране {p.label}...")
    size_before = len(p.result)
    p.result = aggregate(merged, p.orig, p.trans, p.result)
    # Report how many entries this pass added to the result.
    print(f"{len(p.result) - size_before} леми")

# Inspection of intermediary lines

In [None]:
# Show each direction's merged rows as an HTML table under its label.
for direction in intermediary:
    print(direction)
    display(HTML(pp_group(intermediary[direction])))

In [None]:
# Scratch .docx path (relative to the working directory). Every export and
# generation cell below overwrites this file and reads it back for preview;
# the last cell deletes it.
export_fname = "temp.docx"

# Export Slavic...

In [None]:
# Export the Slavic result to the scratch file, then preview it as HTML.
export_docx(sla, FROM_LANG, export_fname)
with open(export_fname, "rb") as exported:
    result = mammoth.convert_to_html(exported)
# Conversion is complete once the file is closed; report and render it.
print(result.messages)  # Any messages, such as warnings during conversion
display(HTML(result.value))

# Export Greek...

In [None]:
# Export the Greek result to the scratch file, then preview it as HTML.
export_docx(gre, TO_LANG, export_fname)
with open(export_fname, "rb") as exported:
    result = mammoth.convert_to_html(exported)
# Conversion is complete once the file is closed; report and render it.
print(result.messages)  # Any messages, such as warnings during conversion
display(HTML(result.value))

# Generation Slavic...

In [None]:
# Generate the Slavic document into the scratch file, then preview it as HTML.
generate_docx(sla, FROM_LANG, export_fname)
with open(export_fname, "rb") as generated:
    result = mammoth.convert_to_html(generated)
# Conversion is complete once the file is closed; report and render it.
print(result.messages)  # Any messages, such as warnings during conversion
display(HTML(result.value))

# Generation Greek...

In [None]:
# Generate the Greek document into the scratch file, then preview it as HTML.
generate_docx(gre, TO_LANG, export_fname)
with open(export_fname, "rb") as generated:
    result = mammoth.convert_to_html(generated)
# Conversion is complete once the file is closed; report and render it.
print(result.messages)  # Any messages, such as warnings during conversion
display(HTML(result.value))

In [None]:
# Delete the scratch document. Guarded so the cell can be re-run, or run after
# an export cell failed before creating the file, without FileNotFoundError.
if os.path.exists(export_fname):
    os.remove(export_fname)