In [None]:
import pandas as pd
import numpy as np
import html
import panel as pn
pn.extension(design="material", sizing_mode="stretch_width")
@pn.cache
def loadData():
    """Read the example annotation table from its parquet file.

    Wrapped in ``pn.cache`` so the result can be reused instead of re-reading the file.

    Returns:
        pandas.DataFrame: contents of ``data/moon_example.parquet``.
    """
    frame = pd.read_parquet('data/moon_example.parquet')
    return frame
df = loadData()

def BACKGROUND_validationColor(summary, txt):
    """Return a stylesheet setting the background color of the title part of a text input.

    Better suited to a light page. The color encodes how ``txt`` relates to ``summary``:

    * ``txt`` is ``None`` or empty                               -> ``#ffd7b5``
    * at least one ``'...'``-separated fragment is not found     -> ``#ffcccb``
    * every fragment occurs in ``summary``                       -> ``#90d5ff``

    Fragments are stripped and compared case-insensitively.

    Args:
        summary: text the fragments are looked up in.
        txt: excerpt text; fragments are separated by ``'...'``.

    Returns:
        str: a one-rule ``:host`` stylesheet.
    """
    NOTHING_ENTERED  = ':host { --design-background-color: #ffd7b5; }'
    FRAGMENT_MISSING = ':host { --design-background-color: #ffcccb; }'
    ALL_FOUND        = ':host { --design-background-color: #90d5ff; }'

    if txt is None or len(txt) == 0:
        return NOTHING_ENTERED

    haystack  = summary.lower()
    fragments = (piece.lower().strip() for piece in txt.split('...'))
    if all(fragment in haystack for fragment in fragments):
        return ALL_FOUND
    return FRAGMENT_MISSING

def FROM_DF_svgForHighlights():
    """Highlight, in the current summary, every excerpt fragment stored in ``df``.

    Reads the module-level ``df``, ``q_id``, ``model`` and the ``*_field`` column
    names. Rows matching the current question and model are selected; the
    summary is taken from the first matching row and each row's excerpt is
    split on ``'...'`` into fragments. Every case-insensitive occurrence of a
    fragment in the summary is recorded (with the summary's original casing)
    and mapped to the color ``'#c0c0c0'``.

    NOTE(review): ``rt`` is not defined in the visible part of this notebook;
    it is assumed to provide ``textBlock(...)`` whose result has a
    ``highlights(dict)`` method -- confirm where it is imported.

    Returns:
        Whatever ``rt.textBlock(...).highlights(...)`` returns.

    Raises:
        IndexError: if no row matches the current ``q_id`` / ``model``.
    """
    _df_    = df.query(f'`{q_id_field}` == @q_id and `{source_field}` == @model').reset_index()
    summary = _df_.iloc[0][summary_field]
    _tb_    = rt.textBlock(summary, word_wrap=True, w=1600)
    _summary_lower_ = summary.lower()  # lower-case once instead of on every search
    _lu_    = {}
    for _excerpt_ in _df_[excerpt_field]:
        for _part_ in _excerpt_.split('...'):
            _part_ = _part_.strip().lower()
            if len(_part_) == 0: continue
            i0 = _summary_lower_.find(_part_)
            while i0 != -1:
                i1 = i0 + len(_part_)
                # keep the summary's own casing as the highlight key
                _lu_[summary[i0:i1]] = '#c0c0c0'
                # resume right after this match (not one character later) so a
                # directly adjacent occurrence is not skipped
                i0 = _summary_lower_.find(_part_, i1)
    return _tb_.highlights(_lu_)


In [None]:
# --- column names used in the annotation dataframe ---
q_id_field     = 'question_id'
question_field = 'question'
scu_field      = 'summary_content_unit'
source_field   = 'model'
summary_field  = 'summary'
excerpt_field  = 'excerpt'

# --- the (question, model) pair being annotated ---
q_id  = 'Q01'
model = 'gpt4o'

# Summary text for that pair: first distinct value among the matching rows
summary = (
    df.query(f'`{q_id_field}` == @q_id and `{source_field}` == @model')[summary_field]
    .unique()[0]
)

# all scu's identified for this question (the query filters on the question only), sorted
scus = sorted(
    df.query(f'`{q_id_field}` == @q_id')[scu_field].unique()
)

# Two-way lookup between each SCU and its TextInput widget
scu_to_text_input = dict()
text_input_to_scu = dict()

# https://panel.holoviz.org/how_to/styling/design_variables.html
def validationColor(summary, txt):
    """Return a stylesheet setting the color of the text itself for a text input.

    Better suited to a dark page. The color encodes how ``txt`` relates to ``summary``:

    * ``txt`` is ``None`` or empty                               -> ``#ffd7b5``
    * at least one ``'...'``-separated fragment is not found     -> ``#ff0000``
    * every fragment occurs in ``summary``                       -> ``#90d5ff``

    Fragments are lower-cased and stripped before the lookup, so matching
    ignores case and surrounding whitespace.

    Args:
        summary: text the fragments are looked up in.
        txt: excerpt text; fragments are separated by ``'...'``.

    Returns:
        str: a one-rule ``:host`` stylesheet.
    """
    styles = {
        'blank':   ':host { --design-secondary-text-color: #ffd7b5; }',
        'missing': ':host { --design-secondary-text-color: #ff0000; }',
        'found':   ':host { --design-secondary-text-color: #90d5ff; }',
    }
    if txt is None or len(txt) == 0:
        return styles['blank']

    lowered_summary = summary.lower()
    unmatched = [
        fragment
        for fragment in (piece.lower().strip() for piece in txt.split('...'))
        if fragment not in lowered_summary
    ]
    return styles['missing'] if unmatched else styles['found']

def exampleSCUs(scu):
    """Render, as HTML, the excerpts other sources gave for ``scu`` on the current question.

    Selects the rows of the module-level ``df`` whose question is ``q_id``,
    whose SCU equals ``scu`` and whose source differs from ``model``. Source
    names, excerpts and the SCU heading are HTML-escaped.

    Args:
        scu: the summary content unit to show examples for.

    Returns:
        str: an ``<h3>`` heading followed by one ``<p>`` per matching row.
    """
    fragments = [f'<h3> "{html.escape(scu)}" Examples </h3>']
    # df.query stays at function level so that @scu resolves to the argument
    others = df.query(f'`{q_id_field}` == @q_id and `{scu_field}` == @scu and `{source_field}` != @model').reset_index()
    fragments.extend(
        f'<p><b>{html.escape(source)}</b><br>{html.escape(text)}</p>'
        for source, text in zip(others[source_field], others[excerpt_field])
    )
    return ''.join(fragments)

def inputTextChanged(*events):
    """Watcher callback for the SCU text inputs.

    For each event, re-colors the widget that fired it according to its
    current ``value_input``. Then re-renders the highlighted summary and, if
    the widget of the last event is a known SCU input, shows the example
    excerpts for that SCU.

    Args:
        *events: event objects exposing ``obj`` (the widget that changed).
            May be empty, in which case only the summary is refreshed.
    """
    _last_widget_ = None  # tracked explicitly instead of relying on the leaked loop variable
    for event in events:
        _last_widget_ = event.obj
        _last_widget_.stylesheets = [validationColor(summary, _last_widget_.value_input)]
    summary_widget.object = markupHighlights(summary)
    # Without events there is no widget to show examples for
    if _last_widget_ is not None and _last_widget_ in text_input_to_scu:
        scu_examples_widget.object = exampleSCUs(text_input_to_scu[_last_widget_])

def markupHighlights(summary):
    """Return ``summary`` as escaped HTML with every matched excerpt fragment wrapped in ``<mark>``.

    The fragments come from the current ``value`` of each SCU text input
    (module-level ``scus`` / ``scu_to_text_input``), split on ``'...'``,
    stripped and matched case-insensitively. Overlapping or touching matches
    are merged into a single ``<mark>`` region.

    Args:
        summary: the plain-text summary to mark up.

    Returns:
        str: HTML in which all text is escaped with ``html.escape``.
    """
    _summary_lower_ = summary.lower()  # lower-case once instead of on every search

    # Collect half-open (start, end) spans for every occurrence of every fragment
    spans = []
    for scu in scus:
        for _part_ in scu_to_text_input[scu].value.split('...'):
            _part_ = _part_.strip().lower()
            if len(_part_) == 0: continue
            i0 = _summary_lower_.find(_part_)
            while i0 != -1:
                i1 = i0 + len(_part_)
                spans.append((i0, i1))
                i0 = _summary_lower_.find(_part_, i1)

    # Merge overlapping / touching spans. The merged end is the larger of the
    # two ends, so a span nested inside a longer one cannot shrink it.
    merged = []
    for i0, i1 in sorted(spans):
        if merged and i0 <= merged[-1][1]:
            merged[-1] = (merged[-1][0], max(merged[-1][1], i1))
        else:
            merged.append((i0, i1))

    # Emit escaped text, wrapping each merged span in <mark>
    with_marks, cursor = [], 0
    for i0, i1 in merged:
        if cursor < i0:
            with_marks.append(html.escape(summary[cursor:i0]))
        with_marks.append(f'<mark>{html.escape(summary[i0:i1])}</mark>')
        cursor = i1
    if cursor < len(summary):
        with_marks.append(html.escape(summary[cursor:]))
    return ''.join(with_marks)

# make widgets: one TextInput per SCU, pre-filled with the current model's excerpt when it has one
text_inputs = []
for scu in scus:
    _df_ = df.query(f'`{q_id_field}` == @q_id and `{scu_field}` == @scu and `{source_field}` == @model')
    # Read the excerpt through the configured column name (excerpt_field), like the rest of the notebook
    if len(_df_) == 0: _str_ = ''
    else:              _str_ = _df_.iloc[0][excerpt_field]
    text_input               = pn.widgets.TextInput(name=scu,
                                                    value=_str_,
                                                    stylesheets=[validationColor(summary, _str_)])
    # React to both parameters; onlychanged=False also fires when the new value equals the old one
    text_input.param.watch(inputTextChanged, ['value_input','value'], onlychanged=False)
    # Register the widget in both lookup directions
    scu_to_text_input[scu], text_input_to_scu[text_input] = text_input, scu
    text_inputs.append(text_input)
# Built after the inputs exist because markupHighlights reads their values
summary_widget      = pn.pane.HTML(markupHighlights(summary))
scu_examples_widget = pn.pane.HTML('<h3>Examples...</h3>')

def createDataFrame():
    """Collect the non-empty excerpts currently in the text inputs into a dataframe.

    One row is produced per SCU whose text input has a non-empty ``value``.
    The question text is looked up in the module-level ``df`` for the current
    ``q_id``; question id, source and summary come from the module-level
    ``q_id``, ``model`` and ``summary``.

    Returns:
        pandas.DataFrame: columns are the question id, question, source, SCU,
        summary and excerpt fields, in that order.
    """
    columns   = (q_id_field, question_field, source_field, scu_field, summary_field, excerpt_field)
    collected = {column: [] for column in columns}
    for unit, widget in scu_to_text_input.items():
        text = widget.value
        if not len(text):
            continue
        question = df.query(f'`{q_id_field}` == @q_id')[question_field].unique()[0]
        # values listed in the same order as `columns`
        row = (q_id, question, model, unit, summary, text)
        for column, cell in zip(columns, row):
            collected[column].append(cell)
    return pd.DataFrame(collected)

# make layout: highlighted summary on top, the SCU inputs in a three-column grid, examples below
_column_ = pn.Column(
    summary_widget,
    pn.GridBox(*text_inputs, ncols=3, sizing_mode="fixed", width=1600),
    scu_examples_widget,
)
# last expression of the cell, so the layout is displayed
_column_

In [None]:
# Peek at three randomly chosen rows of the loaded data (no seed, so the rows differ between runs)
df.sample(3)

In [None]:
# Show the excerpts currently entered in the text inputs as a dataframe
createDataFrame()