<h2 style="font-family: 'Times New Roman'">Приведение словоформы к лемме</h2>

In [None]:
import string
import pymorphy3
import panel as pn
import ipywidgets as widgets
from ipywidgets.widgets import Label, Layout, HBox
from ipydatagrid import  DataGrid, TextRenderer, BarRenderer, Expr
import pandas as pd
import gspread

import doexample
# Load Panel's 'tabulator' extension; pn.widgets.Tabulator is used further down for the lemma editor.
pn.extension('tabulator')

<h3 style="font-family: 'Times New Roman'">Загрузите файл 'json' для доступа к таблице Google</h3>

In [None]:
# File picker limited to a single '.json' file; the selected file is later
# saved to disk and passed to gspread as the service-account key.
uploader = widgets.FileUpload(
    multiple=False,
    accept='.json',
)
display(uploader)

<h3 style="font-family: 'Times New Roman'">Сохранить файл в локальную папку</h3>

In [None]:
import os

# The key is written to (and later read from) '../keys', so that is the
# directory that has to exist. exist_ok=True makes re-running the cell safe
# without hiding real failures such as missing permissions.
tmpath = '../keys'
os.makedirs(tmpath, exist_ok=True)

# Fail with a readable message when nothing has been selected in the upload widget.
if not uploader.value:
    raise RuntimeError("No file uploaded: choose a '.json' key file in the upload widget first.")

uploaded_file = uploader.value[0]
with open(f"../keys/{uploaded_file.name}", "wb") as fp:
    fp.write(uploaded_file.content)
print(f'Saved file: {uploaded_file.name}')

<h3 style="font-family: 'Times New Roman'">Подключение к таблице</h3>

In [None]:
# Authorise gspread with the key file saved under ../keys, then open the
# spreadsheet named "tokens".
key_path = f'../keys/{uploaded_file.name}'
sa = gspread.service_account(filename=key_path)
book = sa.open("tokens")

<h3 style="font-family: 'Times New Roman'">Выберите лист таблицы</h3>

In [None]:
# Every sheet of the spreadsheet is offered as a (title, 1-based position) option.
worksheet_list = book.worksheets()
drop_list = [
    (sheet.title, position)
    for position, sheet in enumerate(worksheet_list, start=1)
]

widget_sheet = widgets.Dropdown(
    description='Название листа:',
    options=drop_list,
    value=1,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)
display(widget_sheet)

<h3 style="font-family: 'Times New Roman'">Выберите тему (заголовок столбца)</h3>

In [None]:
# The sheet dropdown stores the 1-based position of the chosen sheet inside
# worksheet_list, so the worksheet object can be taken from that list directly.
work_sheet = worksheet_list[widget_sheet.value - 1]

heads_sheet = work_sheet.get('1:1')
# Guard against a first row that comes back with no row entry at all.
header_row = heads_sheet[0] if heads_sheet else []

# Ordered (header, 1-based column number) pairs. Blank header cells are
# skipped, but the numbering still follows the real column position.
drop_heads_list = [(b, i) for i, b in enumerate(header_row, start=1) if len(b) > 0]

widget_sheet_theme = widgets.Dropdown(
    options=drop_heads_list,
    # Preselect the first available theme rather than assuming column 1 has a header.
    value=drop_heads_list[0][1] if drop_heads_list else None,
    description='Тема:',
    layout=widgets.Layout(width='300px'),
    style={'description_width': 'initial'}
)
display(widget_sheet_theme)

<h3 style="font-family: 'Times New Roman'">Список граммем Opencorpora</h3>

In [None]:
# Load the grammeme table through doexample.cache_grammemes and wrap it in DataFrames.
dataframe = doexample.cache_grammemes("../resources/grammemes.xlsx")
fr = pd.DataFrame.from_dict(dataframe)
df = pd.DataFrame(fr)

# A single text renderer is shared by all displayed columns.
fsize = TextRenderer(font=Expr("'12px Calibri'"))
renderers = {
    column: fsize
    for column in ("Граммема", "Группа", "Значение", "Примеры", "Пояснение")
}
datagrid = DataGrid(
    df,
    renderers=renderers,
    selection_mode="cell",
    base_row_size=21,
    base_column_size=170,
    layout={"height": "300px", "width": "950px"},
)
datagrid

<h3 style="font-family: 'Times New Roman'">Настройка ограничений для токенов</h3>

In [None]:
# The first column of the grammeme table supplies the tags that may be entered.
descr = fr.iloc[:, [0, 1]]
mytags = [row[0] for row in descr.values.tolist()]

# Tokens carrying any of the selected grammemes are skipped by the text processor.
tags = widgets.TagsInput(
    allowed_tags=mytags,
    allow_duplicates=False,
    value=['CONJ', 'PRCL', 'NUMB', 'PNCT', 'PREP', 'NPRO', 'ADVB'],
)
tags

<h3 style="font-family: 'Times New Roman'">Функция очистки буфера</h3>

In [None]:
# Output area that captures anything emitted while the click handler runs.
output = widgets.Output()
button = widgets.Button(
    description='Очистить буфер',
    icon='check',
    tooltip='Click me',
    button_style='',
    disabled=False,
)


def on_button_clicked(b):
    """Empty the text buffer widget.

    `b` is the clicked button, passed in by ipywidgets; it is not used.
    `wtx` is looked up when the handler runs, so the buffer cell must have
    been executed before the button is pressed.
    """
    with output:
        wtx.value = ''


button.on_click(on_button_clicked)

<h3 style="font-family: 'Times New Roman'">Буфер для вставки текста</h3>

In [None]:
%%html
<style>
    .wtx_bg{width:auto; background-color:#F5F5DC;}
</style>

In [None]:
# Editable text buffer pre-filled with a sample phrase.
wtx = widgets.Textarea(
    description='Буфер:',
    value='первое 1.1 число 2, дня и года',
    placeholder='Type something',
    disabled=False,
    layout=widgets.Layout(
        width='1000px',
        min_height='90px',
        padding='10px 20px 10px 0px',
        justify_content='flex-end',
    ),
    style={'description_height': 'initial', 'description_width': 'initial'},
)
# Attach the 'wtx_bg' CSS class declared in the %%html cell.
wtx.add_class('wtx_bg')

# Show the buffer first, then the clear button together with its output area.
display(wtx)
display(button, output)

<h3 style="font-family: 'Times New Roman'">Детектор языка текста в буфере (украинский, русский)</h3>

In [None]:
from langdetect import DetectorFactory, detect
from langdetect.lang_detect_exception import LangDetectException

# langdetect is randomised by default; a fixed seed makes repeated runs on the
# same text return the same language.
DetectorFactory.seed = 0

# Defaults are assigned up front so that later cells always find these names,
# even when the buffer is empty. Ukrainian is the fallback, as before.
language = 'uk'
choice = 'Украинский'
vc_from = "ukrainian"
vc_to = "russian"
radio_choice = None

buffer_text = wtx.value.strip()
if buffer_text:
    try:
        detected = detect(buffer_text)
    except LangDetectException:
        # Raised when the text offers nothing to detect on (e.g. digits only).
        detected = None

    # Only Russian and Ukrainian are handled downstream, so every result other
    # than 'ru' (including a failed detection) is treated as Ukrainian.
    if detected == 'ru':
        language = 'ru'
        choice = 'Русский'
        vc_from, vc_to = vc_to, vc_from

    radio_choice = widgets.RadioButtons(
        options=['Украинский', 'Русский',],
        value=choice,
        layout={'width': 'max-content'},
        description='Детектор языка текста в буфере:',
        disabled=False)
radio_choice

<h3 style="font-family: 'Times New Roman'">Обработчик текста</h3>

In [None]:
from tqdm.notebook import tqdm
from mtrans import BingTranslator, DeepLTranslator, GoogleTranslator
from mtrans import RateLimitException

# The radio button holds the language shown to (and possibly corrected by) the
# user, so it decides the source language. Its value is always one of the two
# supported languages, which keeps unsupported detector codes out of pymorphy3.
# With no radio button (empty buffer) there is nothing to process and the
# Ukrainian defaults are used.
if radio_choice is not None and radio_choice.value == 'Русский':
    language = 'ru'
    choice = 'Русский'
    vc_from = "ru"
    vc_to = "uk"
else:
    language = 'uk'
    choice = 'Украинский'
    vc_from = "uk"
    vc_to = "ru"

# Strip ASCII punctuation and split the buffer into whitespace-separated tokens.
text = wtx.value.translate(str.maketrans('', '', string.punctuation))
words = text.split()

# Source-language analyzer for the buffer text; target-language analyzer for
# the translated words, which belong to the other language.
morph = pymorphy3.MorphAnalyzer(lang=language)
morph_target = pymorphy3.MorphAnalyzer(lang=vc_to)
# One translator instance is reused for every word.
bing_translator = BingTranslator()

fnormal = []
itoken = []
mtag = []
lang = []

for word in tqdm(words):
    # Parse once and reuse the most probable analysis.
    parsed = morph.parse(word)[0]
    # Skip tokens that carry any grammeme selected in the restrictions widget.
    if any(t in parsed.tag for t in tags.value):
        continue

    itoken.append(str(word))
    fnormal.append(str(parsed.normal_form))
    mtag.append(str(parsed.tag))

    bing_translation = bing_translator.translate(fnormal[-1], vc_from, vc_to)
    bing_res = bing_translation['translations'][0]['text']

    # google_translator = GoogleTranslator()
    # google_translation = google_translator.translate(fnormal[-1], vc_from, vc_to)
    # google_res = google_translation['sentences'][0]['trans']

    # Always append exactly one entry per kept token: the four lists become the
    # columns of one DataFrame and must have equal length.
    if bing_res:
        lang.append(str(morph_target.parse(bing_res)[0].normal_form).lower())
    else:
        lang.append('')

base = {'Нормальная форма' : fnormal, 'Перевод': lang, 'Начальная' : itoken, 'Тег' : mtag}
df_normal = pd.DataFrame(base)

fsize = TextRenderer(font=Expr("'12px Calibri'"))
renderers = {"Нормальная форма": fsize, "Перевод": fsize, "Начальная": fsize, "Тег": fsize}
lemma_table = DataGrid(df_normal, layout={"height": "250px", "width": "1000px"}, selection_mode="cell", base_row_size=21, base_column_size=170, renderers=renderers)
lemma_table

<h3 style="font-family: 'Times New Roman'">Загрузить лексемы из Google листа по теме</h3>

In [None]:
# Read the selected theme column and the column right after it, keeping only
# non-empty cells.
theme_column = widget_sheet_theme.value
fval_ua = [cell for cell in work_sheet.col_values(theme_column) if cell]
fval_ru = [cell for cell in work_sheet.col_values(theme_column + 1) if cell]

# Pad the shorter list with None so both columns have the same length.
max_len = max(len(fval_ua), len(fval_ru))
fval_ua.extend([None] * (max_len - len(fval_ua)))
fval_ru.extend([None] * (max_len - len(fval_ru)))

# The first two entries of each list form the column title; the rest is data.
fval_name_first = '_'.join((fval_ua[0], fval_ua[1]))
fval_name_second = '_'.join((fval_ru[0], fval_ru[1]))

gbase = {
    fval_name_first: fval_ua[2:],
    fval_name_second: fval_ru[2:],
}
gbase_pd = pd.DataFrame.from_dict(gbase)

fsize = TextRenderer(font=Expr("'12px Calibri'"))
renderers = {name: fsize for name in (fval_name_first, fval_name_second)}
gtable = DataGrid(
    gbase_pd,
    renderers=renderers,
    selection_mode="cell",
    base_row_size=21,
    base_column_size=300,
    layout={"height": "250px","width": "1000px"},
)
gtable

<h3 style="font-family: 'Times New Roman'">Редактор объединенных лемм</h3>

In [None]:
def _merged_lemmas(*sources):
    """Return the sorted union of the given iterables, skipping None, NaN and blank values."""
    merged = set()
    for source in sources:
        merged.update(v for v in source if pd.notna(v) and str(v).strip())
    return sorted(merged, key=str)


lemma_view = lemma_table.data
lemma_column_data = lemma_view.to_dict()
lemma_normal = set(lemma_column_data['Нормальная форма'].values())
lemma_translate = set(lemma_column_data['Перевод'].values())

google_view = gtable.data
google_column_data = google_view.to_dict()
google_sheet_uk = set(google_column_data[fval_name_first].values())
google_sheet_ru = set(google_column_data[fval_name_second].values())

# vc_from holds 'ru'/'uk' after the text-processing cell, but 'russian'/'ukrainian'
# right after the language-detector cell; accept both spellings so that the
# unions are always defined.
if vc_from in ('ru', 'russian'):
    union_uk = {'uk': _merged_lemmas(google_sheet_uk, lemma_translate)}
    union_ru = {'ru': _merged_lemmas(google_sheet_ru, lemma_normal)}
else:
    union_uk = {'uk': _merged_lemmas(google_sheet_uk, lemma_normal)}
    union_ru = {'ru': _merged_lemmas(google_sheet_ru, lemma_translate)}

# The two unions normally differ in length. Building the frame from Series lets
# pandas pad the shorter column instead of raising on unequal lengths.
frame1 = pd.DataFrame({
    "Ukraine": pd.Series(union_uk['uk'], dtype=object),
    "Russian": pd.Series(union_ru['ru'], dtype=object),
})
mytable = pn.widgets.Tabulator(value=frame1)
button1 = pn.widgets.Button(name="Добавить строку")
button2 = pn.widgets.Button(name="Удалить выделленные строки")


def change_data(_):
    """Append one empty row to the table."""
    frame2 = pd.DataFrame({"Ukraine": [None], "Russian": [None]})
    mytable.stream(frame2)


def remove_selected_rows(_):
    """Remove the rows currently selected in the table."""
    current = mytable.value
    # The selection holds row positions; translate them to index labels, since
    # the two stop matching once rows have been dropped.
    mytable.value = current.drop(current.index[mytable.selection])
    # Clear the selection so stale positions cannot hit other rows on the next click.
    mytable.selection = []


button1.on_click(change_data)
button2.on_click(remove_selected_rows)
pn.Column(pn.Row(button1,button2), mytable).servable()

<h3 style="font-family: 'Times New Roman'">Отправить данные в Google лист</h3>

In [None]:
goutput = widgets.Output()
gbutton = widgets.Button(
    description='Перезаписать',
    disabled=False,
    button_style='',
    tooltip='Click me',
    icon='check',)


def _not_in_sheet(values, column):
    """Return the entries of `values` that do not occur in sheet column `column` (1-based).

    Every occurrence of a value already present in the sheet is dropped; the
    order of the remaining entries is preserved.
    """
    existing = set(work_sheet.col_values(column))
    return [v for v in values if v not in existing]


def on_button_clicked_google(b):
    """Print the editor values that are not yet in the selected sheet columns.

    This is still a stub: nothing is written to the Google sheet. For the
    'Ukraine' column the theme column is checked, for the 'Russian' column the
    one right after it. `b` is the clicked button and is not used.
    """
    with goutput:
        out_data = mytable.value

        out_data_list_uk = _not_in_sheet(out_data['Ukraine'].tolist(), widget_sheet_theme.value)
        print(out_data_list_uk)

        out_data_list_ru = _not_in_sheet(out_data['Russian'].tolist(), widget_sheet_theme.value + 1)
        print(out_data_list_ru)
        print('Заглушка')


gbutton.on_click(on_button_clicked_google)
display(gbutton, goutput)