<h2 style="font-family: 'Times New Roman'">Приведение словоформы к лемме</h2>

In [1]:
import string
import pymorphy3
import panel as pn
import ipywidgets as widgets
from ipywidgets.widgets import Label, Layout, HBox
from ipydatagrid import  DataGrid, TextRenderer, BarRenderer, Expr
import pandas as pd
import gspread

import doexample
# Enable Panel's 'tabulator' extension; pn.widgets.Tabulator is used for the
# editable tables further down the notebook.
pn.extension('tabulator')

<h3 style="font-family: 'Times New Roman'">Загрузите файл 'json' для доступа к таблице Google</h3>

In [2]:
# File picker restricted to a single '.json' file; the uploaded file is later
# passed to gspread as the service-account key.
uploader = widgets.FileUpload(multiple=False, accept='.json')
display(uploader)

FileUpload(value=(), accept='.json', description='Upload')

<h3 style="font-family: 'Times New Roman'">Сохранить файл в локальную папку</h3>

In [3]:
import os
# Directory the uploaded key is stored in. It must be the same directory the
# file is written to below (and read from when connecting to the spreadsheet).
tmpath = '../keys'
os.makedirs(tmpath, exist_ok=True)

if not uploader.value:
    # Nothing has been picked in the upload widget yet.
    print('Не загружен файл!')
else:
    uploaded_file = uploader.value[0]
    try:
        with open(f"../keys/{uploaded_file.name}", "wb") as fp:
            fp.write(uploaded_file.content)
    except OSError as ex:
        # Report the real I/O problem instead of claiming nothing was uploaded.
        print(f'Не удалось сохранить файл: {ex}')
    else:
        print(f'Saved file: {uploaded_file.name}')

Saved file: test-7fc02-8dc523f1fe64.json


<h3 style="font-family: 'Times New Roman'">Подключение к таблице</h3>

In [4]:
# Authorise gspread with the key file saved under ../keys, then open the
# spreadsheet titled "tokens".
sa = gspread.service_account(filename='../keys/{}'.format(uploaded_file.name))
book = sa.open("tokens")

<h3 style="font-family: 'Times New Roman'">Список граммем Opencorpora</h3>

In [5]:
# Grammeme reference table, loaded through the project helper
# (presumably a column -> values mapping read from the xlsx file; verify in doexample).
dataframe = doexample.cache_grammemes("../resources/grammemes.xlsx")
fr = pd.DataFrame.from_dict(dataframe)
df = pd.DataFrame(fr)

# One shared text renderer for every displayed column.
fsize = TextRenderer(font=Expr("'12px Calibri'"))
renderers = dict.fromkeys(
    ("Граммема", "Группа", "Значение", "Примеры", "Пояснение"),
    fsize,
)
datagrid = DataGrid(
    df,
    layout={"height": "300px", "width": "950px"},
    selection_mode="cell",
    base_row_size=21,
    base_column_size=170,
    renderers=renderers,
)
datagrid

DataGrid(auto_fit_params={'area': 'all', 'padding': 30, 'numCols': None}, base_column_size=170, base_row_size=…

<h3 style="font-family: 'Times New Roman'">Настройка ограничений для токенов</h3>

In [6]:
# The first column of the grammeme table supplies the tags the user may pick.
descr = fr.iloc[:, [0, 1]]
mytags = [row[0] for row in descr.to_numpy().tolist()]

# Tokens carrying any of the selected grammemes are skipped by the buffer
# processing cell; the initial selection is given in `value`.
tags = widgets.TagsInput(
    allowed_tags=mytags,
    allow_duplicates=False,
    value=['CONJ', 'PRCL', 'NUMB', 'PNCT', 'PREP', 'NPRO', 'ADVB', 'UNKN'],
)
tags

TagsInput(value=['CONJ', 'PRCL', 'NUMB', 'PNCT', 'PREP', 'NPRO', 'ADVB', 'UNKN'], allow_duplicates=False, allo…

<h3 style="font-family: 'Times New Roman'">Буфер для вставки текста</h3>

In [7]:
%%html
<!-- Rules for the 'wtx_bg' CSS class, which the notebook attaches to the text buffer widget. -->
<style>.wtx_bg{width:auto; background-color:#F5F5DC;}</style>

In [8]:
# Output area and the button that empties the text buffer.
output = widgets.Output()
button = widgets.Button(
    description='Очистить буфер',
    tooltip='Click me',
    icon='check',
    button_style='',
    disabled=False,
)


def on_button_clicked(b):
    """Reset the buffer Textarea to an empty string when the button is clicked."""
    with output:
        wtx.value = ''


button.on_click(on_button_clicked)

# Text buffer the user pastes the source text into; it starts with a sample phrase.
wtx_layout = widgets.Layout(
    width='1000px',
    min_height='90px',
    padding='10px 20px 10px 0px',
    justify_content='flex-end',
)
wtx = widgets.Textarea(
    value='первое 1.1 число 2, дня и года',
    placeholder='Type something',
    description='Буфер:',
    disabled=False,
    style={'description_height': 'initial', 'description_width': 'initial'},
    layout=wtx_layout,
)
# Attach the CSS class declared in the %%html cell.
wtx.add_class('wtx_bg')

display(wtx)
display(button, output)

Textarea(value='первое 1.1 число 2, дня и года', description='Буфер:', layout=Layout(justify_content='flex-end…

Button(description='Очистить буфер', icon='check', style=ButtonStyle(), tooltip='Click me')

Output()

<h3 style="font-family: 'Times New Roman'">Детектор языка текста в буфере (украинский, русский)</h3>

In [9]:
from langdetect import detect
from langdetect import DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

# langdetect is randomised unless a seed is set; fix it so that re-running the
# cell on the same text yields the same answer.
DetectorFactory.seed = 0

# Defaults used when the buffer is empty or the language cannot be detected.
# They keep `language`, `choice`, `vc_from` and `vc_to` defined for the cells
# below, and use the same 'uk' / 'ru' codes those cells compare against.
radio_choice = None
language = 'uk'
choice = 'Украинский'
vc_from, vc_to = 'uk', 'ru'

if wtx.value:
    try:
        detected = detect(wtx.value)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. digits only).
        detected = None

    # Only Russian and Ukrainian are supported: anything that is not detected
    # as Russian is treated as Ukrainian.
    if detected == 'ru':
        language = 'ru'
        choice = 'Русский'
        vc_from, vc_to = vc_to, vc_from

    radio_choice = widgets.RadioButtons(
        options=['Украинский', 'Русский',],
        value=choice,
        layout={'width': 'max-content'},
        description='Детектор языка текста в буфере:',
        disabled=False)
radio_choice

RadioButtons(description='Детектор языка текста в буфере:', index=1, layout=Layout(width='max-content'), optio…

<h3 style="font-family: 'Times New Roman'">Обработчик текста буфера</h3>

In [10]:
from tqdm.notebook import tqdm
from mtrans import BingTranslator, DeepLTranslator, GoogleTranslator
from mtrans import RateLimitException

# Take the language from the radio buttons when they exist, so a correction
# made by the user is honoured; otherwise fall back to Ukrainian. Either way
# `language` ends up as one of the two supported codes.
if radio_choice is not None:
    language = 'ru' if radio_choice.value == 'Русский' else 'uk'
else:
    language = 'uk'

if language == 'ru':
    choice = 'Русский'
    vc_from = "ru"
    vc_to = "uk"
else:
    choice = 'Украинский'
    vc_from = "uk"
    vc_to = "ru"

# string.punctuation covers ASCII only; also strip the typographic quotes,
# dashes and ellipsis that are common in Russian / Ukrainian text.
extra_punctuation = '«»„“”‘’—–…'
text = wtx.value.translate(str.maketrans('', '', string.punctuation + extra_punctuation))
words = text.split()

fnormal = []  # lemma of each kept token
itoken = []   # the token as it appears in the buffer
mtag = []     # morphological tag of the token
lang = []     # lemma of the translated word (None when the translation is empty)

# Build the analyzers and the translator once instead of once per word.
morph = pymorphy3.MorphAnalyzer(lang=language)
morph_target = pymorphy3.MorphAnalyzer(lang=vc_to)
google_translator = GoogleTranslator()
excluded_tags = list(tags.value)

for word in tqdm(words):
    parsed = morph.parse(word)[0]
    # Skip tokens that carry any of the grammemes selected in the tags widget.
    if any(t in parsed.tag for t in excluded_tags):
        continue

    itoken.append(str(word))
    fnormal.append(str(parsed.normal_form))
    mtag.append(str(parsed.tag))

    google_translation = google_translator.translate(fnormal[-1], vc_from, vc_to)
    google_res = google_translation['sentences'][0]['trans']

    # Always append exactly one entry per token: the four lists must stay the
    # same length, otherwise pd.DataFrame(base) raises.
    if google_res:
        translated_lemma = str(morph_target.parse(google_res)[0].normal_form)
        lang.append(translated_lemma.lower())
    else:
        lang.append(None)

base = {'Нормальная форма' : fnormal, 'Перевод': lang, 'Начальная' : itoken, 'Тег' : mtag}
df_normal = pd.DataFrame(base)

  0%|          | 0/7 [00:00<?, ?it/s]

In [11]:
# Editable table with the lemmas extracted from the buffer, plus buttons to
# add an empty row and to delete the selected rows.
mtable1 = pn.widgets.Tabulator(value=df_normal)
button1 = pn.widgets.Button(name="Добавить строку")
button2 = pn.widgets.Button(name="Удалить выделенные строки")


def change_data_tab(_):
    """Append one empty row to the lemma table."""
    frame2 = pd.DataFrame({"Нормальная форма": [None], "Перевод": [None], "Начальная": [None], "Тег": [None]})
    mtable1.stream(frame2)


def remove_selected_rows_tab(_):
    """Delete the rows currently selected in the lemma table."""
    if not mtable1.selection:
        return
    # `selection` holds row positions; translate them to index labels for drop().
    f = mtable1.value.index.values[mtable1.selection]
    # Clear the selection first so the old positions cannot be applied to the
    # shortened table on a later click.
    mtable1.selection = []
    mtable1.value = mtable1.value.drop(f)


button1.on_click(change_data_tab)
button2.on_click(remove_selected_rows_tab)
pn.Column(pn.Row(button1,button2), mtable1).servable()

<h3 style="font-family: 'Times New Roman'">Выберите лист таблицы</h3>

In [12]:
# Offer every worksheet of the spreadsheet as (title, 1-based position) and
# preselect the first one.
worksheet_list = book.worksheets()
drop_list = [
    (sheet.title, position)
    for position, sheet in enumerate(worksheet_list, start=1)
]

widget_sheet = widgets.Dropdown(
    description='Название листа:',
    options=drop_list,
    value=1,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px'),
)
display(widget_sheet)

Dropdown(description='Название листа:', layout=Layout(width='400px'), options=(('Sheet1', 1), ('Лист2', 2)), s…

<h3 style="font-family: 'Times New Roman'">Выберите тему (заголовок столбца)</h3>

In [13]:
# Open the worksheet picked above and read its first row.
work_sheet = book.worksheet(drop_list[widget_sheet.value-1][0])
heads_sheet = work_sheet.get('1:1')

# The result may contain no row at all when row 1 is empty.
header_row = heads_sheet[0] if heads_sheet else []

# Themes are the non-empty headers found in every second column, starting with
# the first; each option maps the header text to its 1-based column number.
drop_heads = [
    (title, column)
    for column, title in enumerate(header_row, start=1)
    if title and column % 2 == 1
]
if not drop_heads:
    raise ValueError('В первой строке листа не найдено ни одной темы')

widget_sheet_theme = widgets.Dropdown(
    options=drop_heads,
    # Preselect the first available theme; it is not necessarily in column 1.
    value=drop_heads[0][1],
    description='Тема:',
    layout = widgets.Layout(width='300px'),
    style={'description_width': 'initial'}
)
display(widget_sheet_theme)

Dropdown(description='Тема:', layout=Layout(width='300px'), options=(('themeA', 1),), style=DescriptionStyle(d…

<h3 style="font-family: 'Times New Roman'">Загрузить лексемы из Google листа по теме</h3>

In [14]:
# Non-empty cells of the theme column and of the column right after it.
fval_ua = list(filter(None, work_sheet.col_values(widget_sheet_theme.value)))
fval_ru = list(filter(None, work_sheet.col_values(widget_sheet_theme.value + 1)))

# Pad the shorter list with None so both columns have the same length.
max_len = max(len(fval_ua), len(fval_ru))
fval_ua.extend([None] * (max_len - len(fval_ua)))
fval_ru.extend([None] * (max_len - len(fval_ru)))

# The first two entries of each column form its title; the rest is the data.
fval_name_first = '_'.join((fval_ua[0], fval_ua[1]))
fval_name_second = '_'.join((fval_ru[0], fval_ru[1]))

gbase = {
    fval_name_first: fval_ua[2:],
    fval_name_second: fval_ru[2:],
}
gbase_pd = pd.DataFrame.from_dict(gbase)

fsize = TextRenderer(font=Expr("'12px Arial'"))
renderers = dict.fromkeys((fval_name_first, fval_name_second), fsize)
gtable = DataGrid(
    gbase_pd,
    layout={"height": "250px","width": "1000px"},
    selection_mode="cell",
    base_row_size=21,
    base_column_size=300,
    renderers=renderers,
)
gtable

DataGrid(auto_fit_params={'area': 'all', 'padding': 30, 'numCols': None}, base_column_size=300, base_row_size=…

<h3 style="font-family: 'Times New Roman'">Редактор объединенных лемм текста буфера и Google листа</h3>

In [15]:
def _unique_values(column):
    """Return the usable values of a ``{index: value}`` mapping.

    Missing values (None / NaN) and empty strings are dropped, duplicates are
    removed, and the order of first occurrence is kept.
    """
    kept = {}
    for value in column.values():
        if pd.isna(value) or value == '':
            continue
        kept.setdefault(value, None)
    return list(kept)


def _merge_unique(first, second):
    """Concatenate two lists and drop duplicates, keeping first-seen order."""
    return list(dict.fromkeys(first + second))


# Lemmas (and their translations) taken from the editable buffer table.
lemma_view = mtable1.value
lemma_column_data = lemma_view.to_dict()
lemma_normal = _unique_values(lemma_column_data['Нормальная форма'])
lemma_translate = _unique_values(lemma_column_data['Перевод'])

# Lexemes already stored in the Google sheet for the selected theme.
google_view = gtable.data
google_column_data = google_view.to_dict()
google_sheet_uk = _unique_values(google_column_data[fval_name_first])
google_sheet_ru = _unique_values(google_column_data[fval_name_second])

# Merge per language, sheet entries first. Lists are used instead of sets so
# that the row order is the same on every run.
# NOTE(review): the two columns are merged independently, so row i of 'uk' is
# not guaranteed to be the translation of row i of 'ru' - confirm this is intended.
union_lang = {'uk': [], 'ru': []}
if vc_from == 'uk':
    union_lang['uk'] = _merge_unique(google_sheet_uk, lemma_normal)
    union_lang['ru'] = _merge_unique(google_sheet_ru, lemma_translate)
elif vc_from == 'ru':
    union_lang['uk'] = _merge_unique(google_sheet_uk, lemma_translate)
    union_lang['ru'] = _merge_unique(google_sheet_ru, lemma_normal)
else:
    raise ValueError(f"Unexpected source language code: {vc_from!r} (expected 'uk' or 'ru')")

# Pad the shorter column with None so both fit into one DataFrame.
max_len = max(len(union_lang['uk']), len(union_lang['ru']))
for code in ('uk', 'ru'):
    union_lang[code].extend([None] * (max_len - len(union_lang[code])))

# Editable table with the merged lemmas, plus buttons to add an empty row and
# to delete the selected rows.
frame1 = pd.DataFrame({"Ukraine": union_lang['uk'], "Russian": union_lang['ru']})
mtable2 = pn.widgets.Tabulator(value=frame1)
button3 = pn.widgets.Button(name="Добавить строку")
button4 = pn.widgets.Button(name="Удалить выделенные строки")


def change_data(_):
    """Append one empty row to the merged-lemma table."""
    frame2 = pd.DataFrame({"Ukraine": [None], "Russian": [None]})
    mtable2.stream(frame2)


def remove_selected_rows(_):
    """Delete the rows currently selected in the merged-lemma table."""
    if not mtable2.selection:
        return
    # `selection` holds row positions; translate them to index labels for drop().
    f = mtable2.value.index.values[mtable2.selection]
    # Clear the selection first so the old positions cannot be applied to the
    # shortened table on a later click.
    mtable2.selection = []
    mtable2.value = mtable2.value.drop(f)


button3.on_click(change_data)
button4.on_click(remove_selected_rows)
pn.Column(pn.Row(button3,button4), mtable2).servable()

<h3 style="font-family: 'Times New Roman'">Отправить и записать данные в Google лист</h3>

In [16]:
# Output area and the button that triggers the upload to the Google sheet.
goutput = widgets.Output()
gbutton = widgets.Button(
    description='Перезаписать',
    tooltip='Click me',
    icon='check',
    button_style='',
    disabled=False,
)

# Module-level placeholders, and the table contents as they are at the moment
# this cell is executed.
out_data_list_uk = out_data_list_ru = None
out_data = mtable2.value

def int_to_excel_column(column_int, start_index=1):
    """Convert a column number to its spreadsheet letter name (A, B, ..., Z, AA, ...).

    Args:
        column_int: Column number, counted from ``start_index``.
        start_index: Number assigned to the first column ('A'); 1 by default,
            pass 0 for zero-based numbering.

    Returns:
        The column name in upper-case letters.

    Raises:
        ValueError: If ``column_int`` is smaller than ``start_index``.
    """
    # Normalise to a 1-based position once, so the loop below is independent
    # of the caller's numbering scheme.
    position = column_int - start_index + 1
    if position < 1:
        raise ValueError(
            f'column_int must be >= start_index ({start_index}), got {column_int}'
        )

    letter = ''
    while position > 0:
        # Bijective base-26: shift by one so that 1..26 map onto A..Z.
        position, remainder = divmod(position - 1, 26)
        letter = chr(65 + remainder) + letter
    return letter

def _pending_values(frame, column_name):
    """Return the cells of ``column_name`` that hold a usable value (not None, NaN or empty)."""
    return [v for v in frame[column_name].tolist() if not pd.isna(v) and v]


def _append_new_values(sheet, column, values):
    """Append to ``column`` of ``sheet`` every entry of ``values`` that is not in it yet.

    Args:
        sheet: Worksheet to write to.
        column: 1-based column number.
        values: Candidate cell values, in the order they should be written.

    Returns:
        The number of cells written.
    """
    existing = sheet.col_values(column)
    known = set(existing)
    fresh = []
    for value in values:
        if value not in known:
            known.add(value)  # also collapses duplicates inside `values`
            fresh.append(value)
    if not fresh:
        # Nothing new: do not send an empty update to the API.
        return 0

    # Write directly below the last value currently returned for the column.
    address = f'{int_to_excel_column(column)}{len(existing) + 1}'
    # Keyword arguments avoid depending on the positional order of update().
    sheet.update(values=[[value] for value in fresh], range_name=address)
    return len(fresh)


def on_button_clicked_google(b):
    """Append the new lexemes from the merged table to the selected theme columns.

    The 'Ukraine' column goes to the theme column and the 'Russian' column to
    the column right after it. Values already present in the sheet are skipped.
    """
    with goutput:
        # Read the table when the button is clicked, so rows added, edited or
        # deleted after this cell was executed are taken into account.
        current = mtable2.value
        theme_column = widget_sheet_theme.value

        _append_new_values(work_sheet, theme_column, _pending_values(current, 'Ukraine'))
        _append_new_values(work_sheet, theme_column + 1, _pending_values(current, 'Russian'))
        print('Выполнено!')


gbutton.on_click(on_button_clicked_google)
display(gbutton, goutput)

Button(description='Перезаписать', icon='check', style=ButtonStyle(), tooltip='Click me')

Output()