In [None]:
import pandas as pd
import numpy as np

Инициализация алгоритмов

In [None]:
from tg.grammar_ru.algorithms.spellcheck import SpellcheckAlgorithm
from tg.grammar_ru.algorithms.repetitions import RepetitionsAlgorithm
from tg.grammar_ru.algorithms.architecture import NlpAlgorithm

# Instantiate each checker once at notebook level; check_text_for_grammar_errors
# reuses these two instances on every call.
spell_check = SpellcheckAlgorithm()
repetitions = RepetitionsAlgorithm()

Текст для проверки

In [None]:
# Demo input: the misspellings and the doubled word are deliberate, so that both
# the spellcheck and the repetitions algorithms have something to report.
sample_text = (
    'Привет как дела? '
    'Вот туть ест ашибочки в нописании слоф, харашо? '
    'А вот тут есть повторение повторение!!!'
)

In [None]:
from dataclasses import dataclass

@dataclass
class CheckedText:
    """A text bundled with the table of errors the checkers reported for it."""

    # The text that was analysed.
    text: str
    # Checker output; downstream cells read its "error_type" and "suggest"
    # columns and join it to the tokenized text through its index.
    errors_df: pd.DataFrame

Находим все ошибки в тексте, запустив на нём все алгоритмы

In [None]:
from tg.grammar_ru import Separator

def check_text_for_grammar_errors(text: str) -> CheckedText:
    """Run the spellcheck and repetitions algorithms over ``text``.

    The text is turned into a data bundle with ``Separator.build_bundle`` and
    both notebook-level algorithm instances (``spell_check``, ``repetitions``)
    are applied to every row of the bundle's ``src`` frame via
    ``NlpAlgorithm.combine_algorithms``.

    Args:
        text: The text to check.

    Returns:
        A ``CheckedText`` holding the analysed ``text`` together with the
        combined result frame of the algorithms.
    """
    data_bundle = Separator.build_bundle(text)
    index = data_bundle.src.index

    grammar_ru_result = NlpAlgorithm.combine_algorithms(data_bundle, index, spell_check, repetitions)
    # Store the text that was actually analysed. The previous version stored the
    # global ``sample_text`` here, so for any other input the errors table was
    # paired with the wrong text.
    return CheckedText(text=text, errors_df=grammar_ru_result)

In [None]:
# Check the sample text; the trailing expression displays the resulting errors table.
checked_text = check_text_for_grammar_errors(text=sample_text)
checked_text.errors_df

Визуализация ошибок по типу

In [None]:
from IPython.core.display import HTML

def visualize_errors(checked_text: CheckedText) -> HTML:
    """Render the checked text as HTML with words highlighted by error type.

    The text is re-tokenized with ``Separator.separate_string`` and each token
    is matched, through its ``word_id``, with the ``error_type`` value stored
    under the same index label in ``checked_text.errors_df``.

    Args:
        checked_text: Text together with the errors table produced for it.

    Returns:
        The HTML display object built by ``Separator.Viewer``.
    """
    tokens_df = Separator.separate_string(checked_text.text)
    error_types = checked_text.errors_df["error_type"]
    tokens_with_errors = tokens_df.merge(error_types, left_on='word_id', right_index=True)

    # Colors are chosen automatically, one per distinct error_type value.
    highlighter = Separator.Viewer().highlight(column_name="error_type", value_to_color='auto')
    return highlighter.to_html_display(tokens_with_errors)

In [None]:
# Show the sample text with its tokens highlighted according to error type.
visualize_errors(checked_text)

Исправление ошибок в разделенном тексте

In [None]:
from tg.grammar_ru.algorithms.architecture import ErrorTypes

def correct_grammar_errors(checked_text: CheckedText, /, errors_filter: list[ErrorTypes] | None = None) -> pd.DataFrame:
    """Replace erroneous words in the tokenized text with their first suggestion.

    The text is re-tokenized with ``Separator.separate_string`` and joined, via
    ``word_id``, with the ``error_type`` and ``suggest`` columns of
    ``checked_text.errors_df``. For every row that has an error type the
    ``word`` column is rewritten:

    * error type excluded by ``errors_filter`` -> the word is left unchanged;
    * no usable suggestion (``None``, NaN or an empty collection) -> the word
      is wrapped in tildes, ``~word~``, to mark it as uncorrected;
    * otherwise -> the first suggestion, ``suggest[0]``.

    Args:
        checked_text: Text together with the errors table produced for it
            (positional-only).
        errors_filter: Error types to correct. ``None`` (the default) corrects
            every error type.

    Returns:
        The merged token frame with the ``word`` column updated in place of the
        erroneous words.
    """
    separated_text = Separator.separate_string(checked_text.text)
    merged_df = separated_text.merge(checked_text.errors_df[["error_type", "suggest"]], left_on='word_id', right_index=True)

    def correct_error(word: str, error_type: ErrorTypes, suggest: list[str] | None) -> str:
        """Return the replacement for a single erroneous word."""
        if errors_filter is not None and error_type not in errors_filter:
            return word

        # A missing suggestion can show up as None or as NaN (pandas' usual
        # placeholder for missing cells); an empty collection carries no
        # suggestion either. Indexing any of these with [0] would raise.
        is_missing = pd.api.types.is_scalar(suggest) and pd.isna(suggest)
        is_empty = not is_missing and hasattr(suggest, "__len__") and len(suggest) == 0
        if is_missing or is_empty:
            return f"~{word}~"

        return suggest[0]

    has_error = merged_df['error_type'].notna()
    # With no erroneous rows, DataFrame.apply(axis=1) on the empty selection does
    # not produce a Series of corrected words, so there is nothing valid to
    # assign; skip the update entirely.
    if has_error.any():
        merged_df.loc[has_error, "word"] = merged_df.loc[has_error, ["word", "error_type", "suggest"]].apply(lambda x: correct_error(*x), axis=1)
    return merged_df

In [None]:
# Correct only stylistic and orthographic errors; words flagged with any other
# error type are kept as they are.
correct_grammar_errors(checked_text, errors_filter=[ErrorTypes.Stylistic, ErrorTypes.Orthographic])

### end-to-end исправление ошибок

In [None]:
def correct_grammar_errors_in_text(text: str) -> str:
    """Check ``text`` and return it with the detected errors corrected.

    Chains the two steps of the notebook: ``check_text_for_grammar_errors``
    finds the errors, ``correct_grammar_errors`` (without a filter, so every
    error type is handled) rewrites the affected words, and the resulting token
    frame is turned back into text by ``Separator.Viewer().to_text``.

    Args:
        text: The text to check and correct.

    Returns:
        The text produced from the corrected token frame.
    """
    analysis = check_text_for_grammar_errors(text)
    fixed_tokens_df = correct_grammar_errors(analysis)
    viewer = Separator.Viewer()
    return viewer.to_text(fixed_tokens_df)

In [None]:
# End-to-end demo: check the sample text and display its corrected version.
correct_grammar_errors_in_text(sample_text)