In [None]:
%pip install --user pyahocorasick
%load_ext Cython

In [None]:
import codecs
from collections import Counter

# Load the whole ciphertext; the file is decoded as Windows-1251.
with codecs.open('mono_var08.KR', mode='r', encoding='cp1251') as cipher_file:
    raw_ciphertext = cipher_file.read()

# This cipher symbol is treated as the encoded space and restored up front.
space_sub = 'Ы'
encrypted = raw_ciphertext.replace(space_sub, ' ')

# Remaining symbols ordered from most to least frequent; the space is left out
# because it is already decoded.
symbol_frequencies = Counter(encrypted).most_common()
encrypted_alphabet = ''.join(
    symbol for symbol, _count in symbol_frequencies if symbol != ' '
)

In [None]:
import ahocorasick

# Short, frequent Russian words used as evidence that a candidate decryption is
# natural text. Every entry is wrapped in spaces, so it only matches as a
# whole, space-delimited word.
natwords = [
    # conjunctions and particles
    ' И ', ' ИЛИ ', ' ДА ', ' НЕ ',
    # question / relative words
    ' ЧТО ', ' ЧТОБЫ ', ' КАК ', ' ГДЕ ', ' КУДА ', ' ОТКУДА ', ' КОГДА ',
    # prepositions
    ' ДЛЯ ',
    ' С ', ' СО ', ' В ', ' ВО ', ' ДО ', ' ПО ', ' НА ', ' НАД ',
    ' МЕЖДУ ', ' ИЗ ', ' ЧЕРЕЗ ',
    # personal pronouns
    ' Я ', ' ТЫ ', ' ВЫ ', ' ОН ', ' ОНА ', ' ОНИ ', ' ОНО ',
]

# Build one automaton over all words so a text can be scored in a single pass.
# No payload is stored with a word (None): only the number of hits matters.
score_automaton = ahocorasick.Automaton()
for natword in natwords:
    score_automaton.add_word(natword, None)
score_automaton.make_automaton()

def score_text(text):
    """Return how many matches `score_automaton` reports in `text`.

    Every item produced by `score_automaton.iter(text)` counts as one point,
    so a higher score means more of the reference words were found.
    """
    return sum(1 for _match in score_automaton.iter(text))

In [None]:
%%cython

# Width parameter of the search window: the cipher symbol at (reversed) rank k
# may only receive the plaintext letter at (reversed) rank
# max(0, k - R) .. min(n, k + R) - 1.
cdef int R = 3
def gen_permutations(str encr_alphabet):
    """Yield candidate substitution tables for a monoalphabetic cipher.

    Cipher symbols and Russian letters are paired by frequency rank; each
    cipher symbol may deviate from its own rank only within the window
    controlled by ``R``. Candidates are enumerated by iterative backtracking.

    Parameters
    ----------
    encr_alphabet : str
        Distinct cipher symbols ordered from most to least frequent.

    Yields
    ------
    dict
        Mapping ``ord(cipher_symbol) -> plaintext letter`` (the form accepted
        by ``str.translate``). The SAME dict object is yielded every time and
        is mutated between yields; take ``dict(subs)`` if it must be kept.

    Raises
    ------
    IndexError
        On first iteration, if ``encr_alphabet`` has more symbols than the
        reference alphabet has letters (no one-to-one mapping exists).

    Notes
    -----
    An empty ``encr_alphabet`` yields nothing.
    NOTE(review): the window is [k - R, k + R), i.e. one slot wider towards
    lower indices than towards higher ones - confirm this is intended.
    """
    # Alphabet is sorted in most frequent -> least frequent letter
    alphabet = 'ОЕАИНТСРВЛКМДПУЯЫЗЬЪБГЧЙХЖЮШЦЩЭФ'
    cdef int encr_alphabet_len = len(encr_alphabet)
    cdef int i = 0
    cdef int letter_i = 0

    if encr_alphabet_len == 0:
        # Nothing to substitute; previously this crashed on encr_alphabet[0].
        return
    if encr_alphabet_len > len(alphabet):
        # Previously surfaced as an opaque IndexError from alphabet[i].
        raise IndexError(
            'encr_alphabet has %d symbols but only %d plaintext letters are known'
            % (encr_alphabet_len, len(alphabet))
        )

    # Keep only as many of the MOST frequent letters as there are cipher
    # symbols, so that rank k of one alphabet still faces rank k of the other
    # after the reversal below. Without the trim, a shorter cipher alphabet
    # would be aligned with the rarest letters instead of the commonest ones.
    alphabet = alphabet[:encr_alphabet_len]
    # Reverse the alphabets to permute the most frequent letters first:
    # the deepest backtracking levels change fastest, and after the reversal
    # those levels hold the most frequent symbols.
    alphabet = alphabet[::-1]
    encr_alphabet = encr_alphabet[::-1]

    subs = dict()        # current (partial) substitution table
    is_taken = set()     # alphabet indices used by the current partial table
    i_stack = list()     # index chosen at each level above the current one

    while letter_i >= 0:
        key = ord(encr_alphabet[letter_i])
        while i < min(encr_alphabet_len, letter_i + R):
            if i in is_taken:
                i += 1
                continue
            is_taken.add(i)
            subs[key] = alphabet[i]
            if letter_i + 1 < encr_alphabet_len:
                # Descend to the next symbol, starting at the low edge of
                # its window.
                i_stack.append(i)
                letter_i += 1
                i = max(0, letter_i - R)
                break
            else:
                # Every symbol is assigned: emit the table, then try the
                # next choice for the last symbol.
                yield subs
                is_taken.remove(i)
                i += 1
        else:
            # Window exhausted for this symbol: backtrack one level and
            # resume right after the choice made there.
            letter_i -= 1
            if letter_i >= 0:
                i = i_stack.pop()
                is_taken.remove(i)
                i += 1

In [None]:
def brute_force(gen_count, select_count):
    """Try candidate substitutions and keep the best-scoring decryptions.

    Each table produced by `gen_permutations(encrypted_alphabet)` is applied
    to `encrypted` and the result is scored with `score_text`.

    Parameters
    ----------
    gen_count : int
        Stop after this many candidates have been tried. The check is an
        equality test on a counter that starts at 1, so a value it never
        reaches (e.g. 0) lets the generator run to exhaustion.
    select_count : int
        Maximum number of decryptions to keep.

    Returns
    -------
    list of (score, text)
        At most `select_count` entries with a positive score, sorted by
        score in descending order. On equal scores the earlier candidate
        is kept.
    """
    if select_count <= 0:
        # Nothing can be kept. Without this guard the first positive score
        # would index tries[-1] on an empty list and raise IndexError.
        return []

    tries = []
    generated = 0
    for subs in gen_permutations(encrypted_alphabet):
        repl = encrypted.translate(subs)
        score = score_text(repl)
        has_room = len(tries) < select_count
        if score > 0 and (has_room or tries[-1][0] < score):
            if not has_room:
                # Full: drop the current worst entry to make room.
                tries.pop()
            tries.append((score, repl))
            # Stable sort: entries with equal scores keep insertion order.
            tries.sort(key=lambda t: t[0], reverse=True)
        generated += 1
        if generated == gen_count:
            break

    return tries

In [None]:
# Try the first 1000 candidate substitutions and show up to 5 best-scoring
# (score, decrypted_text) pairs as the cell output.
brute_force(gen_count=1000, select_count=5)