In [1]:
import collections
import nltk.corpus
import re

In [2]:
# upper-case every entry of the NLTK word list and keep it as a set for fast
# membership tests
ENGLISH_DICT = {w.upper() for w in nltk.corpus.words.words()}
# drop the one-letter entries, keeping only the real words 'A' and 'I';
# set difference (unlike set.remove) does not raise KeyError when the
# installed corpus happens to lack one of these letters
ENGLISH_DICT -= set('BCDEFGHJKLMNOPQRSTUVWXYZ')

In [3]:
# cypher text to break; splitting on whitespace and re-joining collapses the
# indentation and line breaks of the literal into single spaces
TEXT = ' '.join('''
    LRVMNIR BPR SUMVBWVR JX BPR LMIWV YJERYRKBI JX QMBM 
    WI BPR XJVNI MKD YMIBRUT JX IRHX WI BPR RIIRKVR JX 
    YMBINLMTMIPW UTN QMUMBR DJ W IPMHH BUT BJ RHNVWDMBR 
    BPR YJERYRKBI JX BPR QMBM MVVJUDWKO BJ YT WKBRUSURBMBWJK 
    LMIRD JK XJUBT TRMUI JX IBNDT WB WI KJB MK RMIT BMIQ 
    BJ RASHMWK RMVP YJERYRKB MKD WBI IWOKWXWVMKVR MKD 
    IJYR YNIB URYMWK NKRASHMWKRD BJ OWER M VJYSHRBR 
    RASHMKMBWJK JKR CJNHD PMER BJ LR FNMHWXWRD MKD WKISWURD 
    BJ INVP MK RABRKB BPMB PR VJNHD URMVP BPR IBMBR JX 
    RKHWOPBRKRD YWKD VMSMLHR JX URVJOKWGWKO IJNKDHRII IJNKD 
    MKD IPMSRHRII IPMSR W DJ KJB DRRY YTIRHX BPR XWKMH 
    MNBPJUWBT LNB YT RASRUWRKVR CWBP QMBM PMI HRXB KJ DJNLB
    BPMB BPR XJHHJCWKO WI BPR SUJSRU MSSHWVMBWJK MKD 
    WKBRUSURBMWJK W JXXRU YT BPRJUWRI WK BPR PJSR BPMB BPR 
    RIIRKVR JX JQWKMCMK QMUMBR CWHH URYMWK WKBMVB
'''.split())

# all 26 English letters, most frequent first
# see: https://www3.nd.edu/~busiforc/handouts/cryptography/letterfrequencies.html
LETTERS_SORTED_BY_FREQ = "EARIOTNSLCUDPMHGBFYWKVXZJQ"

In [4]:
# letters occurring in the cypher `TEXT`, most frequent first; the space
# separator is not in LETTERS_SORTED_BY_FREQ and is therefore filtered out
cypher_letters_sorted_by_freq = [
    letter
    for letter, _count in collections.Counter(TEXT).most_common()
    if letter in LETTERS_SORTED_BY_FREQ
]

In [24]:
# key under construction: encrypted letter -> original letter
# (None means "not assigned yet")
solution = {letter: None for letter in LETTERS_SORTED_BY_FREQ}

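# `assign` works on two lists used as stacks: the cypher letters still to be
# assigned and the plaintext letters that are still unused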

def assign(cypher_letters, remaining_letters):
    """Recursively assign a plaintext letter to every cypher letter.

    Depth-first search with backtracking. The first entry of
    ``cypher_letters`` is tried against each entry of ``remaining_letters``
    in order; a candidate is dropped as soon as ``deadend`` rejects the
    resulting partial key. The key itself lives in the module-level
    ``solution`` dict, which is updated in place, and the module-level
    ``TEXT`` is what gets checked.

    Args:
        cypher_letters: list of cypher letters that still need a mapping.
        remaining_letters: list of plaintext letters not used so far.

    Returns:
        True once every cypher letter has been assigned, False if no
        assignment of the remaining letters works.

    Both lists are mutated while searching. When False is returned they are
    back in the state they were passed in; when True is returned the letters
    consumed along the successful path are not put back.
    """
    if not cypher_letters:
        # nothing left to assign: the key is complete
        return True

    current = cypher_letters.pop(0)

    # the list is restored at the end of every iteration, so its length is
    # the same each time round
    for idx in range(len(remaining_letters)):
        candidate = remaining_letters[idx]
        solution[current] = candidate

        if not deadend(solution, TEXT):
            # looks plausible: take the letter out of the pool and go deeper
            remaining_letters.pop(idx)
            if assign(cypher_letters, remaining_letters):
                return True
            # the deeper levels failed: return the letter to its old slot
            remaining_letters.insert(idx, candidate)

        # rejected outright or after backtracking: undo the tentative mapping
        solution[current] = None

    # no candidate worked; hand the cypher letter back to the caller
    cypher_letters.insert(0, current)
    return False

# threshold chosen through trial-and-error

def deadend(partial_solution, text, threshold=0.85):
    """Tell whether a partial key has already produced too much nonsense.

    ``text`` is decoded with ``partial_solution`` and only the distinct words
    that are fully decoded (no ``'_'`` placeholder left) are judged against
    ``ENGLISH_DICT``.

    Args:
        partial_solution: mapping of cypher letter -> plaintext letter, with
            a falsy value (``None``) for letters that are not assigned yet.
        text: the cypher text to decode and check.
        threshold: fraction between 0 and 1; the share of fully decoded
            words found in the dictionary must be strictly greater than this
            for the key to be kept.

    Returns:
        True if the partial key should be abandoned, False if it is still
        plausible. Also False when no word is fully decoded yet, since there
        is nothing to judge.
    """
    # decode the text that was passed in, not the module-level TEXT
    decoded = substitute(partial_solution, text)

    # distinct words in which every letter already has a mapping
    complete_words = {w for w in decoded.split() if '_' not in w}
    if not complete_words:
        return False

    hits = sum(w in ENGLISH_DICT for w in complete_words)
    # a dead end unless strictly more than `threshold` of the words are known
    return not (hits / len(complete_words) > threshold)
    
def substitute(partial_solution, text):
    """Decode ``text`` with a (possibly incomplete) substitution key.

    Args:
        partial_solution: mapping of cypher character -> plaintext character;
            a falsy value (e.g. ``None``) marks a character without a mapping.
        text: the string to decode.

    Returns:
        A string of the same length as ``text`` in which every whitespace
        character is a single space, every mapped character is replaced by
        its plaintext value and every unmapped one by ``'_'``.

    Raises:
        KeyError: if a non-whitespace character is not a key of
            ``partial_solution``.
    """
    def decode(symbol):
        # whitespace strips to '' and is passed through as a plain space
        if not symbol.strip():
            return ' '
        return partial_solution[symbol] or '_'

    return ''.join(decode(symbol) for symbol in text)

In [25]:
# start from a blank key so that re-running this cell repeats the whole search
solution = dict.fromkeys(LETTERS_SORTED_BY_FREQ)

# `assign` pops letters off the lists it is given (and leaves them consumed
# when it succeeds), so hand it copies: the frequency-ordered list computed
# in the earlier cell stays intact and does not have to be rebuilt here
assign(list(cypher_letters_sorted_by_freq), list(LETTERS_SORTED_BY_FREQ))

True

In [26]:
# decode the whole cypher text with the key found above; a letter that is
# still mapped to None would show up as '_'
substitute(solution, TEXT)

'BECAUSE THE PRACTICE OF THE BASIC MOVEMENTS OF KATA IS THE FOCUS AND MASTERY OF SELF IS THE ESSENCE OF MATSUBAYASHI RYU KARATE DO I SHALL TRY TO ELUCIDATE THE MOVEMENTS OF THE KATA ACCORDING TO MY INTERPRETATION BASED ON FORTY YEARS OF STUDY IT IS NOT AN EASY TASK TO EXPLAIN EACH MOVEMENT AND ITS SIGNIFICANCE AND SOME MUST REMAIN UNEXPLAINED TO GIVE A COMPLETE EXPLANATION ONE WOULD HAVE TO BE ZUALIFIED AND INSPIRED TO SUCH AN EXTENT THAT HE COULD REACH THE STATE OF ENLIGHTENED MIND CAPABLE OF RECOGNIJING SOUNDLESS SOUND AND SHAPELESS SHAPE I DO NOT DEEM MYSELF THE FINAL AUTHORITY BUT MY EXPERIENCE WITH KATA HAS LEFT NO DOUBT THAT THE FOLLOWING IS THE PROPER APPLICATION AND INTERPRETAION I OFFER MY THEORIES IN THE HOPE THAT THE ESSENCE OF OKINAWAN KARATE WILL REMAIN INTACT'