In [1]:
import xml.etree.ElementTree

import nltk

nltk.download("punkt", quiet=True)

import nltk.tokenize as tokenize

# Default puzzle input: one string of binary digits per encoded character,
# in message order. solve() falls back to this when no code words are given.
CODE_WORDS = "0010 1011 1000 1011 10 0110 0011 1 1101".split()

# Character -> Morse code, written with "." for a dot and "-" for a dash.
# Every key is exactly one character, so the table can be indexed directly
# with the characters of a message and inverted into a code -> character map.
MORSE_CODE_DICT = {
    # Letters
    "A": ".-",
    "B": "-...",
    "C": "-.-.",
    "D": "-..",
    "E": ".",
    "F": "..-.",
    "G": "--.",
    "H": "....",
    "I": "..",
    "J": ".---",
    "K": "-.-",
    "L": ".-..",
    "M": "--",
    "N": "-.",
    "O": "---",
    "P": ".--.",
    "Q": "--.-",
    "R": ".-.",
    "S": "...",
    "T": "-",
    "U": "..-",
    "V": "...-",
    "W": ".--",
    "X": "-..-",
    "Y": "-.--",
    "Z": "--..",
    # Digits
    "1": ".----",
    "2": "..---",
    "3": "...--",
    "4": "....-",
    "5": ".....",
    "6": "-....",
    "7": "--...",
    "8": "---..",
    "9": "----.",
    "0": "-----",
    # Punctuation
    # The comma key must be the bare "," character: a ", " (comma + space)
    # key can never match a single character and would decode to two.
    ",": "--..--",
    ".": ".-.-.-",
    "?": "..--..",
    "/": "-..-.",
    "-": "-....-",
    "(": "-.--.",
    ")": "-.--.-",
}

# Reverse lookup: Morse code string -> the character it encodes.
MORSE_CODE_DICT_INV = dict(zip(MORSE_CODE_DICT.values(), MORSE_CODE_DICT.keys()))

# The 26 upper-case letters in order, plus position <-> letter lookups
# (0-based) used for the Caesar shift arithmetic.
ALPHABET = [*"ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
INDEX_TO_ALPHA = dict(enumerate(ALPHABET))
ALPHA_TO_INDEX = {letter: position for position, letter in INDEX_TO_ALPHA.items()}


def keyword_dictionary(dictionary_path):
    """Collect the lower-cased word tokens found in an XML dictionary file.

    The file at ``dictionary_path`` is parsed as XML and the text of every
    element below the root is split into tokens with
    ``tokenize.word_tokenize``.

    Returns:
        A set containing each token, lower-cased.
    """
    tree = xml.etree.ElementTree.parse(dictionary_path)
    vocabulary = set()

    # ".//*" matches every descendant of the root; the root's own text is
    # not visited.
    for node in tree.getroot().findall(".//*"):
        text = node.text
        if not text:
            # Elements with no text (None or "") contribute nothing.
            continue
        for token in tokenize.word_tokenize(text):
            vocabulary.add(token.lower())

    return vocabulary


def binary_to_morse(value):
    """Translate a string of binary digits into Morse symbols.

    Every "0" becomes a dash ("-") and every "1" becomes a dot ("."); any
    other character is passed through unchanged.
    """
    return value.translate(str.maketrans({"0": "-", "1": "."}))


def caesar_cipher(x, shift):
    """Shift a single upper-case letter ``shift`` places along the alphabet.

    The shift wraps around modulo 26. ``x`` must be a key of
    ``ALPHA_TO_INDEX``; any other value raises ``KeyError``.
    """
    position = ALPHA_TO_INDEX[x]
    rotated = (position + shift) % 26
    return INDEX_TO_ALPHA[rotated]


def solve(dictionary_path="./data/JMdict_e", code_words=None):
    """Brute-force the Caesar shift of a binary-encoded Morse message.

    Each code word is converted from binary digits to Morse, decoded to a
    character, and the resulting character sequence is tried under all 26
    Caesar shifts. Every attempt is printed together with whether the
    lower-cased result appears in the keyword set built from
    ``dictionary_path``.

    Args:
        dictionary_path: Path of the XML dictionary passed to
            ``keyword_dictionary``.
        code_words: Iterable of binary-digit strings, one per character.
            Defaults to ``CODE_WORDS`` when ``None``.

    Returns:
        The set of shifted strings that were found in the dictionary.

    Raises:
        KeyError: If a code word is not a known Morse code, or decodes to a
            character outside A-Z.
    """
    words = CODE_WORDS if code_words is None else code_words

    known_words = keyword_dictionary(dictionary_path)

    decoded_letters = []
    for word in words:
        decoded_letters.append(MORSE_CODE_DICT_INV[binary_to_morse(word)])

    hits = set()

    for shift in range(26):
        shifted = [caesar_cipher(letter, shift) for letter in decoded_letters]
        cipher = "".join(shifted).lower()
        is_hit = cipher in known_words
        print(f"shift: {shift:>2}, cipher: {cipher}, dictionary hit: {is_hit}")
        if is_hit:
            hits.add(cipher)

    return hits


# Run the solver with its default dictionary path and code words.
solve()

shift:  0, cipher: qljlaxzef, dictionary hit: False
shift:  1, cipher: rmkmbyafg, dictionary hit: False
shift:  2, cipher: snlnczbgh, dictionary hit: False
shift:  3, cipher: tomodachi, dictionary hit: True
shift:  4, cipher: upnpebdij, dictionary hit: False
shift:  5, cipher: vqoqfcejk, dictionary hit: False
shift:  6, cipher: wrprgdfkl, dictionary hit: False
shift:  7, cipher: xsqsheglm, dictionary hit: False
shift:  8, cipher: ytrtifhmn, dictionary hit: False
shift:  9, cipher: zusujgino, dictionary hit: False
shift: 10, cipher: avtvkhjop, dictionary hit: False
shift: 11, cipher: bwuwlikpq, dictionary hit: False
shift: 12, cipher: cxvxmjlqr, dictionary hit: False
shift: 13, cipher: dywynkmrs, dictionary hit: False
shift: 14, cipher: ezxzolnst, dictionary hit: False
shift: 15, cipher: fayapmotu, dictionary hit: False
shift: 16, cipher: gbzbqnpuv, dictionary hit: False
shift: 17, cipher: hcacroqvw, dictionary hit: False
shift: 18, cipher: idbdsprwx, dictionary hit: False
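shift: 19, cipher: jecetqsxy, dictionary hit: False
shift: 20, cipher: kfdfurtyz, dictionary hit: False
shift: 21, cipher: lgegvsuza, dictionary hit: False
shift: 22, cipher: mhfhwtvab, dictionary hit: False
shift: 23, cipher: nigixuwbc, dictionary hit: False
shift: 24, cipher: ojhjyvxcd, dictionary hit: False
shift: 25, cipher: pkikzwyde, dictionary hit: False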

{'tomodachi'}