In [0]:
import numpy as np

# Load the training corpus. The context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('moby_dick.txt', 'r') as moby:
    train_data = moby.read()

# Alphabet shared by the cipher and the language model: the uppercase ASCII
# letters 'A'..'Z' as a NumPy array of one-character strings.
# Built with chr() rather than ndarray.tostring(), which is deprecated in NumPy.
letters = np.array([chr(code) for code in range(ord('A'), ord('Z') + 1)])

# Plaintext used for the cipher experiment, reduced to uppercase A-Z only.
# NOTE(review): preprocess_text is defined in a later cell of this notebook,
# so on a fresh kernel that cell has to be executed before this one.
message = preprocess_text('''I then lounged down the street and found,
as I expected, that there was a mews in a lane which runs down
by one wall of the garden. I lent the ostlers a hand in rubbing
down their horses, and received in exchange twopence, a glass of
half-and-half, two fills of shag tobacco, and as much information
as I could desire about Miss Adler, to say nothing of half a dozen
other people in the neighbourhood in whom I was not in the least
interested, but whose biographies I was compelled to listen to.
''')

In [0]:
import re

def generate_random_cipher(letters):
    """Return a random substitution cipher over ``letters``.

    The result is a dict mapping every element of ``letters`` to an element
    of a random permutation of ``letters`` drawn from NumPy's global
    random state.
    """
    shuffled = np.random.permutation(letters)
    return {plain: substitute for plain, substitute in zip(letters, shuffled)}

def preprocess_text(text):
    """Upper-case ``text`` and strip every character that is not an ASCII letter.

    Returns the remaining letters as a single string, with no spaces,
    digits or punctuation.
    """
    return re.sub('[^a-zA-Z]', '', text.upper())

def bigrams_list(letters):
    """Return every ordered pair of ``letters`` concatenated into one string.

    Pairs are produced with the first letter varying slowest, so for
    ['A', 'B'] the result is ['AA', 'AB', 'BA', 'BB'].
    """
    return [first + second for first in letters for second in letters]

def create_language_model(text, letters):
    """Estimate unigram and add-one-smoothed bigram probabilities from ``text``.

    Parameters
    ----------
    text : str
        Training text; every character must be an element of ``letters``.
    letters : sequence of str
        The alphabet, as one-character strings.

    Returns
    -------
    unigrams_proba : dict
        Maps each letter to its relative frequency in ``text``.
    bigrams_proba : dict
        Maps each two-letter string ``AB`` to the add-one smoothed estimate
        of P(B | A) = (count(AB) + 1) / (count(A) + len(letters)).

    Raises
    ------
    ValueError
        If ``text`` is empty (no frequencies can be estimated).
    KeyError
        If ``text`` contains a character that is not in ``letters``.
    """
    size = len(text)
    if size == 0:
        raise ValueError("cannot build a language model from empty text")

    # Raw letter counts, then relative frequencies.
    unigrams = dict.fromkeys(letters, 0)
    for c in text:
        unigrams[c] += 1
    unigrams_proba = {key: count / size for key, count in unigrams.items()}

    # Raw counts of adjacent letter pairs; unseen pairs stay at zero.
    bigrams = dict.fromkeys(bigrams_list(letters), 0)
    for first, second in zip(text, text[1:]):
        bigrams[first + second] += 1

    # The table is read as P(second | first), so the normaliser is the count
    # of the FIRST letter of the pair (the conditioning context), not the
    # second one.
    vocabulary_size = len(letters)
    bigrams_proba = {
        key: (count + 1) / (unigrams[key[0]] + vocabulary_size)
        for key, count in bigrams.items()
    }

    return unigrams_proba, bigrams_proba

def log_likelihood(message, unigrams, bigrams):
    """Score ``message`` under the unigram/bigram probability tables.

    The score is the log of the first character's ``unigrams`` entry plus
    the log of the ``bigrams`` entry for every pair of adjacent characters.
    """
    total = np.log(unigrams[message[0]])
    # Walk the message as (previous, current) pairs.
    for previous, current in zip(message, message[1:]):
        total += np.log(bigrams[previous + current])

    return total

def encode(cipher, message):
    """Encrypt ``message`` by replacing each character with ``cipher[character]``.

    Returns the substituted characters joined into one string.
    """
    return ''.join([cipher[symbol] for symbol in message])

def decode(cipher, message):
    """Decrypt ``message`` by mapping each character back to its cipher key.

    Parameters
    ----------
    cipher : dict
        Plain-letter -> substituted-letter mapping, as used by ``encode``.
    message : str
        Text whose characters are values of ``cipher``.

    Returns
    -------
    str
        The text with every character replaced by the key that maps to it.

    Raises
    ------
    ValueError
        If a character of ``message`` is not among the cipher's values.
    """
    # Invert the mapping once instead of rebuilding key/value lists and
    # scanning them for every character. setdefault keeps the FIRST key for
    # a repeated value, matching what list.index() would have returned.
    inverse = {}
    for plain, substituted in cipher.items():
        inverse.setdefault(substituted, plain)

    decoded = []
    for symbol in message:
        if symbol not in inverse:
            raise ValueError("%r is not a value of the cipher" % (symbol,))
        decoded.append(inverse[symbol])

    return ''.join(decoded)

In [112]:
# Reduce the training corpus to uppercase A-Z, the same preprocessing that is
# applied to the message.
t = preprocess_text(train_data)
# Fit the unigram and bigram probability tables over the alphabet `letters`.
unigrams, bigrams = create_language_model(t, letters)

SYFNTAPZTENUUPDTYFNWYGNNYLTUVPZTULWSNOJNRYNUYFLYYFNGNDLWLKNDWSTLALTNDFSRFGZTWUPDTBHPTNDLAAPVYFNELGUNTSANTYYFNPWYANGWLFLTUSTGZBBSTEUPDTYFNSGFPGWNWLTUGNRNSINUSTNORFLTENYDPJNTRNLEALWWPVFLAVLTUFLAVYDPVSAAWPVWFLEYPBLRRPLTULWKZRFSTVPGKLYSPTLWSRPZAUUNWSGNLBPZYKSWWLUANGYPWLHTPYFSTEPVFLAVLUPQNTPYFNGJNPJANSTYFNTNSEFBPZGFPPUSTDFPKSDLWTPYSTYFNANLWYSTYNGNWYNUBZYDFPWNBSPEGLJFSNWSDLWRPKJNAANUYPASWYNTYP
ITHENLOUNGEDDOWNTHESTREETANDFOUNDASIEXPECTEDTHATTHEREWASAMEWSINALANEWHICHRUNSDOWNBYONEWALLOFTHEGARDENILENTTHEOSTLERSAHANDINRUBBINGDOWNTHEIRHORSESANDRECEIVEDINEXCHANGETWOPENCEAGLASSOFHALFANDHALFTWOFILLSOFSHAGTOBACCOANDASMUCHINFORMATIONASICOULDDESIREABOUTMISSADLERTOSAYNOTHINGOFHALFADOZENOTHERPEOPLEINTHENEIGHBOURHOODINWHOMIWASNOTINTHELEASTINTERESTEDBUTWHOSEBIOGRAPHIESIWASCOMPELLEDTOLISTENTO
