In [1]:
import numpy as np
import matplotlib.pyplot as plt
import string
import random
import re
import requests
import os
import textwrap

In [2]:
# create substituion cipher

letters1 = list(string.ascii_lowercase)
letters2 = list(string.ascii_lowercase)

true_mapping = {}

random.shuffle(letters2)

# populate map
for k, v in zip(letters1,letters2):
    true_mapping[k] = v

In [5]:
# language model

# initial markov matrix
M = np.ones((26,26))
# initial state distribution
pi = np.zeros(26)

# function to update markov matrix
def update_transition(ch1,ch2):
    # ord('a') = 97, ord('b') = 98,...
    i = ord(ch1) - 97
    j = ord(ch2) - 97
    M[i,j] += 1

# function to update initial state distribution
def update_pi(ch):
    i = ord(ch) - 97
    pi[i] += 1

# get the log probability of a word/token
def get_word_prob(word):
    i = ord[word[0]] - 97
    logp = np.log(pi[i])
    
    for ch in word[1:]:
        j = ord[ch] - 97
        logp += np.log(M[i,j])
        i = j
    return logp

# get the probability of sequence of words
def get_sequence_probability(words):
    if type(words) == str:
        words = words.split()
    logp = 0
    for word in words:
        logp += get_word_prob(word)
    return logp

In [14]:
regex = re.compile('[^a-zA-Z]')

for line in open('moby_dick.txt',encoding='utf-8'):
    line = line.rstrip()
    if line:
        line = regex.sub(' ', line)
        tokens = line.lower().split()
        for token in tokens:
            ch0 = token[0]
            update_pi(ch0)
            
            # other letters
            for ch1 in token[1:]:
                update_transition(ch0,ch1)
                ch0 = ch1
    # normalize the probabilities
    pi /= pi.sum()
    M /= M.sum(axis=1,keepdims=True)

In [15]:
pi

array([1.82617134e-01, 7.63873099e-05, 1.80555582e-01, 1.49525408e-02,
       5.89247993e-15, 1.66667106e-01, 5.45290621e-06, 2.78486395e-02,
       1.48867670e-02, 5.53517367e-26, 2.80799957e-36, 9.92067232e-04,
       1.48867909e-02, 1.98449039e-03, 3.33410397e-01, 9.97514390e-04,
       1.37218916e-84, 2.77777781e-02, 1.52411032e-02, 1.50339690e-02,
       9.97538629e-04, 1.19463184e-11, 1.06874133e-03, 0.00000000e+00,
       1.86689290e-27, 0.00000000e+00])

In [16]:
M

array([[0.00000000e+000, 3.58819384e-014, 1.11111213e-001,
        1.73611272e-004, 0.00000000e+000, 5.55555556e-002,
        8.68055556e-004, 3.58819384e-014, 7.84145930e-003,
        0.00000000e+000, 8.68055556e-004, 5.88352784e-006,
        8.04115161e-010, 6.73676048e-001, 0.00000000e+000,
        4.19097276e-015, 0.00000000e+000, 6.97917673e-002,
        1.40630787e-002, 6.34121657e-002, 9.96720169e-016,
        8.68055556e-004, 8.68055716e-004, 4.01877572e-009,
        8.96990741e-004, 1.61926312e-177],
       [1.76606355e-008, 2.27864583e-003, 0.00000000e+000,
        0.00000000e+000, 5.01965346e-001, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 9.27183633e-007,
        1.82848721e-088, 0.00000000e+000, 4.55729608e-003,
        2.82570168e-007, 0.00000000e+000, 6.78181862e-005,
        0.00000000e+000, 0.00000000e+000, 7.06468540e-008,
        1.62795738e-004, 3.76345494e-056, 1.14013674e-001,
        0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        3.769

In [36]:
original_message = '''I then lounged down the street and found, as I expected, that there was a mews in a lane which 
runs down by one wall of the garden. I lent the ostlers a hand in rubbing down their horses, and received in 
exchange twopence, a glass of half-and-half, two fills of shag tobacco, and as much information as I could desire 
about Miss Adler, to say nothing of half a dozen other people in the neighbourhood in whom I was not in the least 
interested, but whose biographies I was compelled to listen'''

In [47]:
# function to encode message
def encode_message(msg):
    msg = msg.lower()
    msg = regex.sub(' ', msg)
    coded_msg = []
    # make it encoded
    for ch in msg:
        coded_ch = ch
        if ch in true_mapping:
            coded_ch = true_mapping[ch]
            
        coded_msg.append(coded_ch)
    return ''.join(coded_msg)

def decode_msg(msg,word_map):
    msg = msg.lower()
    msg = regex.sub(' ',msg)
    print(msg)
    decoded_msg = []
    for ch in msg:
        decoded_ch = ch
        if ch in word_map[ch]:
            decoded_ch = word_map[ch]
        decoded_msg.append(decoded_ch)
    return ''.join(decoded_msg)
            

In [37]:
encoded_msg = encode_message(original_message)
print(original_message)
encoded_msg

I then lounged down the street and found, as I expected, that there was a mews in a lane which 
runs down by one wall of the garden. I lent the ostlers a hand in rubbing down their horses, and received in 
exchange twopence, a glass of half-and-half, two fills of shag tobacco, and as much information as I could desire 
about Miss Adler, to say nothing of half a dozen other people in the neighbourhood in whom I was not in the least 
interested, but whose biographies I was compelled to listen


'i jbch dpzhocm mpth jbc gjwccj rhm xpzhm  rg i cvlckjcm  jbrj jbcwc trg r nctg ih r drhc tbikb  wzhg mpth qa phc trdd px jbc orwmch  i dchj jbc pgjdcwg r brhm ih wzqqiho mpth jbciw bpwgcg  rhm wckcifcm ih  cvkbrhoc jtplchkc  r odrgg px brdx rhm brdx  jtp xiddg px gbro jpqrkkp  rhm rg nzkb ihxpwnrjiph rg i kpzdm mcgiwc  rqpzj nigg rmdcw  jp gra hpjbiho px brdx r mpuch pjbcw lcpldc ih jbc hciobqpzwbppm ih tbpn i trg hpj ih jbc dcrgj  ihjcwcgjcm  qzj tbpgc qipowrlbicg i trg kpnlcddcm jp digjch'

In [22]:
# algorithm to decoded message.
dna_pool = []
for _ in range(20):
    dna = list(string.ascii_lowercase)
    random.shuffle(dna)
    dna_pool.append(dna)

In [24]:
len(dna_pool)

20

In [26]:
len(dna_pool[0])

26

In [27]:
def evolve_offspring(dna_pool,n_childrens):
    offspring = []
    for dna in dna_pool:
        for _ in range(n_childrens):
            copy = dna.copy()
            j = np.random.randint(len(copy))
            k = np.random.randint(len(copy))
            
            # switch
            tmp = copy[j]
            copy[j] = copy[k]
            copy[k] = k
            
            offspring.append(copy)
    return offspring + dna_pool

In [32]:
dnas = evolve_offspring(dna_pool,2)
len(dnas)

60

In [51]:
num_iters = 10
scores = np.zeros(num_iters)
best_dna = None
best_map = None
best_score = float('-inf')
for i in range(num_iters):
    if i > 0:
        # get offspring from current dna pool
        dna_pool = evolve_offspring(dna_pool,3)
    # calculate score for each dna
    dna2score = {}
    for dna in dna_pool:
        current_map = {}
        print('dna\n',dna)
        for k,v in zip(letters1,dna):
            current_map[k] = v
        print(current_map)
        decoded_message = decode_msg(encoded_msg,current_map)
        score = get_sequence_probability(decoded_message)
            
        # store it
        dna2score[''.join(dna)] = score
        # record the best so far
        if score > best_score:
            best_dna = dna
            best_map = current_map
            best_score = score
        # average score for this generation
        scores[i] = np.mean(list(dna2score.values()))
            
        # keep the best 5 dna
        sorted_dna = sorted(dna2score.items(),key=lambda x: x[1],reverse=True)
        dna_pool = [list(k) for k,v in sorted_dna[:5]]
            
        if i % 10 == 0:
            print("iter:",i, " score ", scores[i], " best so far ", best_score)

dna
 [0, 'j', 2, 'e', 'p', 'o', 'b', 'a', 'g', 9, 'c', 'q', 16, 'u', 14, 15, 16, 0, 'd', 'v', 'm', 'i', 'z', 'l', 't', 'x']
{'a': 0, 'b': 'j', 'c': 2, 'd': 'e', 'e': 'p', 'f': 'o', 'g': 'b', 'h': 'a', 'i': 'g', 'j': 9, 'k': 'c', 'l': 'q', 'm': 16, 'n': 'u', 'o': 14, 'p': 15, 'q': 16, 'r': 0, 's': 'd', 't': 'v', 'u': 'm', 'v': 'i', 'w': 'z', 'x': 'l', 'y': 't', 'z': 'x'}
i jbch dpzhocm mpth jbc gjwccj rhm xpzhm  rg i cvlckjcm  jbrj jbcwc trg r nctg ih r drhc tbikb  wzhg mpth qa phc trdd px jbc orwmch  i dchj jbc pgjdcwg r brhm ih wzqqiho mpth jbciw bpwgcg  rhm wckcifcm ih  cvkbrhoc jtplchkc  r odrgg px brdx rhm brdx  jtp xiddg px gbro jpqrkkp  rhm rg nzkb ihxpwnrjiph rg i kpzdm mcgiwc  rqpzj nigg rmdcw  jp gra hpjbiho px brdx r mpuch pjbcw lcpldc ih jbc hciobqpzwbppm ih tbpn i trg hpj ih jbc dcrgj  ihjcwcgjcm  qzj tbpgc qipowrlbicg i trg kpnlcddcm jp digjch


KeyError: ' '

In [44]:
current_map

{' ': ' ',
 'a': 0,
 'b': 's',
 'c': 'm',
 'd': 'e',
 'e': 'p',
 'f': 'o',
 'g': 'b',
 'h': 'a',
 'i': 'g',
 'j': 9,
 'k': 'c',
 'l': 'q',
 'm': 'y',
 'n': 'u',
 'o': 'j',
 'p': 15,
 'q': 16,
 'r': 'r',
 's': 'd',
 't': 'v',
 'u': 'n',
 'v': 'i',
 'w': 'z',
 'x': 'l',
 'y': 't',
 'z': 'x'}

In [42]:
letters1

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']