# Code Breaking With MCMC

In [3]:
import numpy as np
from scipy import special 
import pickle
import itertools as it
from collections import Counter

In [20]:
def clean_string(string):
    """
    Normalise *string* so it only contains characters from *allowable_letters*.

    Newlines are turned into spaces, the text is lower-cased and surrounding
    whitespace is stripped; afterwards every character that is not listed in
    the module-level *allowable_letters* is dropped.

    Note that stripping happens before the filtering step, so spaces that end
    up at either end only because a neighbouring character was removed are kept.
    """
    flattened = string.replace("\n", " ")
    normalised = flattened.lower().strip()
    return "".join(ch for ch in normalised if ch in allowable_letters)


def load_bigrams(text):
    """
    Build bigram statistics from an already-cleaned string.

    Returns a pair ``(bigram_counter, cond_bigram)``:

    * ``bigram_counter[(a, b)]`` is the number of times ``b`` directly follows
      ``a`` in *text*, plus one (Laplace smoothing of size 1, so no pair has a
      zero count).
    * ``cond_bigram[(a, b)] = P(X_{n+1} = b | X_{n} = a)``, i.e. the smoothed
      pair count divided by the smoothed count of ``a`` as a first letter.
    """
    alphabet_size = len(allowable_letters)

    # Smoothing: every possible pair starts at one ...
    pair_counts = Counter(it.product(allowable_letters, repeat=2))
    # ... hence every letter starts at alphabet_size as a "first letter",
    # which keeps each conditional distribution summing to one.
    first_counts = Counter(allowable_letters * alphabet_size)

    # Observed transitions: each character paired with its successor.
    pair_counts.update(zip(text, text[1:]))
    # The last character never starts a bigram, so it is left out here.
    first_counts.update(text[:-1])

    cond_bigram = {}
    for pair, count in pair_counts.items():
        cond_bigram[pair] = float(count) / float(first_counts[pair[0]])
    return pair_counts, cond_bigram
    
def bigram_from_file(filename):
    """
    Read *filename*, clean its contents and compute bigram statistics.

    The file is read in text mode, passed through ``clean_string`` and then
    handed to ``load_bigrams``; whatever ``load_bigrams`` returns (the pair of
    smoothed bigram counts and conditional bigram probabilities) is returned
    unchanged.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as text_file:
        file_text = text_file.read()
    file_text = clean_string(file_text)
    return load_bigrams(file_text)

def reverse_cipher(cipher):
    """
    Return a new dictionary mapping each value of *cipher* back to its key.

    If several keys share the same value, the key that comes last in
    iteration order is the one kept for that value.
    """
    inverse = {}
    for source, target in cipher.items():
        inverse[target] = source
    return inverse


def print_differences(cipher1, cipher2):
    """
    Print one line for every key of *cipher1* whose value differs in *cipher2*.

    Each line has the form ``key: value_in_cipher1 value_in_cipher2``.
    Every key of *cipher1* is looked up in *cipher2* directly, so a key that
    is missing from *cipher2* raises ``KeyError``.
    """
    for symbol in cipher1:
        first, second = cipher1[symbol], cipher2[symbol]
        differs = first != second
        if differs:
            print("%s: %s %s" % (symbol, first, second))

def num_errors(cipher, encoded_text, original_text):
    """
    Count the positions at which the decoded text disagrees with the original.

    *encoded_text* is decoded with ``decode_text(encoded_text, cipher)``
    (a helper defined elsewhere in the notebook) and compared with
    *original_text* character by character.
    """
    decoded_chars = np.array(list(decode_text(encoded_text, cipher)))
    expected_chars = np.array(list(original_text))
    # Element-wise comparison gives a boolean array; True marks a mismatch.
    mismatches = decoded_chars != expected_chars
    return np.count_nonzero(mismatches)
        
    
def get_secret_text(student_id):
    """
    Pick the secret text belonging to *student_id*.

    The collection of texts is unpickled from ``Lab06_data/secret_strings.p``
    and indexed with ``student_id`` modulo the number of texts, so any integer
    id selects one of the stored entries.
    """
    # NOTE: unpickling can execute arbitrary code; only use this with a
    # data file that comes from a trusted source.
    with open('Lab06_data/secret_strings.p', 'rb') as handle:
        secret_strings = pickle.load(handle)
    index = student_id % len(secret_strings)
    return secret_strings[index]


In [21]:
# Alphabet used by clean_string and load_bigrams: the 26 lowercase letters
# plus the space character. Every other character is dropped by clean_string.
allowable_letters = list("abcdefghijklmnopqrstuvwxyz ")
# Quick manual check of clean_string (left commented out); with the alphabet
# above the call would give "hihoww are you".
#s = "Hi,howW are you23123"
#clean_string(s)

In [22]:
# Build the bigram statistics from "warandpeace.txt": big_conters receives the
# Laplace-smoothed bigram counts and wp_bigrams the conditional probabilities
# P(next letter | current letter).
big_conters,wp_bigrams = bigram_from_file("warandpeace.txt")

In [23]:
big_conters

Counter({(' ', ' '): 13628,
         (' ', 'a'): 69179,
         (' ', 'b'): 25078,
         (' ', 'c'): 20669,
         (' ', 'd'): 17112,
         (' ', 'e'): 11632,
         (' ', 'f'): 21212,
         (' ', 'g'): 9704,
         (' ', 'h'): 49763,
         (' ', 'i'): 30537,
         (' ', 'j'): 1656,
         (' ', 'k'): 4027,
         (' ', 'l'): 13122,
         (' ', 'm'): 19455,
         (' ', 'n'): 15037,
         (' ', 'o'): 33813,
         (' ', 'p'): 18697,
         (' ', 'q'): 1427,
         (' ', 'r'): 15052,
         (' ', 's'): 40675,
         (' ', 't'): 87147,
         (' ', 'u'): 5572,
         (' ', 'v'): 3902,
         (' ', 'w'): 40426,
         (' ', 'x'): 239,
         (' ', 'y'): 6662,
         (' ', 'z'): 148,
         ('a', ' '): 15083,
         ('a', 'a'): 25,
         ('a', 'b'): 3436,
         ('a', 'c'): 6973,
         ('a', 'd'): 11295,
         ('a', 'e'): 168,
         ('a', 'f'): 1667,
         ('a', 'g'): 3466,
         ('a', 'h'): 326,
         ('a',

In [19]:
# Look up P(next = 'd' | current = 'a').
# NOTE(review): the 0 displayed below looks like stale output from an earlier
# run (this cell is numbered 19, before the definitions cell numbered 20).
# load_bigrams now divides floats and the ('a', 'd') count shown above is
# non-zero, so re-running this cell should give a positive probability.
wp_bigrams[('a', 'd')]

0