In [None]:
# Does not need to be executed if
# ~/.ipython/profile_default/ipython_config.py
# exists and contains:
# c.InteractiveShell.ast_node_interactivity = 'all'

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [None]:
from string import printable
from collections import Counter
from operator import itemgetter
from itertools import product
from re import split

Consider a message all written in capital letters. The _Caesar code_ fixes a natural number $n$ between 1 and 25 and shifts all letters $n$ positions to the right, wrapping around when needed, leaving all other characters such as spaces unchanged. For instance, the message ALEA JACTA EST becomes

* with $n = 1$: BMFB KBDUB FTU
* with $n = 2$: CNGC LCEVC GUV
* ...
* with $n = 10$: KVOK TKMDK OCD
* ...
* with $n = 25$: ZKDZ IZBSZ DRS

The _Vigenere cipher_ offers a generalisation as follows. Choose a word written all in uppercase letters as the key, for instance, RAW. Since RAW consists of 3 letters, write the letters that make up the message over 3 columns:

ALE  
AJA  
CTA  
EST

As R, A and W are the 18th, 1st and 23rd letters of the alphabet, respectively, all letters in the first column are shifted 18 positions to the right, all letters in the second column are shifted 1 position to the right, and all letters in the third column are shifted 23 positions to the right, wrapping around when needed:

SMB  
SKX  
UUX  
WTQ

This results in the encrypted message: SMBS KXUUX WTQ

Let us not distinguish between (capital) letters and other characters, but rather let the Vigenere cipher encrypt all characters in a message with a key that itself can be any sequence of characters. The `string` module defines a string consisting of all printable ASCII characters; it could be a good candidate for which characters to allow both in message and key:

In [None]:
printable

`'\r'` is the carriage return character:

In [None]:
print('00000\r11')

ASCII characters can be represented as `\x` followed by their ASCII code in the form of two hexadecimal digits: 

In [None]:
ord('c'), f"{ord('c'):x}", '\x63'
# f-strings cannot embed backslashes,
# so we instead use the format string method.
ord('\n'), '{:x}'.format(ord('\n')), '\x0a'

`'\x0b'` and `'\x0c'` are the vertical tab and form feed characters, respectively; Jupyter ignores them. For code run from the command line, `'\x0b'` and `'\x0c'` behave the same: the next character follows the previous one but on the next line (form feed is meant to force a printer to move to the next sheet of paper). Both `'\x0b'` and `'\x0c'` actually have escaped characters as alternatives, namely, `'\v'` and `'\f'`, respectively:

In [None]:
'\v', '\f'

It is sensible to ignore the last 3 characters in `printable` and accept all others as the possible constituents of messages and keys:

In [None]:
characters = printable[: -3]
len(characters)

Any character in `characters` can be used in keys and determine by how much to shift characters in messages; the values of the shifts are given by the positions in `characters` of the characters that make up the key. It would be inefficient to look for the position of a given character in `characters` every time it is needed: for instance, if a key was 7 characters long and started with `'G'`, it would be inefficient to scan `characters` left to right from the beginning, to find `'G'` at position 42 and infer that the first, eighth, fifteenth... characters in the message to encrypt should be shifted 42 positions to the right, wrapping around if needed. We define a dictionary with the characters in `characters` as keys, and their positions in `characters` as values, so the latter can be obtained from the former in constant time: 

In [None]:
# Maps each allowed character to its position in characters, which is
# also the shift it induces when used in a key.
shifts = {character: position
              for position, character in enumerate(characters)
         }
print(shifts)

Let us give an example of a message and its encryption with the key `'Take it Easy!'` (we will see below how the encryption has been computed, now it is given and supposed to have been properly computed). `'Take it Easy!'` is a key of length 13, so we align the key, the message displayed over 13 columns, and the encrypted message displayed over 13 columns. As the message length is not a multiple of 13, the last lines of message and encrypted message are less than 13 characters long; formatted strings can be given a field width to display a string, padding with spaces to the right for shorter strings, ignoring the field width for longer strings:

In [None]:
print('|'.join(f'{w:4}' for w in ('Alice', 'was', 'beginning', 'to', 'get',
                                  'very', 'tired'
                                 )
              )
     )

Next to key, message and encrypted message, we display the positions in `characters` (that is, the values recorded in `shifts`) of the characters they consist of, similarly aligned. We observed that `characters` has 97 characters.

* The first character in `key`, namely, `'T'`, has position 55 in `characters`. The first character in `message`, namely `'A'`, has position 36 in `characters`; it is therefore encrypted in `encrypted_message` as the character that in `characters`, has position $(55 + 36)\bmod 97 = 91 \bmod 97 = 91$, namely, `'|'`.
* The fifth character in `key`, namely a space, has position 94 in `characters`. The fifth character in `message`, namely `'e'`, has position 14 in `characters`; it is therefore encrypted in `encrypted_message` as the character that in `characters`, has position $(94 + 14)\bmod 97 = 108 \bmod 97 = 11$, namely, `'b'`.

In [None]:
message = ("Alice was beginning to get very tired of sitting by her "
           "sister on the bank, and of having nothing to do: once or "
           "twice she had peeped into the book her sister was reading, "
           "but it had no pictures or conversations in it, 'and what "
           "is the use of a book,' thought Alice, 'without pictures or "
           "conversations?'"
          )

encrypted_message = ('|vCqbfZ7\'7DM;,xHwkyqq#7IM|QFyFvfWf&oFv])7MwqLL'
                     'kU7D\'X+oLbpAVqSBpW\\QDBs|tDkY@pI\\\'7It|zDsWxI'
                     'v\\<DBwkyqq#7FWGQyHqbfRoBDYQ-(7MvbfK7R7RM/=oxbf'
                     'FWlBDJMX%yIy|zHoBCK!|(BhK7KqoSkFQ\\*@hprLqf(7JI'
                     '.QxIbmAFq)BG!X<BhqlFYb&CC"=<xMbfFqf(@p5+;nhKesW'
                     '|WCp"<(7OGbfRcBkpJ]<u~?|LKl)qJ"X|vCqb|q$+sVP]\\'
                     'DhDfuWr&oUv]?7wCkNHo\'kVQ];C5?'
                    )


key = 'Take it Easy!'
# The key, followed by the positions in characters of its 13 characters.
print(key + '  ' + ' '.join(f'{shifts[c]:2}' for c in key))
# The message, then the encrypted message, each cut into rows of 13
# characters; every row is followed by the positions in characters of
# the characters in that row.
for displayed_text in message, encrypted_message:
    print()
    rows = [displayed_text[start : start + 13]
                for start in range(0, len(displayed_text) + 1, 13)
           ]
    print('\n'.join(f'{row:13}  ' + ' '.join(f'{shifts[c]:2}' for c in row)
                        for row in rows
                   )
         )

Let us write a function, `encrypt()`, to encrypt a message, and a function, `decrypt()`, to decrypt an encrypted message, both with a key $K$ provided as first argument, the message being provided as second argument. Both functions perform almost identically:

* To encrypt a message $M$, one goes over each character in $M$, determines its index $i$ modulo the length of $K$, and shifts it to the right in `characters` by $n$, with $n$ equal to the position in `characters` of the character in $K$ of index $i$, wrapping around if needed.
* To decrypt an encrypted message $E$, one goes over each character in $E$, determines its index $i$ modulo the length of $K$, and shifts it to the left in `characters` by $n$, with $n$ equal to the position in `characters` of the character in $K$ of index $i$, wrapping around if needed.

It is therefore natural to define an auxiliary function, `encrypt_or_decrypt()`, called by both `encrypt()` and `decrypt()` and directed by a third argument that indicates whether shifting should be "to the right" or "to the left", which corresponds to adding or subtracting positions:

In [None]:
def encrypt(key, text):
    """Return text encrypted with key.

    Delegates to encrypt_or_decrypt() with a mode of 1, so that the
    shifts determined by key are added.
    """
    return encrypt_or_decrypt(key, text, mode=1)

def decrypt(key, text):
    """Return text decrypted with key.

    Delegates to encrypt_or_decrypt() with a mode of -1, so that the
    shifts determined by key are subtracted.
    """
    return encrypt_or_decrypt(key, text, mode=-1)

def encrypt_or_decrypt(key, text, mode):
    """Return text with each character shifted within characters.

    The character of index i in text is moved by the position in
    characters of the character of index i modulo len(key) in key,
    multiplied by mode (1 to add positions, -1 to subtract them),
    wrapping around modulo the number of allowed characters.
    """
    shifted = []
    for i, character in enumerate(text):
        position = shifts[character]
        offset = shifts[key[i % len(key)]] * mode
        shifted.append(characters[(position + offset) % len(shifts)])
    return ''.join(shifted)

encrypt(key, message)
print()
decrypt(key, encrypted_message)
print()
encrypt(key, decrypt(key, encrypted_message))
print()
decrypt(key, encrypt(key, message))

Let us now tackle the task of breaking the code of an encrypted message $E$, that is, discovering the key $K$ that was used to encrypt a message $M$ into $E$, and decrypting $E$ into $M$ with $K$. A program can only tentatively suggest candidates for $K$ that the user would then have to validate. Also, the program might not always succeed, especially when the encrypted message is short. Let us fix an upper bound on the length of $K$, the first of a number of parameters we will use in our heuristics:

In [None]:
max_key_length = 16

With that value, the number of possible keys is huge:

In [None]:
# Number of keys of length 1 up to max_key_length; the bound is derived
# from max_key_length rather than written out, so that both stay in sync.
sum(len(characters) ** i for i in range(1, max_key_length + 1))

If the message is long enough, it is likely to contain many $n$-grams (i.e., $n$ consecutive characters) that occur many times, and less likely but still likely enough, $n$-grams with many vertically aligned occurrences. For instance, "the", "ing", "able" could be $n$-grams (with $n$ equal to 3, 3 and 4, respectively) some of whose occurrences in a message could be aligned:

. . . . t h e . .  
. . . . . . . . .  
. a b l e . . . .  
. . . . . . i n g  
. . . . . . . . .  
. . . . . . i n g  
. . . . . . . . .  
. . . . . . . . .  
. . . . . . . . .  
. . . . t h e . .  
. a b l e . . . .  
. . . . t h e . .  
. . . . . . . . .  
. . . . . . . . .  
. . . . . . i n g

All vertically aligned occurrences of a given $n$-gram are identically encrypted. Hence one can look for $n$-grams in the encrypted message that occur many times, and assume that they have a significant chance to correspond to $n$-grams in the original message that happen to be vertically aligned. Suppose for instance that %)O occurs many times in the encrypted message. Suppose that the second occurrence of %)O is 40 positions to the right of the first occurrence of %)O. Conjecturing that both occurrences of %)O are aligned, and assuming that the key is at most 16 characters long, there are 4 options (writing ~ for a slot for a character):

* The key is 4 characters long and the second occurrence of %)O is 10 lines below the first occurrence of %)O, e.g.:  
      ~ % ) O  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ ~ ~ ~  
      ~ % ) O
* The key is 5 characters long and the second occurrence of %)O is 8 lines below the first occurrence of %)O, e.g.:  
      ~ % ) O ~  
      ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~  
      ~ % ) O ~
* The key is 8 characters long and the second occurrence of %)O is 5 lines below the first occurrence of %)O, e.g.:  
      ~ ~ % ) O ~ ~ ~  
      ~ ~ ~ ~ ~ ~ ~ ~   
      ~ ~ ~ ~ ~ ~ ~ ~   
      ~ ~ ~ ~ ~ ~ ~ ~   
      ~ ~ ~ ~ ~ ~ ~ ~   
      ~ ~ % ) O ~ ~ ~
* The key is 10 characters long and the second occurrence of %)O is 4 lines below the first occurrence of %)O, e.g.:  
      ~ ~ ~ % ) O ~ ~ ~ ~  
      ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  
      ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  
      ~ ~ ~ % ) O ~ ~ ~ ~
      
In other words, the conjecture that two successive occurrences of an $n$-gram are aligned leads to the conclusion that the length of the key is a divisor at least equal to $n$ and at most equal to the value of `max_key_length` of the distance between (the start of) both occurrences. The following function returns a generator expression for the relevant divisors of its first argument, namely, those of its factors that are at least equal to its second argument and at most equal to the value of `max_key_length`:

In [None]:
def factors_from_successive_n_grams(num, min_value):
    """Return a generator expression for the relevant divisors of num.

    The relevant divisors are those at least equal to min_value and at
    most equal to max_key_length; they are generated in increasing order.
    """
    candidate_key_lengths = range(min_value, max_key_length + 1)
    return (length for length in candidate_key_lengths if num % length == 0)

tuple(factors_from_successive_n_grams(40, 3))

`factors_from_successive_n_grams()` returns a generator expression; each factor will be retrieved on demand, e.g., by calls to `next()`, or thanks to a `for` statement. We would like to generate on demand not just the relevant factors of the distance between two successive occurrences of a given $n$-gram, but the relevant factors of the distances of all pairs of successive occurrences of all $n$-grams (for the chosen values of $n$). To this aim, we introduce another mechanism, namely, generator functions. A generator function is a function with `yield` statements in its body. A `yield` statement can, but does not have to, be followed by an expression. In case an expression follows, the value of that expression is provided when the `yield` statement is executed; otherwise, `None` is provided. In any case, execution of the generator function then stops, waiting to be resumed, possibly, on demand:

In [None]:
def f(n):
    # Generator function: calling f(n) executes none of this body, it only
    # creates a generator object. Each call to next() on that object then
    # runs the body up to the next yield statement and provides the
    # yielded value.
    print('Before yielding', n)
    yield n
    print('Before yielding', n + 1)
    yield n + 1
    print('Before yielding None')
    # A yield statement with no expression provides None.
    yield
    print('Before yielding', n + 2)
    yield n + 2
    # Executed when the generator is resumed after its last yield; the
    # body then ends, which raises StopIteration in the caller of next().
    print('After yielding', n + 2)
    
F = f(10)
F

next(F)
next(F)
next(F)
next(F)
next(F)

A generator function can also contain `return` statements:

In [None]:
def f(n):
    # Generator function whose return statement ends the generation early:
    # only n and n + 1 are ever yielded.
    print('Before yielding', n)
    yield n
    print('Before yielding', n + 1)
    yield n + 1
    print('Before returning')
    # Reached on the third call to next(), which therefore raises
    # StopIteration instead of providing a value.
    return
    # Won't be printed out
    print('After returning')
    # Won't be yielded
    yield n + 2
    
F = f(10)
F

next(F)
next(F)
next(F)
next(F)
next(F)

A generator function can yield values yielded by another generator function or any other iterable:

In [None]:
def f():
    yield 1
    yield 2
    
def g():
    # Yields 0, 1, ..., 10 in order: one value of its own, then all values
    # produced by four other iterables, each relayed with an explicit loop.
    yield 0
    # Values yielded by another generator function
    for x in f():
        yield x
    # Members of a range
    for x in range(3, 6):
        yield x
    # Values generated by a generator expression
    for x in (x for x in [6, 7, 8]):
        yield x
    # Members of a list
    for x in [9, 10]:
        yield x

G = g()
for x in G:
    print(x, end=' ')

In such a context, `yield from` statements offer a more concise alternative:

In [None]:
def f():
    yield 1
    yield 2
    
def g():
    # Yields 0, 1, ..., 10 in order; each yield from statement relays all
    # values of the iterable that follows it, in place of an explicit loop.
    yield 0
    # Values yielded by another generator function
    yield from f()
    # Members of a range
    yield from range(3, 6)
    # Values generated by a generator expression
    yield from (x for x in [6, 7, 8])
    # Members of a list
    yield from [9, 10]

list(g())

Let us get back to the problem of discovering successive occurrences of $n$-grams. As part of our heuristics, let us set the sizes of $n$-grams to look for to the default values of 3, 4 and 5:

In [None]:
n_gram_range = range(3, 6)

To look for substrings within a string, we can use either of `str`'s `find()` or `index()` methods. They are exactly the same, except that `find()` returns `-1` whereas `index()` raises a `ValueError` exception when they fail to find their argument as a substring. Both methods take an optional second argument and an optional third argument to limit the search to a narrower range than the full string to which the method is applied:

In [None]:
'123123'.find('2')
'123123'.find('2', 2)
'123123'.find('2', 2, 4)

When we find a pair of occurrences of a 5-gram, we are guaranteed to find as well 2 pairs of occurrences of 4-grams, and 3 pairs of occurrences of 3-grams, but there does not seem to be a good reason to prevent the redundancy (in particular because between two consecutive occurrences of an $n$-gram that extends a substring $s$, there can be other occurrences of $s$ in between). The following code fragment takes as example the first 91 characters of `message` (so the first 7 lines of `message` displayed over 13 columns as above) and collects in a list the relevant factors of the distances between all successive occurrences of all 3, 4 and 5-grams in this text. We trace execution and display, in particular, the contexts in which two successive occurrences of a 3, 4 or 5-gram have been detected:

In [None]:
# Illustrate the search on the first 91 characters of message, that is,
# the first 7 lines of message displayed over 13 columns.
text = message[: 91]

# Collects the relevant factors of all distances found below.
relevant_factors = []
for n in (n_gram_range):
    print(f'Looking for consecutive occurrences of {n}-grams:')
    # The last start index considered leaves room in text for a second
    # occurrence of the n-gram that does not overlap the first one.
    for i in range(len(text) - 2 * n + 1):
        n_gram = text[i : i + n]
        # Index of the next occurrence that starts at or after the end of
        # the current one, or -1 if there is none.
        j = text.find(n_gram, i + n)
        if j != -1:
            # Trace: the n-gram, both indexes, the text spanning both
            # occurrences, and the factors about to be collected.
            print(f'  Found "{n_gram}" at indexes {i} and {j}\n'
                  f'  {text[i : j + n]}\n'
                  f'  Distance: {j - i}. Adding as factors:'
                  f' {tuple(factors_from_successive_n_grams(j - i, n))}\n'
                 )
            # A fresh generator is needed: the one used for the trace above
            # has been consumed by tuple().
            relevant_factors.extend(factors_from_successive_n_grams(j - i, n))
            
print(relevant_factors)

It turns out that in these first seven lines, we do not get a single factor of 13: no 3-, 4- or 5-gram has two aligned successive occurrences. The search for successive occurrences of $n$-grams is anyway meant to be done in the encrypted message, and results are different; we used the original message for a friendlier illustration of the technique.

The following generator function adapts the previous code to, for an arbitrary text passed as argument, not collect in a list the relevant factors of the distances between all successive occurrences of all $n$-grams in the text (for the chosen values of $n$), but have a mechanism to return them on demand:

In [None]:
def all_collected_factors(text):
    """Generate on demand the candidate key lengths suggested by text.

    For every size in n_gram_range and every n-gram of that size in text
    that has a later, nonoverlapping occurrence, yields the relevant
    factors of the distance between the n-gram and its next such
    occurrence.
    """
    for size in n_gram_range:
        last_start = len(text) - 2 * size
        for first in range(last_start + 1):
            second = text.find(text[first : first + size], first + size)
            if second == -1:
                continue
            yield from factors_from_successive_n_grams(second - first, size)

print(list(all_collected_factors(encrypted_message)))

Arguably, the more frequent a factor is, the more likely it is to be equal to the length of the key that has been used to encrypt the message. We therefore want to rank, from most frequent to least frequent, all factors that have been collected for all consecutive pairs of $n$-grams. The first step is to count each factor. Thanks to the `Counter` class from the `collections` module, it is straightforward to get from a collection a (type of object that embeds a) dictionary whose keys and values are the members of the collection and the number of times they occur in the collection, respectively:

In [None]:
Counter([4, 5, 10, 13, 4, 5, 10, 13, 4, 5, 10, 13, 4, 8, 13])

Factors should be ranked from the most frequent ones to the least frequent ones, but what to do when two factors have equal counts? E.g., what if both 8 and 16 occur 6,811 times? (This is the case when encrypting the text in `carroll.txt` with the key `'0123456789ABCDEF'`.) It seems probable that if the key length was 8 rather than 16, then we would have more $n$-grams that reveal vertical alignments over 8 columns than $n$-grams that reveal vertical alignments over 16 columns (actually, not having a single extra factor of 8 strongly suggests that the key length is 16 indeed). So we decide to rank the divisors from most frequent ones to least frequent ones, and for a given count, from largest ones to smallest ones. So the key of the sorting method needs to take into account first the values of the dictionary embedded in the counter, and then the keys. We can retrieve from a dictionary (be it embedded in a counter) keys only, values only, or both keys and values, in the form of list-like objects:

In [None]:
D = {4: 4, 5: 3, 10: 3, 13: 4, 8: 1}
D.keys()
D.values()
D.items()

The third way of sorting the items of a dictionary offers the solution to ranking properly our collected factors:

In [None]:
D = {4: 4, 5: 3, 10: 3, 13: 4, 8: 1}

sorted(D.items(), reverse=True)
[x[0] for x in sorted(D.items(), reverse=True)]

sorted(D.items(), key=lambda x: x[1], reverse=True)
[x[0] for x in sorted(D.items(), key=lambda x: x[1], reverse=True)]

sorted(D.items(), key=lambda x: (x[1], x[0]), reverse=True)
[x[0] for x in sorted(D.items(), key=lambda x: (x[1], x[0]), reverse=True)]

Rather than using a lambda expression, we can also use `itemgetter()` from the `operator` module:

In [None]:
L = ['A', 'B', 'C']
itemgetter(0)(L)
itemgetter(0, 2)(L)
itemgetter(2, 0, 1)(L)

D = {4: 4, 5: 3, 10: 3, 13: 4, 8: 1}
sorted(D.items(), key=itemgetter(1, 0), reverse=True)
[x[0] for x in sorted(D.items(), key=itemgetter(1, 0), reverse=True)]

Having with `all_collected_factors()` a mechanism to generate some key lengths from most promising to least promising, we still would like to consider all possible key lengths between 1 and the value of `max_key_length`, starting with 1 (in case the message has actually been encrypted with the simple Caesar code), then those given by `all_collected_factors()`, and finally all others, from shortest to longest. This is the purpose of the following generator function:

In [None]:
def key_lengths_from_most_to_least_promising(text):
    """Generate all key lengths from 1 up to max_key_length.

    Yields 1 first, then the factors collected from text by
    all_collected_factors(), from most frequent to least frequent and
    for a given count from largest to smallest, and finally all
    remaining lengths in increasing order.
    """
    yield 1
    counts = Counter(all_collected_factors(text))
    key_lengths = sorted(counts,
                         key=lambda length: (counts[length], length),
                         reverse=True
                        )
    yield from key_lengths
    for length in range(2, max_key_length + 1):
        if length not in key_lengths:
            yield length

list(key_lengths_from_most_to_least_promising(encrypted_message))

For all $n<13$, the $n$-th character of `key` encrypts the $n$-th column of `message`, yielding the $n$-th column of `encrypted_message`, which can be decrypted back to the $n$-th column of `message`:  

In [None]:
for n in range(13):
    decrypt(key[n], encrypted_message[n :: 13])

Compare with the result obtained when decrypting the $n$-th column of `encrypted_message` not with the right character (the $n$-th character in `key`), but with an arbitrary character:

In [None]:
for n in range(13):
    decrypt('m+2I|b~!AxZ=I', encrypted_message[n :: 13])

The columns derived from the correct key contain more letters. But is that all? Is the distribution of letters in a given column good, in some sense? A column consisting of nothing but q's, z's and j's would not appear as natural. 

"etaoinshrdlcumwfgypbvkjxqz" is all lowercase letters ordered in decreasing frequency of use in English: "e" is most common, then comes "t", then "a"...

A good distribution of letters in a given column should be relatively consistent with the ordering of letters given by the previous sequence. The notion of etaoin score will turn this idea into a precise measure.

Consider the first column, "Ai fh do epbeg ooneoAurn". Let us convert all letters to lowercase and get a count of the number of occurrences of the letters that occur in the resulting string:

In [None]:
column = Counter(c.lower() for c in 'Ai fh do epbeg ooneoAurn' if c.isalpha())
print(column)

Let us order the letters by decreasing frequency in `column`. When two letters $\alpha$ and $\beta$ share the same count, let us be "pessimistic" about `column`'s quality and rank $\alpha$ before $\beta$ if $\alpha$ occurs after $\beta$ in "etaoinshrdlcumwfgypbvkjxqz". So "o" comes first with 4 occurrences, then comes "e" with 3 occurrences. Both "a" and "n" have 2 occurrences, but "a" is more frequent than "n" in English (they occur at index 2 and 5 in `'etaoinshrdlcumwfgypbvkjxqz'`, respectively), so we rank "n" before "a". All other letters occur only once in `column`, with "b" least frequent in English, so ranked next, and "i" most frequent in English, so ranked last:

In [None]:
ranked_letters_in_column = ''.join(
       sorted(column,
              key=lambda x: (column[x], 'etaoinshrdlcumwfgypbvkjxqz'.index(x)),
              reverse=True
             )
                                 )

ranked_letters_in_column

The previous code can be improved by defining from `'etaoinshrdlcumwfgypbvkjxqz'` a dictionary thanks to which the index in that string of a lowercase letter can be more effectively retrieved, similarly to the way the dictionary `shifts` has been defined from the string `characters`:

In [None]:
# Maps each lowercase letter to its rank in decreasing frequency of use
# in English (0 for "e", 25 for "z").
etaoin = {letter: rank
              for rank, letter in enumerate('etaoinshrdlcumwfgypbvkjxqz')
         }

# Most frequent letters in column first; for a given count, letters that
# are rarer in English first. No two letters share a rank in etaoin, so
# sorting in increasing order of the negated criteria yields the same
# sequence as sorting in decreasing order of the criteria themselves.
ranked_letters_in_column = ''.join(
             sorted(column, key=lambda x: (-column[x], -etaoin[x]))
                                  )

ranked_letters_in_column

The etaoin score of `ranked_letters_in_column` is actually a function of both `ranked_letters_in_column` and a nonzero natural number $l$ at most equal to the length of this string: for such a number $l$, it is equal to the number of elements common to the initial segments of `'oenabpgfudrhi'` and `'etaoinshrdlcumwfgypbvkjxqz'` of length $l$. Let us display for $l$ at most equal to 10, $l$ itself, then those initial segments of length $l$, then the letters both segments have in common, then the number of those letters, so the corresponding etaoin score:

In [None]:
for i in range(1, 11):
    s_1 = ranked_letters_in_column[: i]
    s_2 = 'etaoinshrdlcumwfgypbvkjxqz'[: i]
    s = ''.join(set(s_1) & set(s_2))
    print(f'{i:2} {s_1:10} {s_2:10} {s:9} {len(s)}')

Converting strings to sets and taking their intersections is not the most efficient approach here. We can instead look whether each of the first $l$ letters in `ranked_letters_in_column` is one of the first $l$ characters in `'etaoinshrdlcumwfgypbvkjxqz'`. To get the etaoin score of `ranked_letters_in_column` for $l=10$, we need to see how many times the `if` statement below evaluates to true:

In [None]:
for i in range(10):
    s = ranked_letters_in_column[i]
    print(s, end=' ')
    print('occurs' if etaoin[s] < 10 else 'does not occur in',
          'etaoinshrdlcumwfgypbvkjxqz'[: 10]
         )

This is easily done with the `sum()` function:

In [None]:
sum(1 for i in range(10) if etaoin[ranked_letters_in_column[i]] < 10)

Putting it all together in a function:

In [None]:
def etaoin_score(text, length):
    """Return the etaoin score of text for initial segments of size length.

    The letters in text, converted to lowercase, are ranked from most
    frequent to least frequent in text and for a given count, from
    rarest to most common in English. The score is the number of letters
    amongst the first length ones in that ranking that are also amongst
    the length most common letters in English.
    """
    tally = Counter(map(str.lower, filter(str.isalpha, text)))
    ranking = sorted(tally,
                     key=lambda letter: (tally[letter], etaoin[letter]),
                     reverse=True
                    )
    score = 0
    for rank in range(min(length, len(ranking))):
        if etaoin[ranking[rank]] < length:
            score += 1
    return score

etaoin_score('Ai fh do epbeg ooneoAurn', 10)

As part of the heuristic, we have to choose a value for the second argument `length` of `etaoin_score()`, which clearly should be neither too small nor too large:

In [None]:
etaoin_length = 6

The 8th column of `message` displayed over 13 columns has been encrypted with "E", the 8th character of "Take it Easy!". Deciphering this column with "E" and "e" shows similar results, with, at many positions, the same letter, but lowercase for one and uppercase for the other. The etaoin score is the same for both:

In [None]:
decrypt('E', encrypted_message[8 :: 13])
decrypt('e', encrypted_message[8 :: 13])

etaoin_score(decrypt('E', encrypted_message[8 :: 13]), etaoin_length)
etaoin_score(decrypt('e', encrypted_message[8 :: 13]), etaoin_length)

If we assume that the original message is "usual" text, with mostly lowercase letters, then we should consider "E" to be more promising than "e". More generally, when deciphering a given column of the encrypted message with two letters, if the etaoin scores differ, then the letter with the highest score is arguably more likely to be correct; but if both etaoin scores are the same, then the letter that yields the highest proportion of lowercase letters over all letters is arguably more likely to be correct.

The following code fragment tentatively deciphers the 8th column of `encrypted_message` with each of the 97 members of `characters` (1-character subkeys). It computes the 97 etaoin scores as well as the 97 ratios of the number of lowercase letters to the number of letters plus 1 (adding 1 prevents division by 0). It then ranks each of the 97 1-character subkeys in decreasing value of etaoin score, and for a given etaoin score, in decreasing proportion of lowercase letters. Eventually, it displays the top 10 results, together with the corresponding tentative deciphering of the 8th column:

In [None]:
# One (subkey, etaoin score, proportion of lowercase letters) triple per
# 1-character subkey tentatively used to decipher the column.
scores = []
for subkey in characters:
    decrypted_column = decrypt(subkey, encrypted_message[8 :: 13])
    letters = [c for c in decrypted_column if c.isalpha()]
    # The denominator is the number of letters plus 1, so a column
    # without any letter does not cause a division by 0.
    proportion_of_lowercase_letters =\
            sum(1 for c in letters if c.islower()) / (len(letters) + 1)
    scores.append((subkey, etaoin_score(decrypted_column, etaoin_length),
                   proportion_of_lowercase_letters
                  )
                 )
# Highest etaoin score first; for a given etaoin score, highest
# proportion of lowercase letters first.
scores.sort(key=lambda scored: (scored[1], scored[2]), reverse=True)

for scored_subkey in scores[: 10]:
    print(scored_subkey, decrypt(scored_subkey[0], encrypted_message[8 :: 13]))

Given a tentative key length $l$, hoped to be the actual length of the hidden key, there are $l$ 1-character subkeys to discover, one for each column of the message displayed over $l$ many columns. One would like to consider, for each column, nothing but its most promising 1-character subkeys. Restricting ourselves to, for each column, its $k$ most promising subkeys, a total of $k^l$ full keys can then be assembled from the $l$ 1-character subkeys. So $k$ has to be small. Let us still make it another parameter of the overall heuristics and set it to a default of 2:

In [None]:
nb_of_options_for_subkey = 2

To easily generate all possible keys from all choices of 1-character subkeys, the `product` class from the `itertools` module is useful:

In [None]:
# The cartesian product {0, 1} x {'A', 'B'} x {'c', 'd'}
list(product(range(2), ['A', 'B'], (x for x in 'cd')))
# The cartesian product ({0, 1} x {'A', 'B'})^2
list(product(range(2), ['A', 'B'], repeat=2))

The following code fragment passes as an argument to `list()` a generator expression to generate all keys of length 3 from a triple of 2 possible options for each of three 1-character subkeys, with `'0'` or `'1'` for the first subkey, `'A'` or `'B'` for the second subkey, `'c'` or `'d'` for the third subkey:  

In [None]:
subkeys = ('0', '1'), ('A', 'B'), ('c', 'd')

list(key for key in (''.join(subkey) for subkey in product(*subkeys)))

Having assembled a complete key from 1-character subkeys, the encrypted message can be tentatively decrypted as a whole. Whereas 1-character subkeys look more or less promising depending on the distribution of letters in a tentatively decrypted column, whole keys look more or less promising depending on whether the tentatively decrypted message contains enough letters amongst all characters, and enough English words amongst all words, here defining a word as a longest sequence of consecutive letters. So any longest sequence of consecutive nonletters is a word separator; that is something not for the `split()` method of the `str` class, but for the `split()` function of the `re` module, using the syntax of regular expressions:

In [None]:
# Using any of the characters between the square brackets as a separator
split('[+$(^?=_)]', '0+$abc(^?DEF=_')
# Using any longest nonempty sequence of the characters between the
# square brackets as a separator
split('[+$(^?=_)]+', '0+$abc(^?DEF=_')
# Using any lowercase letter as a separator
split('[a-z]', '0+$abc(^?DEF=_')
# Using anything but a lowercase letter as a separator
split('[^a-z]', '0+$abc(^?DEF=_')
# Using any longest nonempty sequence of letters as a separator
split('[a-zA-Z]+', '0+$abc(^?DEF=_')
# Using any longest nonempty sequence of anything but letters or digits
# as a separator
split('[^a-zA-Z0-9]+', '0+$abc(^?DEF=_')

So `'[^a-zA-Z]+'` is the regular expression we need:

In [None]:
print(split('[^a-zA-Z]+', message))

To complete the heuristics, we make use of two last parameters. First, a desired fraction of letters over all characters in the tentatively deciphered message, set to a default of 70%. Second, a desired fraction of English words over all words in the tentatively deciphered message, set to a default of 50%:

In [None]:
fraction_of_letters = 0.7
fraction_of_words = .5

With `dictionary` denoting a list of all lowercase English words (first read from a file in practice), the following function returns `True` or `False` depending on whether the tentatively decrypted message passed as first argument looks right or not. In case the function returns `True`, the tentatively decrypted message will be displayed to the user to accept, or to reject, in which case the program will resume its search for the correct key:

In [None]:
def looks_like_English(text, dictionary):
    """Tell whether text plausibly is a message written in English.

    text: the tentatively decrypted message.
    dictionary: a container of known words, tested with `in`; the lookup
    is case-sensitive.

    Returns False if text is empty, if the proportion of letters amongst
    all its characters is below fraction_of_letters, or if it contains no
    word. Otherwise, returns whether the proportion of words (longest
    sequences of consecutive letters) that belong to dictionary exceeds
    fraction_of_words.
    """
    # An empty text cannot look like English; this also prevents a
    # division by 0 below.
    if not text:
        return False
    if sum(1 for c in text if c.isalpha()) / len(text) < fraction_of_letters:
        return False
    # split() yields an empty string at either end when text starts or
    # ends with a nonletter; those are not words and must not be counted.
    possible_words = [w for w in split('[^a-zA-Z]+', text) if w]
    if not possible_words:
        return False
    nb_of_words = sum(1 for w in possible_words if w in dictionary)
    return nb_of_words / len(possible_words) > fraction_of_words

We are ready to put everything together.

* A function `encrypt_file()` is designed to encrypt the text contained in a file, whose name is provided as second argument, the first argument being the encryption key. By default, the function returns the encrypted message (so that it gets displayed in the notebook), but the third argument, set by default to `None`, can be changed to the name of a file and the encrypted message will then be written to this file instead.
    * To read from a file, the second argument should be the name of an existing file. Otherwise, `open()` will raise a `FileNotFoundError` exception, which the code catches in an `except` statement.
    * To write to a new file or overwrite the contents of an existing file, the `open()` function is given `'w'` as second argument.
    * `encrypt_file()` just calls the `encrypt()` function to perform the encryption. That function expects the message to encrypt to be given as a single string, with as many embedded `'\n'` characters in the string as there are lines in the message. To read the whole contents of a file as a single string, we use the `read()` method of the object returned by the `open()` function.
* A function `decrypt_file()` is designed to decrypt the text contained in a file, whose name is provided as second argument, the first argument being the encryption key. By default, the function returns the decrypted message (so that it gets displayed in the notebook), but the third argument, set by default to `None`, can be changed to the name of a file and the decrypted message will then be written to this file instead, provided that it does not exist.
    * To write to a new file, the `open()` function is given `'x'` as second argument; in case the file exists, then `open()` will not overwrite it but instead, raise a `FileExistsError` exception.
    * `decrypt_file()` just calls the `decrypt()` function to perform the decryption, also using `read()` to get the message to decrypt as a single string.
* A function `break_key_for_file()` is designed to try and break the key of an encrypted message stored in a file whose name is passed as argument to `break_key_for_file()`. Here again, `read()` is used to get the text to decrypt (with a key that first has to be discovered) as a single string, which is passed as an argument to another function, `break_key()`.
    * `break_key()` opens a file meant to contain a list of English words, one per line. The `strip()` method is used to discard the new line character that ends each line of this file, the `lower()` method to convert each word to all lowercase.
    * `break_key()` quickly displays all keys it comes up with to tentatively decrypt the encrypted message; carriage return is used to quickly display all keys of a given length on a single line, each such key overwriting the previous one.
    * When `looks_like_English()` returns `True`, `break_key()` displays the first 200 characters of the tentatively deciphered message, and prompts the user to express whether the message has been successfully decrypted, or whether search for another key should be resumed. In case it exhausts all keys it comes up with, none of which is either proposed to or validated by the user, `break_key()` admits defeat.

In [None]:
# Name of the file break_key() reads its English words from, one per line
dictionary_file = 'dictionary.txt'

def encrypt_file(key, filename, encrypted_filename=None):
    """Encrypt the whole contents of a file with key.

    key: the encryption key, passed on to encrypt().
    filename: name of the file whose contents is to be encrypted.
    encrypted_filename: if provided (and nonempty), name of the file the
    encrypted text is written to, created or overwritten as needed.

    Returns the encrypted text when encrypted_filename is not provided,
    and None otherwise. If either file cannot be opened because it is not
    found, a message naming that file is printed and None is returned.
    """
    # Read the whole input before opening the output: opening with 'w'
    # truncates, so encrypting a file onto itself would otherwise read
    # back an empty text.
    try:
        with open(filename) as file:
            text = file.read()
    except FileNotFoundError:
        print(f'Could not open {filename}, giving up.')
        return
    encrypted_text = encrypt(key, text)
    if not encrypted_filename:
        return encrypted_text
    try:
        with open(encrypted_filename, 'w') as encrypted_file:
            # No trailing new line character: it would become part of the
            # text to decrypt.
            print(encrypted_text, end='', file=encrypted_file)
    except FileNotFoundError:
        # Raised for instance when the path goes through a directory that
        # does not exist; report the output file, not the input file.
        print(f'Could not open {encrypted_filename}, giving up.')

def decrypt_file(key, filename, decrypted_filename=None):
    """Decrypt the whole contents of a file with key.

    key: the key the text was encrypted with, passed on to decrypt().
    filename: name of the file whose contents is to be decrypted.
    decrypted_filename: if provided (and nonempty), name of the file the
    decrypted text is written to; that file must not exist yet.

    Returns the decrypted text when decrypted_filename is not provided,
    and None otherwise. A message is printed and None is returned if the
    input file is not found, if the output file already exists, or if the
    output file cannot be created because its location is not found.
    """
    try:
        with open(filename) as file:
            text = file.read()
    except FileNotFoundError:
        print(f'Could not open {filename}, giving up.')
        return
    # Decrypt before creating the output file, so that no empty file is
    # left behind (and then blocks 'x' mode) in case decryption fails.
    decrypted_text = decrypt(key, text)
    if not decrypted_filename:
        return decrypted_text
    try:
        with open(decrypted_filename, 'x') as decrypted_file:
            # No trailing new line character, so that the file holds
            # exactly the decrypted text, as encrypt_file() does for the
            # encrypted text.
            print(decrypted_text, end='', file=decrypted_file)
    except FileExistsError:
        print(f'{decrypted_filename} exists, giving up.')
    except FileNotFoundError:
        # Raised for instance when the path goes through a directory that
        # does not exist; report the output file, not the input file.
        print(f'Could not open {decrypted_filename}, giving up.')

def break_key_for_file(filename):
    """Try and break the key of the encrypted message stored in a file.

    filename: name of the file containing the encrypted message; its
    whole contents is handed over to break_key() as a single string.

    A FileNotFoundError raised while doing so is reported with a message
    rather than propagated.
    """
    try:
        with open(filename) as encrypted_file:
            encrypted_text = encrypted_file.read()
            break_key(encrypted_text)
    except FileNotFoundError:
        print(f'Could not open {filename}, giving up.')

def break_key(text):
    """Interactively search for the key text was encrypted with.

    text: the encrypted message, as a single string.

    The words read from dictionary_file (one per line, stripped and
    converted to lowercase) make up the dictionary. Key lengths are tried
    in the order given by key_lengths_from_most_to_least_promising(). For
    a given key length, every member of characters is scored as a
    1-character subkey for each column of text, and the
    nb_of_options_for_subkey best ones are kept; all keys that can be
    assembled from those are then tried. Whenever the text decrypted with
    a key passes looks_like_English(), its first 200 characters are shown
    and the user is asked whether to accept that key.

    Returns None in all cases, after printing the accepted key, or a
    message if the dictionary file is not found or no key is accepted.
    """
    try:
        with open(dictionary_file) as words_file:
            dictionary = {line.strip().lower() for line in words_file}
    except FileNotFoundError:
        print(f'Could not open the file {dictionary_file}, giving up.')
        return
    for key_length in key_lengths_from_most_to_least_promising(text):
        print(f'\nNow working with keys of length {key_length}')
        # One collection of candidate 1-character subkeys per column
        subkeys = []
        for n in range(key_length):
            column = text[n :: key_length]
            scores = []
            for subkey in characters:
                decrypted_column = decrypt(subkey, column)
                letters = [c for c in decrypted_column if c.isalpha()]
                nb_of_lowercase = sum(1 for c in letters if c.islower())
                # The denominator is one more than the number of letters,
                # which makes sure it is not 0.
                lowercase_ratio = nb_of_lowercase / (len(letters) + 1)
                scores.append(
                    (subkey,
                     etaoin_score(decrypted_column, etaoin_length),
                     lowercase_ratio
                    )
                )
            # Best etaoin score first, ties broken by lowercase ratio
            scores.sort(key=itemgetter(1, 2), reverse=True)
            best_scores = scores[: nb_of_options_for_subkey]
            subkeys.append([score[0] for score in best_scores])
        for combination in product(*subkeys):
            key = ''.join(combination)
            # Carriage return lets each key overwrite the previous one
            print('\r', key, end='')
            decrypted_text = decrypt(key, text)
            if not looks_like_English(decrypted_text, dictionary):
                continue
            print('\nWhat about this?\n')
            print(decrypted_text[: 200], '...', sep='')
            print()
            print('Enter Y[es] if happy, otherwise press any key '
                  "and I'll keep working."
                 )
            if input('> ') in {'YES', 'Yes', 'yes', 'Y', 'y'}:
                print(f'The key is: "{key}"')
                return
    print('Sorry, I did my best...')

In [None]:
# Encrypt carroll.txt into encrypted_carroll.txt, then try and recover
# the key from the encrypted file alone
encrypt_file('Take it Easy!', 'carroll.txt', 'encrypted_carroll.txt')
break_key_for_file('encrypted_carroll.txt')