In [1]:
from itertools import *
from copy import copy, deepcopy

Let $\mathcal{A}$ be our alphabet:

In [2]:
def make_alphabet_entry(i):
    """Return the pair (letter, i), where letter is the character `i` places after 'a'."""
    letter = chr(ord('a') + i)
    return (letter, i)

# Letters 'a'..'z' receive the codes 0..25; the blank receives the code 26.
alphabet = {letter: code for letter, code in map(make_alphabet_entry, range(26))}
alphabet[' '] = 26
n = len(alphabet)  # number of symbols in the alphabet
alphabet, n

({' ': 26,
  'a': 0,
  'b': 1,
  'c': 2,
  'd': 3,
  'e': 4,
  'f': 5,
  'g': 6,
  'h': 7,
  'i': 8,
  'j': 9,
  'k': 10,
  'l': 11,
  'm': 12,
  'n': 13,
  'o': 14,
  'p': 15,
  'q': 16,
  'r': 17,
  's': 18,
  't': 19,
  'u': 20,
  'v': 21,
  'w': 22,
  'x': 23,
  'y': 24,
  'z': 25},
 27)

We define a function `encode` that consumes a string and produces the list of integers in $\frac{\mathbb{Z}}{n\mathbb{Z}}$ corresponding to its characters, together with its inverse `decode`:

In [3]:
def encode(s):
    """Translate every character of `s` into its integer code, as given by `alphabet`."""
    return [alphabet[symbol] for symbol in s]

# Reverse lookup table: integer code -> character.
inverse_alphabet = dict((code, symbol) for symbol, code in alphabet.items())

def decode(e):
    """Translate a sequence of integer codes back into the string it spells."""
    return "".join(inverse_alphabet[code] for code in e)

In [4]:
# Sample message: lower-cased, with commas, periods and line breaks removed
# in a single filtering pass.
plain_text = ''.join(
    symbol
    for symbol in (
        'Edward Joseph Snowden is an American computer professional, \n'
        'former Central Intelligence Agency employee, and former contractor for the federal government who \n'
        'copied and leaked classified information from the National Security Agency without \n'
        'prior authorization. His disclosures revealed numerous global surveillance programs, \n'
        'many run by the NSA and Five Eyes with the cooperation of telecommunication companies and \n'
        'European governments.'
    ).lower()
    if symbol not in ',.\n'
)
m = len(plain_text)  # length of the cleaned message
plain_text, m

('edward joseph snowden is an american computer professional former central intelligence agency employee and former contractor for the federal government who copied and leaked classified information from the national security agency without prior authorization his disclosures revealed numerous global surveillance programs many run by the nsa and five eyes with the cooperation of telecommunication companies and european governments',
 432)

In [5]:
# Sanity check: decoding the encoded message must give back the message itself.
assert decode(encode(plain_text)) == plain_text

In [6]:
# Integer representation of the message; the trailing expression displays it.
encoded_plain_text = encode(plain_text)
encoded_plain_text

[4,
 3,
 22,
 0,
 17,
 3,
 26,
 9,
 14,
 18,
 4,
 15,
 7,
 26,
 18,
 13,
 14,
 22,
 3,
 4,
 13,
 26,
 8,
 18,
 26,
 0,
 13,
 26,
 0,
 12,
 4,
 17,
 8,
 2,
 0,
 13,
 26,
 2,
 14,
 12,
 15,
 20,
 19,
 4,
 17,
 26,
 15,
 17,
 14,
 5,
 4,
 18,
 18,
 8,
 14,
 13,
 0,
 11,
 26,
 5,
 14,
 17,
 12,
 4,
 17,
 26,
 2,
 4,
 13,
 19,
 17,
 0,
 11,
 26,
 8,
 13,
 19,
 4,
 11,
 11,
 8,
 6,
 4,
 13,
 2,
 4,
 26,
 0,
 6,
 4,
 13,
 2,
 24,
 26,
 4,
 12,
 15,
 11,
 14,
 24,
 4,
 4,
 26,
 0,
 13,
 3,
 26,
 5,
 14,
 17,
 12,
 4,
 17,
 26,
 2,
 14,
 13,
 19,
 17,
 0,
 2,
 19,
 14,
 17,
 26,
 5,
 14,
 17,
 26,
 19,
 7,
 4,
 26,
 5,
 4,
 3,
 4,
 17,
 0,
 11,
 26,
 6,
 14,
 21,
 4,
 17,
 13,
 12,
 4,
 13,
 19,
 26,
 22,
 7,
 14,
 26,
 2,
 14,
 15,
 8,
 4,
 3,
 26,
 0,
 13,
 3,
 26,
 11,
 4,
 0,
 10,
 4,
 3,
 26,
 2,
 11,
 0,
 18,
 18,
 8,
 5,
 8,
 4,
 3,
 26,
 8,
 13,
 5,
 14,
 17,
 12,
 0,
 19,
 8,
 14,
 13,
 26,
 5,
 17,
 14,
 12,
 26,
 19,
 7,
 4,
 26,
 13,
 0,
 19,
 8,
 14,
 13,
 0,
 11,
 26,
 18,
 4,
 2,

Let $k \in \left(\frac{\mathbb{Z}}{n\mathbb{Z}}\right)^{m}$ be a *key* of length $m\in\mathbb{N}$, for example:

In [7]:
# NOTE: a randomly generated key would be safer than a fixed word.
# The key is the encoding of the word below; the cell displays it with its length.
k = encode("ericsmullyan")
k, len(k)

([4, 17, 8, 2, 18, 12, 20, 11, 11, 24, 0, 13], 12)

We are now in a position to define the `encrypt` function:

In [8]:
def encrypt(message, key):
    """Add `key`, repeated cyclically, to `message` symbol by symbol, modulo `n`.

    Both arguments are sequences of integer codes; the shifted codes are
    returned as a list.
    """
    cipher = []
    for symbol, shift in zip(message, cycle(key)):
        cipher.append((symbol + shift) % n)
    return cipher

In [9]:
# Encrypt the encoded message with the key `k`, then show the ciphertext as a string.
cipher_text = encrypt(encoded_plain_text, k)
decode(cipher_text)

'iudciptuzpeblq pfhxpyxiedrvbsyybt a dtwogfmpbxpdswmujuhyli sshugilwpyqrnpqqpkqewtde gvhcyqgnixeztbw wqtlya sshugilwzyqrngjwtrrhbkqhrdwmfwcuwkdohihvowzmkgeomgexkwptlya yirsgvlwwlpsvjzmfrugqzomnxzwprrkzxxtuiqvckuhyli eitbt erklde gohy eazeq bvzwtrmndslrvcrakfztstp qmiknfdnbpp dilmccqxkyrmrvebursezmylmwkzxwuewlkcrdfzqycuxcxmnrohtlztmixtuiqvusluyoxfvzvhgpqlkgftudjpgrohz brnxzwpr zkdblrgeuolzbnlqiarqkqdauytbsmedlbwfkz ba dxwxwcgxpkte'

In [12]:
def frequency(elem, lst):
    """Count the occurrences of `elem` in `lst` through the container's own `count` method."""
    occurrences = lst.count(elem)
    return occurrences

def coincidence_index(lst, A=None):
    """Compute the index of coincidence of `lst` and the symbol counts behind it.

    Parameters
    ----------
    lst : sequence of integer codes.
    A : dict mapping each symbol to its integer code. When omitted, the
        notebook-level `alphabet` is used; it is looked up at call time, so
        re-running the alphabet cell is picked up without redefining this
        function.

    Returns
    -------
    (ci, freqs) : `freqs` maps every symbol of `A` to the number of times its
        code occurs in `lst`; `ci` is sum(f*(f-1)) / (N*(N-1)) with
        N = len(lst). For N < 2 that ratio is 0/0, so `ci` is reported as 0.0
        instead of raising ZeroDivisionError.
    """
    if A is None:
        A = alphabet
    freqs = {symbol: lst.count(code) for symbol, code in A.items()}
    denom = len(lst)
    if denom < 2:
        # Fewer than two symbols: no pair can be drawn, so no coincidence.
        return 0.0, freqs
    ci = sum(f * (f - 1) for f in freqs.values()) / (denom * (denom - 1))
    return ci, freqs

def mutual_coincidence_index(fst, snd, A=None):
    """Compute the mutual index of coincidence of the sequences `fst` and `snd`.

    The result is sum(f1[s] * f2[s]) / (len(fst) * len(snd)) over the symbols
    `s` of `A`, where f1 and f2 count how often the code of `s` occurs in
    `fst` and `snd` respectively.

    Parameters
    ----------
    fst, snd : sequences of integer codes.
    A : dict mapping each symbol to its integer code. When omitted, the
        notebook-level `alphabet` is used (looked up at call time).

    Returns
    -------
    float; 0.0 when either sequence is empty, since no pair can be drawn.
    """
    if A is None:
        A = alphabet
    denom = len(fst) * len(snd)
    if denom == 0:
        return 0.0
    # Only the raw counts are needed, so they are taken directly; this keeps
    # sequences of length 1 usable.
    return sum(fst.count(code) * snd.count(code) for code in A.values()) / denom

In [13]:
# Index of coincidence of the encoded message, together with its per-symbol counts.
coincidence_index(encoded_plain_text)

(0.0663186388244393,
 {' ': 57,
  'a': 31,
  'b': 2,
  'c': 17,
  'd': 13,
  'e': 48,
  'f': 10,
  'g': 7,
  'h': 10,
  'i': 25,
  'j': 1,
  'k': 1,
  'l': 16,
  'm': 15,
  'n': 34,
  'o': 35,
  'p': 10,
  'q': 0,
  'r': 30,
  's': 19,
  't': 22,
  'u': 11,
  'v': 5,
  'w': 5,
  'x': 0,
  'y': 7,
  'z': 1})

In [14]:
# Same measurement on the ciphertext, for comparison with the value obtained for the message.
coincidence_index(cipher_text)

(0.0381434218441179,
 {' ': 19,
  'a': 8,
  'b': 16,
  'c': 11,
  'd': 17,
  'e': 18,
  'f': 10,
  'g': 16,
  'h': 14,
  'i': 17,
  'j': 4,
  'k': 18,
  'l': 21,
  'm': 16,
  'n': 10,
  'o': 10,
  'p': 19,
  'q': 20,
  'r': 24,
  's': 13,
  't': 20,
  'u': 19,
  'v': 12,
  'w': 24,
  'x': 18,
  'y': 17,
  'z': 21})

In [15]:
def spread(message, block_length):
    """Cut `message` into consecutive slices of `block_length` items; the last slice may be shorter."""
    blocks = []
    for start in range(0, len(message), block_length):
        blocks.append(message[start:start + block_length])
    return blocks
    
def col(spreaded, c, joiner=""):
    """Extract column `c` from the rows of `spreaded`.

    Rows too short to have an item at position `c` are skipped, and so are
    items that are `None`. The items are returned as a list when `joiner` is
    `None`; otherwise they are concatenated with `joiner.join`.
    """
    ready = [row[c] for row in spreaded if c < len(row) and row[c] is not None]
    if joiner is None:
        return ready
    return joiner.join(ready)

In [16]:
# Example: cut the ciphertext into blocks of 5 symbols and display them.
spreaded = spread(cipher_text, 5)
spreaded

[[8, 20, 3, 2, 8],
 [15, 19, 20, 25, 15],
 [4, 1, 11, 16, 26],
 [15, 5, 7, 23, 15],
 [24, 23, 8, 4, 3],
 [17, 21, 1, 18, 24],
 [24, 1, 19, 26, 0],
 [26, 3, 19, 22, 14],
 [6, 5, 12, 15, 1],
 [23, 15, 3, 18, 22],
 [12, 20, 9, 20, 7],
 [24, 11, 8, 26, 18],
 [18, 7, 20, 6, 8],
 [11, 22, 15, 24, 16],
 [17, 13, 15, 16, 16],
 [15, 10, 16, 4, 22],
 [19, 3, 4, 26, 6],
 [21, 7, 2, 24, 16],
 [6, 13, 8, 23, 4],
 [25, 19, 1, 22, 26],
 [22, 16, 19, 11, 24],
 [0, 26, 18, 18, 7],
 [20, 6, 8, 11, 22],
 [25, 24, 16, 17, 13],
 [6, 9, 22, 19, 17],
 [17, 7, 1, 10, 16],
 [7, 17, 3, 22, 12],
 [5, 22, 2, 20, 22],
 [10, 3, 14, 7, 8],
 [7, 21, 14, 22, 25],
 [12, 10, 6, 4, 14],
 [12, 6, 4, 23, 10],
 [22, 15, 19, 11, 24],
 [0, 26, 24, 8, 17],
 [18, 6, 21, 11, 22],
 [22, 11, 15, 18, 21],
 [9, 25, 12, 5, 17],
 [20, 6, 16, 25, 14],
 [12, 13, 23, 25, 22],
 [15, 17, 17, 10, 25],
 [23, 23, 19, 20, 8],
 [16, 21, 2, 10, 20],
 [7, 24, 11, 8, 26],
 [4, 8, 19, 1, 19],
 [26, 4, 17, 10, 11],
 [3, 4, 26, 6, 14],
 [7, 24, 26, 4

In [17]:
# Example: the symbols found at position 2 of every block, returned as a list (joiner=None).
col(spreaded,2,None)

[3,
 20,
 11,
 7,
 8,
 1,
 19,
 19,
 12,
 3,
 9,
 8,
 20,
 15,
 15,
 16,
 4,
 2,
 8,
 1,
 19,
 18,
 8,
 16,
 22,
 1,
 3,
 2,
 14,
 14,
 6,
 4,
 19,
 24,
 21,
 15,
 12,
 16,
 23,
 17,
 19,
 2,
 11,
 19,
 17,
 26,
 26,
 16,
 22,
 3,
 2,
 25,
 26,
 13,
 15,
 11,
 23,
 17,
 17,
 24,
 25,
 22,
 3,
 2,
 12,
 19,
 8,
 16,
 20,
 21,
 15,
 5,
 15,
 25,
 23,
 26,
 11,
 14,
 11,
 16,
 20,
 12,
 22,
 1,
 22,
 23]

In [18]:
def analyze(cipher_text):
    """Compute, for every candidate block length, the index of coincidence of each column.

    For each `d` from 2 up to the length of `cipher_text`, the text is cut into
    blocks of `d` symbols and the index of coincidence of every column with at
    least two symbols is recorded.

    Parameters
    ----------
    cipher_text : list of integer codes.

    Returns
    -------
    dict mapping each `d` to the list of indices of coincidence of its columns,
    in column order; columns with fewer than two symbols are left out.
    """
    res = {}
    # The bound comes from the argument itself rather than from a notebook
    # global, so the analysis works for any ciphertext.
    for d in range(2, len(cipher_text) + 1):
        spreaded = spread(cipher_text, d)
        res[d] = []
        for c in range(d):
            column = col(spreaded, c, joiner=None)
            if len(column) < 2:
                continue  # the index is undefined for fewer than two symbols
            res[d].append(coincidence_index(column)[0])
    return res

def guess_key_length(analysis, threshold=.06):
    """Select the key lengths whose columns mostly show a high index of coincidence.

    Parameters
    ----------
    analysis : dict mapping a candidate key length to a list of indices of
        coincidence (one per analysed column).
    threshold : an index counts as "high" when it is strictly greater than
        this value. Defaults to .06.

    Returns
    -------
    dict mapping each retained key length to the list of its indices above
    `threshold`. A length is retained when the number of such indices exceeds
    half of the length itself.
    """
    candidates = {}
    for length, indices in analysis.items():
        high = [ic for ic in indices if ic > threshold]
        if high and len(high) > length / 2:
            candidates[length] = high
    return candidates

In [19]:
# Per-column indices of coincidence of the ciphertext for every candidate block length.
analysis = analyze(cipher_text)

In [20]:
# Keep only the block lengths for which enough columns have a high index of coincidence.
guess_key_length(analysis)

{12: [0.08253968253968254,
  0.07142857142857142,
  0.06190476190476191,
  0.08412698412698413,
  0.07301587301587302,
  0.06666666666666667,
  0.07777777777777778,
  0.0746031746031746],
 36: [0.07575757575757576,
  0.06060606060606061,
  0.07575757575757576,
  0.06060606060606061,
  0.06060606060606061,
  0.06060606060606061,
  0.06060606060606061,
  0.10606060606060606,
  0.10606060606060606,
  0.06060606060606061,
  0.06060606060606061,
  0.10606060606060606,
  0.07575757575757576,
  0.12121212121212122,
  0.09090909090909091,
  0.13636363636363635,
  0.09090909090909091,
  0.06060606060606061,
  0.10606060606060606,
  0.09090909090909091,
  0.06060606060606061],
 72: [0.2,
  0.06666666666666667,
  0.06666666666666667,
  0.06666666666666667,
  0.13333333333333333,
  0.06666666666666667,
  0.2,
  0.13333333333333333,
  0.2,
  0.06666666666666667,
  0.06666666666666667,
  0.06666666666666667,
  0.06666666666666667,
  0.06666666666666667,
  0.06666666666666667,
  0.06666666666666667,


In [40]:
def find_key(cipher_text, key_length):
    """Tabulate mutual indices of coincidence between shifted and unshifted columns.

    The ciphertext is cut into blocks of `key_length` symbols. For every
    column `c` and every shift `v` in 0..n-1, column `c` is shifted back by
    `v` (modulo `n`) and compared with every column `a` through
    `mutual_coincidence_index`.

    Parameters
    ----------
    cipher_text : list of integer codes.
    key_length : number of columns to split the ciphertext into.

    Returns
    -------
    dict keyed by (c, v); each value is a dict mapping the column `a` to the
    mutual index of coincidence. The comparison of a column with its own
    unshifted copy (a == c and v == 0) is left out.
    """
    spreaded = spread(cipher_text, key_length)
    # Every column is compared many times below, so extract each one only once.
    columns = [col(spreaded, c, joiner=None) for c in range(key_length)]
    key = {}
    for c, column in enumerate(columns):
        for v in range(n):
            shifted = [(x - v) % n for x in column]
            key[c, v] = {
                a: mutual_coincidence_index(shifted, another)
                for a, another in enumerate(columns)
                if not (a == c and v == 0)
            }
    return key

In [41]:
# Tabulate the mutual indices of coincidence for an assumed key length of 12.
find_key(cipher_text, key_length=12)

{(0, 0): {1: 0.03009259259259259,
  2: 0.026234567901234566,
  3: 0.031635802469135804,
  4: 0.05092592592592592,
  5: 0.022376543209876542,
  6: 0.027777777777777776,
  7: 0.022376543209876542,
  8: 0.040123456790123455,
  9: 0.041666666666666664,
  10: 0.033179012345679014,
  11: 0.03009259259259259},
 (0, 1): {0: 0.02546296296296296,
  1: 0.046296296296296294,
  2: 0.0470679012345679,
  3: 0.040123456790123455,
  4: 0.05324074074074074,
  5: 0.05169753086419753,
  6: 0.038580246913580245,
  7: 0.021604938271604937,
  8: 0.020061728395061727,
  9: 0.024691358024691357,
  10: 0.029320987654320986,
  11: 0.02854938271604938},
 (0, 2): {0: 0.041666666666666664,
  1: 0.04243827160493827,
  2: 0.027777777777777776,
  3: 0.0625,
  4: 0.016975308641975308,
  5: 0.030864197530864196,
  6: 0.05246913580246913,
  7: 0.02854938271604938,
  8: 0.031635802469135804,
  9: 0.04938271604938271,
  10: 0.029320987654320986,
  11: 0.038580246913580245},
 (0, 3): {0: 0.04243827160493827,
  1: 0.02391975