# Problem 59

https://projecteuler.net/problem=59

In [39]:
from binascii import a2b_base64
from itertools import combinations

def breakRepXORKey(s,keysize):
    """Recover a repeating-key XOR key of length ``keysize`` from ``s``.

    ``s`` is an indexable sequence of integer values (the ciphertext). The
    values are regrouped into ``keysize`` columns: column ``j`` holds every
    value whose position is ``j`` modulo ``keysize``. All values in a column
    were XORed with the same key character, so each column is attacked as a
    single-character XOR: every candidate in ``range(256)`` is tried and the
    one whose decoding gets the highest ``sentenceScore`` is kept.

    Trailing values that do not fill a complete block of ``keysize`` are
    ignored. The guessed key is printed and returned as a string.
    """
    data = s

    # Only whole blocks are used; the incomplete tail of the cipher is skipped.
    whole_blocks = len(data)//keysize

    # Transpose: one column per key position.
    columns = [
        [data[offset + keysize*row] for row in range(whole_blocks)]
        for offset in range(keysize)
    ]

    # Solve every column independently and collect one key character each.
    key_chars = []
    for column in columns:
        candidates = [
            ''.join(chr(value ^ guess) for value in column)
            for guess in range(256)
        ]
        best = max(candidates, key=sentenceScore)
        # The position of the winning decoding is the key value that made it.
        key_chars.append(chr(candidates.index(best)))

    key = "".join(key_chars)
    print("Guessed KEY =",key)
    return key

def decodeRepKeyXOR(s,key):
    """Decrypt ``s`` with the repeating string key ``key`` and return text.

    ``s`` is an iterable of integer values; each one is XORed with the code
    point of the key character at the matching position, with the key
    restarting from its first character once it is exhausted. The resulting
    integers are converted with ``chr`` and joined into one string.
    """
    codes = []
    key_pos = 0
    for value in s:
        codes.append(value ^ ord(key[key_pos]))
        # Step through the key and start over after its last character.
        key_pos += 1
        if key_pos == len(key):
            key_pos = 0
    return "".join(map(chr, codes))

def repeatingKeyXOR(cipher,key):
    """XOR every value of ``cipher`` with the repeating integer key ``key``.

    ``key`` is an indexable sequence of integers; it is reused from its first
    element each time its end is reached. Returns the list of XORed integers
    (unlike ``decodeRepKeyXOR`` no conversion to characters is done).
    """
    result = []
    key_pos = 0
    for value in cipher:
        result.append(value ^ key[key_pos])
        # Move to the next key element, wrapping to 0 past the last one.
        key_pos = key_pos + 1 if key_pos + 1 < len(key) else 0
    return result

# Sentence scoring based on English letter frequencies, see
# https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language

# Weight of each scored character. Only the space and a handful of lowercase
# letters are listed; every other character contributes nothing to a score.
letterFreq = {
    " ": 15,
    "e": 12.702,
    "t": 9.056,
    "a": 8.167,
    "o": 7.507,
    "i": 6.966,
    "n": 6.749,
    "s": 6.327,
    "h": 6.094,
    "r": 5.987,
    "d": 4.253,
}

def sentenceScore(s):
    """Return the sum of the ``letterFreq`` weights of the characters of ``s``.

    Characters that are not keys of ``letterFreq`` (including uppercase
    letters) are skipped, so a string made only of such characters scores 0.
    """
    return sum(letterFreq[ch] for ch in s if ch in letterFreq)

In [49]:
# Knowing that the plain text must contain common English words, decrypt the
# message and find the sum of the ASCII values in the original text.

# The cipher file holds comma-separated integers; surrounding newlines are
# stripped before splitting.
with open("data/p059_cipher.txt") as f:
    cipher = list(map(int, f.read().strip("\n").split(",")))

keysize = 3  # key length assumed for this run
key = breakRepXORKey(cipher, keysize)
plaintext = decodeRepKeyXOR(cipher, key)
# Print an empty line followed by the decoded text.
print("", plaintext, sep="\n")

Guessed KEY = exp

An extract taken from the introduction of one of Euler's most celebrated papers, "De summis serierum reciprocarum" [On the sums of series of reciprocals]: I have recently found, quite unexpectedly, an elegant expression for the entire sum of this series 1 + 1/4 + 1/9 + 1/16 + etc., which depends on the quadrature of the circle, so that if the true sum of this series is obtained, from it at once the quadrature of the circle follows. Namely, I have found that the sum of this series is a sixth part of the square of the perimeter of the circle whose diameter is 1; or by putting the sum of this series equal to s, it has the ratio sqrt(6) multiplied by s to 1 of the perimeter to the diameter. I will soon show that the sum of this series to be approximately 1.644934066842264364; and from multiplying this number by six, and then taking the square root, the number 3.141592653589793238 is indeed produced, which expresses the perimeter of a circle whose diameter is 1. Following

In [50]:
# Sum of the ASCII values of the decrypted text.

# repeatingKeyXOR works on integers, so turn the key string into code points.
keyval = list(map(ord, key))
print(keyval)
dec = repeatingKeyXOR(cipher, keyval)
print(sum(dec))

[101, 120, 112]
129448


### Now trying to guess the key length...

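The analysis using the Hamming distance works, but it chooses a key length that is a multiple of the original one. Given the cipher's properties, the breaking algorithm works anyway: the recovered key is simply the original key repeated, which decrypts the text in exactly the same way.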

In [51]:
def HammingDistance(b1,b2):
    """Return the number of differing bits between ``b1`` and ``b2``.

    Both arguments are iterables of integers. They are compared pairwise;
    because ``zip`` is used, elements beyond the length of the shorter input
    are not counted.
    """
    # XOR leaves a 1 exactly where the two values differ, so counting the
    # "1" characters of the binary representation counts the differing bits.
    return sum(bin(left ^ right).count("1") for left, right in zip(b1, b2))

def guessRepXORKeyLenght(s, min_keysize=2, max_keysize=40):
    """Guess the key length of a repeating-key XOR ciphertext.

    For every candidate size ``ks`` from ``min_keysize`` to ``max_keysize``
    (inclusive) the ciphertext is cut into consecutive blocks of ``ks``
    values, the Hamming distance is computed between every pair of blocks,
    and the total is normalised by ``number_of_pairs * ks``. The candidate
    with the smallest normalised distance is chosen; on a tie the smallest
    candidate wins.

    Parameters:
        s: sliceable sequence of integer values (the ciphertext).
        min_keysize: smallest key length to try (default 2).
        max_keysize: largest key length to try (default 40).

    Returns:
        The guessed key length. It is also printed.

    Raises:
        ValueError: if ``min_keysize`` is smaller than 1, or if ``s`` is too
            short to provide two full blocks for any candidate size.
    """
    if min_keysize < 1:
        raise ValueError("min_keysize must be at least 1")

    data = s

    scored = []
    for ks in range(min_keysize, max_keysize + 1):
        nbloc = len(data)//ks
        if nbloc < 2:
            # A distance needs at least two blocks. Larger sizes can only
            # give fewer blocks, so there is nothing left to evaluate; going
            # on would divide by zero in the normalisation below.
            break
        blocks = [data[j*ks:(j+1)*ks] for j in range(nbloc)]

        # Accumulate the distance over all pairs of blocks.
        total = 0
        npairs = 0
        for first, second in combinations(blocks, 2):
            total += HammingDistance(first, second)
            npairs += 1

        # Normalise per pair and per position so sizes can be compared.
        scored.append((ks, total/(npairs*ks)))

    if not scored:
        raise ValueError(
            "input is too short to guess a key size: at least two full "
            "blocks are needed"
        )

    # min() returns the first minimal entry, i.e. the smallest key size among
    # ties, which is what a stable sort followed by [0] would select.
    keysize = min(scored, key=lambda entry: entry[1])[0]
    print("Guessed KEYSIZE =",keysize)
    return keysize

In [52]:
# Same attack as before, but with the key length estimated from the cipher
# instead of being fixed by hand.
keysize = guessRepXORKeyLenght(cipher)
key = breakRepXORKey(cipher, keysize)
plaintext = decodeRepKeyXOR(cipher, key)
# Print an empty line followed by the decoded text.
print("", plaintext, sep="\n")

Guessed KEYSIZE = 21
Guessed KEY = expexpexpexpexpexpexp

An extract taken from the introduction of one of Euler's most celebrated papers, "De summis serierum reciprocarum" [On the sums of series of reciprocals]: I have recently found, quite unexpectedly, an elegant expression for the entire sum of this series 1 + 1/4 + 1/9 + 1/16 + etc., which depends on the quadrature of the circle, so that if the true sum of this series is obtained, from it at once the quadrature of the circle follows. Namely, I have found that the sum of this series is a sixth part of the square of the perimeter of the circle whose diameter is 1; or by putting the sum of this series equal to s, it has the ratio sqrt(6) multiplied by s to 1 of the perimeter to the diameter. I will soon show that the sum of this series to be approximately 1.644934066842264364; and from multiplying this number by six, and then taking the square root, the number 3.141592653589793238 is indeed produced, which expresses the perimeter of 