In [1]:
import codecs
from binascii import unhexlify
import inspect
from base64 import b64decode

In [2]:
def bxor(str1: bytes, str2: bytes):
    '''XOR two byte strings position by position.

    Bytes are paired with ``zip``, so the result is only as long as the
    shorter of the two inputs; any surplus bytes of the longer one are
    ignored.
    '''
    return bytes(left ^ right for left, right in zip(str1, str2))

In [3]:
# Byte values counted as "text": lowercase ASCII letters 'a'..'z' plus space.
# The upper bound is ord('z') + 1 because range() excludes its stop value.
ascii_text_chars = list(range(ord('a'), ord('z') + 1)) + [ord(' ')]


def letter_ratio(input_bytes):
    '''Return the fraction of bytes that are lowercase ASCII letters or spaces.

    input_bytes: a bytes-like object (iterating it must yield integers).
    Returns a float between 0.0 and 1.0; empty input yields 0.0 rather than
    dividing by zero.
    '''
    if not input_bytes:
        return 0.0
    nb_letters = sum(x in ascii_text_chars for x in input_bytes)
    return nb_letters / len(input_bytes)

def is_probably_text(input_bytes):
    '''Tell whether input_bytes looks like text.

    The input qualifies when strictly more than 70% of its bytes are
    counted by letter_ratio.
    '''
    return letter_ratio(input_bytes) > 0.7

In [4]:
def decode(enc):
    '''Brute-force a single-byte XOR cipher whose key is unknown.

    enc: the hex-encoded ciphertext, in any form accepted by
        codecs.decode(enc, 'hex').
    Returns the first candidate plaintext (bytes), trying keys 0..255 in
    order, that is_probably_text accepts; returns None when no key
    produces such a candidate.
    '''
    # Hex-decode once: the ciphertext does not depend on the candidate key.
    ciphertext = codecs.decode(enc, 'hex')

    for i in range(2**8):  # for every possible one-byte key
        # converting the key from a number to a byte
        candidate_key = i.to_bytes(1, byteorder='big')
        # Repeat the key byte to the decoded length (not the hex length,
        # which is twice as long).
        decoded_candidate = bxor(ciphertext, candidate_key * len(ciphertext))
        if is_probably_text(decoded_candidate):
            return decoded_candidate

    return None

In [5]:
def keyphrase_encode(string: bytes, keyphrase: bytes):
    '''Encode string by XOR-ing it with a repeating keyphrase.

    The keyphrase is cycled for the full length of string, whatever the
    keyphrase length is, so every input byte is encoded.

    string: the bytes to encode.
    keyphrase: the non-empty key bytes to repeat.
    Returns the XOR-ed bytes as a lowercase hex string.
    Raises ValueError if keyphrase is empty.
    '''
    if not keyphrase:
        raise ValueError('keyphrase must not be empty')

    key_len = len(keyphrase)
    # Index the key modulo its own length so it repeats for the whole input.
    encoded = bytes(
        byte ^ keyphrase[i % key_len] for i, byte in enumerate(string)
    )
    return encoded.hex()

def b_edit_dist(str1: bytes, str2: bytes):
    '''Count the bits that differ between two byte strings.

    Bytes are compared pairwise up to the length of the shorter input.
    '''
    differing_bits = 0
    for left, right in zip(str1, str2):
        # Each set bit of the XOR marks one position where the bytes differ.
        differing_bits += bin(left ^ right).count('1')
    return differing_bits

In [6]:
def score_vigenere_key_size(candidate_key_size, ciphertext):
    '''Score a candidate key size for a repeating-key XOR ciphertext.

    The ciphertext is cut into consecutive slices of twice the candidate
    key size. For each slice used, the bit edit distance between its first
    and second half is computed. The sum is divided by the key size and by
    the number of measurements, so the score is an average per key byte.
    find_key_size treats the lowest score as the most likely key size.

    candidate_key_size: key length to evaluate, in bytes.
    ciphertext: the raw ciphertext bytes.
    Returns the score as a float, or float('inf') when the ciphertext is
    too short to take a single measurement, so that such a key size is
    never preferred over one that could actually be measured.
    '''
    slice_size = 2 * candidate_key_size

    # One fewer than the number of whole slices that fit in the ciphertext.
    nb_measurements = len(ciphertext) // slice_size - 1
    if nb_measurements <= 0:
        # Without this guard the divisions below would either raise
        # ZeroDivisionError or produce -0.0, which would win any min().
        return float('inf')

    total_distance = sum(
        b_edit_dist(
            ciphertext[start:start + candidate_key_size],
            ciphertext[start + candidate_key_size:start + slice_size],
        )
        for start in range(0, nb_measurements * slice_size, slice_size)
    )

    return total_distance / candidate_key_size / nb_measurements

In [7]:
def find_key_size(ciphertext, min_=2, max_=40):
    '''Return the most probable key size for a ciphertext.

    Every size from min_ up to, but not including, max_ is scored with
    score_vigenere_key_size and the size with the lowest score is
    returned. On a tie the smallest such size is returned.
    '''
    def score_for(size):
        return score_vigenere_key_size(size, ciphertext)

    candidate_sizes = range(min_, max_)
    return min(candidate_sizes, key=score_for)