In [34]:
# Code based on work by Bart de Zwart from the previous year's modern cryptography course.

def keyLength(ciph_int, max_key_len=13):
    """
    Find the most probable key length using byte frequencies.

    For every candidate length n the bytes ciph_int[0], ciph_int[n],
    ciph_int[2n], ... are sampled and the sum of their squared relative
    frequencies (sum of qi^2) is computed. The candidate with the highest
    sum is returned; on an exact tie the shortest candidate wins.

    ciph_int: list of byte values (integers in the range 0..255).
    max_key_len: largest key length to try (inclusive), 13 by default.

    Returns the most probable key length (1 for an empty ciphertext).
    Raises ValueError if max_key_len is smaller than 1.
    """
    if max_key_len < 1:
        raise ValueError("max_key_len must be at least 1")

    keylength_prob = []
    # Loop over possible key lengths.
    for n in range(1, max_key_len + 1):
        # Get every nth byte from the list.
        nth_char = ciph_int[::n]
        total = len(nth_char)

        # Count with integers and divide once at the end. Accumulating
        # 1 / total per byte introduces rounding error that can change
        # which candidate has the highest score when scores are (nearly) tied.
        counts = [0] * 256
        for m in nth_char:
            counts[m] += 1
        squares = sum(c * c for c in counts)

        # sum(qi^2) == sum(ci^2) / total^2; an empty sample scores 0.
        keylength_prob.append(squares / (total * total) if total else 0.0)

    # list.index returns the first maximum, i.e. the shortest key length.
    return keylength_prob.index(max(keylength_prob)) + 1


def matchCandidates(nth_bytes, freq_lc_english):
    """
    Score every possible single-byte key for one key position.

    Each key value 0..255 is XOR'ed with nth_bytes. A key is discarded when
    the result contains a byte outside printable ASCII (32..126), contains a
    digit, or contains no lowercase letter at all. For the remaining keys the
    relative frequencies qi of the lowercase letters are matched against the
    expected frequencies pi by computing the sum of qi * pi.

    nth_bytes: list of ciphertext bytes that share the same key byte.
    freq_lc_english: 26 expected letter frequencies for 'a'..'z'.

    Returns a list of (score, key value) tuples in ascending key order. The
    list holds only the keys that were not discarded, so it can be shorter
    than 256 entries or even empty.
    """
    key_value_prob = []
    # Try all possible key values.
    for m in range(0, 256):
        # Decrypt the candidate.
        candidate = [m ^ i for i in nth_bytes]

        # Candidates with invalid characters are discarded. 127 (DEL) is a
        # control character, so the printable range ends at 126.
        if any(i < 32 or i > 126 or 48 <= i <= 57 for i in candidate):
            continue

        # Count the lowercase letters 'a'..'z' (ASCII 97..122).
        letter_counts = [0] * 26
        for i in candidate:
            if 97 <= i <= 122:
                letter_counts[i - 97] += 1

        # Without any lowercase letter there is nothing to compare.
        lowercase_freq = sum(letter_counts)
        if lowercase_freq == 0:
            continue

        # Find the sum of qi * pi, should be around 0.065 for English text.
        value_prob = 0
        for idx, count in enumerate(letter_counts):
            value_prob += count / lowercase_freq * freq_lc_english[idx]

        key_value_prob.append((value_prob, m))
    return key_value_prob


def findKey(key_len, ciph_int):
    """
    Recover the key one position at a time.

    For every key position the ciphertext bytes encrypted with that position
    are scored by matchCandidates; the key value whose score is closest to
    0.065 is chosen.

    key_len: length of the key.
    ciph_int: list of ciphertext byte values.

    Returns the key as a list of key_len integers.
    Raises ValueError when no key value yields an acceptable plaintext for
    some key position.
    """
    # Letter frequency from:
    # https://www.coursera.org/learn/cryptography/lecture/01t8O/breaking-the-vigenere-cipher
    freq_lc_english = [0.082, 0.015, 0.028, 0.043, 0.127, 0.022, 0.020, 0.061,
                       0.070, 0.002, 0.008, 0.040, 0.024, 0.067, 0.075, 0.019,
                       0.001, 0.060, 0.063, 0.091, 0.028, 0.010, 0.024, 0.002,
                       0.020, 0.001]
    key = []
    # For each position of the key
    for n in range(0, key_len):
        # Get the nth bytes (bytes corresponding to a single key value)
        nth_bytes = ciph_int[n::key_len]
        candidates = matchCandidates(nth_bytes, freq_lc_english)

        # min() on an empty list only reports "empty sequence"; say which
        # key position could not be solved instead.
        if not candidates:
            raise ValueError(
                "no plausible key value found for key position %d "
                "(key length %d)" % (n, key_len))

        # Based on: https://stackoverflow.com/questions/12141150/
        key.append(min(candidates, key=lambda x: abs(x[0] - 0.065))[1])
    return key


def decrypt(key, ciph_int):
    """
    XOR every ciphertext byte with the key byte at the same position
    (the key is repeated once it runs out) and print the resulting text.
    """
    key_size = len(key)
    plaintext = ''.join(
        chr(key[pos % key_size] ^ byte) for pos, byte in enumerate(ciph_int)
    )
    print(plaintext)


def main():
    """
    Using frequency analysis of a ciphertext, find the most likely corresponding plaintext.

    Reads the hex-encoded ciphertext from file, determines the key length and
    the key, and prints the decrypted message.
    """

    # Adjust the name of this variable to decrypt a different ciphertext
    ciphertext = 'ciphertext2.txt'

    with open(ciphertext, 'r') as ciphtext:
        # Remove all whitespace, not just trailing newlines: int() accepts
        # surrounding whitespace, so a stray space or line break inside the
        # file would otherwise shift the hex pairs and yield wrong bytes.
        ciph = ''.join(ciphtext.read().split())

    # Convert ciphertext to list of integers (two hex digits per byte)
    ciph_int = [int(ciph[i:i+2], 16) for i in range(0, len(ciph), 2)]

    # Find most probable key length.
    key_len = keyLength(ciph_int)
    key = findKey(key_len, ciph_int)

    # The key for the second ciphertext is guessed incorrectly for the 5th position (index 4).
    # After observing the partially incorrect plaintext, we have made a (highly accurate) guess for this key value.
    # The length check avoids an IndexError when a shorter key was detected.
    if ciphertext == 'ciphertext2.txt' and len(key) > 4:
        key[4] = 43

    # Decrypt the ciphertext and print.
    decrypt(key, ciph_int)


# Run the attack only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

The NSA has built an infrastructure that allows it to intercept almost everything. With this capability, the vast majority of human communications are automatically ingested without targeting. If I wanted to see your emails or your wife's phone, all I have to do is use intercepts. I can get your emails, passwords, phone records, credit cards. I don't want to live in a society that does these sort of things... I do not want to live in a world where everything I do and say is recorded. That is not something I am willing to support or live under.
