# Set 1

In [1]:
%pip install numpy scipy plotly pandas nbformat cryptography




In [2]:
import base64
from string import ascii_letters, printable
from collections import Counter
import numpy as np

In [3]:
# Reference English corpus used to build letter statistics. The context
# manager closes the file handle as soon as the text has been read.
with open("hamlet.txt") as hamlet_file:
    hamlet = hamlet_file.read()

def letter_freq(text) -> dict:
    """Count ASCII letters in ``text``, ignoring case.

    Returns a plain dict mapping each lowercase letter to its number of
    occurrences. Keys appear in the order the letters are first seen;
    characters outside ``ascii_letters`` are skipped.
    """
    counts = {}
    for ch in text:
        if ch in ascii_letters:
            lowered = ch.lower()
            counts[lowered] = counts.get(lowered, 0) + 1
    return counts

# Letter counts of the reference text, and the bare counts (in the dict's
# insertion order) as a NumPy array for the statistical comparison in score().
hamlet_freq = letter_freq(hamlet)
hamlet_freq_arr = np.array([*hamlet_freq.values()])

## Challenge 1: Convert hex to base64

In [4]:
# Hex text -> raw bytes -> base64. Work on the bytes, never on the hex string.
string = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d"
bs = bytes.fromhex(string)
b64 = base64.standard_b64encode(bs)
b64

b'SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t'

## Challenge 2: Fixed XOR

In [5]:
# The two equal-length buffers to combine.
a = bytes.fromhex("1c0111001f010100061a024b53535009181c")
b = bytes.fromhex("686974207468652062756c6c277320657965")

# XOR the buffers byte by byte and show the result as hex.
bytes(left ^ right for left, right in zip(a, b)).hex()

'746865206b696420646f6e277420706c6179'

## Challenge 3: Single-byte XOR cipher

In [6]:
import scipy.stats

string = "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"
bs = bytes.fromhex(string)

# One candidate plaintext per ASCII-letter key, in ascii_letters order.
strings = [bytes(b ^ ord(ch) for b in bs) for ch in ascii_letters]

def score(bstr):
    """Rate how plausible ``bstr`` is as a decrypted text (higher is better).

    Returns -1 (rejected) when any byte is not in ``string.printable``, when
    any byte is one of ``{}()``, or when the text contains no ASCII letters
    at all (which also covers empty input).

    Otherwise returns the number of bytes that are ASCII letters, basic
    punctuation (``.,?!'``) or a space, divided by the squared p-value of a
    two-sample Kolmogorov-Smirnov test between the reference letter counts
    (``hamlet_freq_arr``) and the letter counts of ``bstr``.
    """
    if any(chr(x) not in printable or chr(x) in "{}()" for x in bstr):
        return -1

    # Without any letters the sample handed to ks_2samp would be empty, which
    # either raises or produces NaN depending on the SciPy version. Reject
    # such candidates explicitly instead.
    if not any(chr(x) in ascii_letters for x in bstr):
        return -1

    freq = letter_freq(bstr.decode("ascii").lower())
    freq_arr = np.array(list(freq.values()))
    legit_chars = ascii_letters + ".,?!' "
    legit_text_statistic = sum(1 for x in bstr if chr(x) in legit_chars)
    ks_test_2 = scipy.stats.ks_2samp(hamlet_freq_arr, freq_arr).pvalue ** 2
    return legit_text_statistic / ks_test_2
    

# Rank the candidates from highest to lowest score and show the winner.
strings = sorted(strings, key=score, reverse=True)
strings[0]

b"Cooking MC's like a pound of bacon"

## Challenge 4: Detect single-character XOR

In [7]:
# One hex-encoded ciphertext per line. The context manager closes the file
# handle once every line has been decoded.
with open("challenge-data/4.txt") as challenge_file:
    bs = [bytes.fromhex(line) for line in challenge_file]

# Debugging aid: collects every candidate score seen across all calls.
c = set()


def break_single_character_xor(bs, min_score=100000, keys=range(127)):
    """Find the single-byte XOR key whose decryption of ``bs`` scores best.

    Parameters:
        bs: ciphertext as a bytes-like iterable of ints.
        min_score: a candidate must score strictly above this value to be
            accepted. Defaults to the threshold the notebook has always used.
        keys: iterable of integer key values to try, in order. Defaults to
            the original 0..126 range.

    Returns:
        ``(best_score, best_key)``. When no candidate beats ``min_score`` the
        result is the sentinel ``(min_score, 0)``, so a returned score equal
        to ``min_score`` means "nothing found", not a real score.
    """
    best_score = min_score
    best_key = 0
    for candidate_key in keys:
        candidate = bytes([b ^ candidate_key for b in bs])
        candidate_score = score(candidate)
        c.add(candidate_score)
        # Strict comparison: on ties the earliest key wins.
        if candidate_score > best_score:
            best_score, best_key = candidate_score, candidate_key
    return best_score, best_key

def decrypt_xor(s, k):
    """XOR every byte of ``s`` with the single integer key ``k``; return bytes."""
    out = bytearray()
    for byte in s:
        out.append(byte ^ k)
    return bytes(out)

# Crack every line independently, then rank the (score, key, ciphertext)
# triples from best to worst.
guesses = sorted(
    ((*break_single_character_xor(line), line) for line in bs),
    reverse=True,
)
_, k, s = guesses[0]

decrypt_xor(s, k)


b'Now that the party is jumping\n'

## Challenge 5: Implement repeating-key XOR

In [8]:
string = b"""Burning 'em, if you ain't quick and nimble
I go crazy when I hear a cymbal"""

key = b"ICE"

# Plaintext byte at position `pos` is XORed with key byte `pos mod len(key)`.
out = bytes([plain ^ key[pos % len(key)] for pos, plain in enumerate(string)])
out.hex()

'0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f'

## Challenge 6: Break repeating-key XOR

In [9]:
from itertools import zip_longest

In [10]:
# Load the base64-encoded ciphertext. The context manager closes the file
# handle once its contents have been read.
with open("challenge-data/6.txt") as challenge_file:
    string = base64.b64decode(challenge_file.read())
print(string)

b'\x1dB\x1fM\x0b\x0f\x02\x1fO\x13N<\x1aie\x1fI\x1c\x0eN\x13\x01\x0b\x07N\x1b\x01\x16E6\x00\x1e\x01Id T\x1d\x1dC3SNeR\x06\x00GT\x1c\rEM\x07\x04\x0cS\x12<\x0c\x1e\x08I\x1a\t\x11O\x14L!\x1aG+\x00\x05\x1dGY\x11\x04\t\x00d&\x07S\x007\x16\x06\x0c\x1a\x17A\x1d\x01RT0_\x00 \x13\n\x05GO\x12H\x08ENe>\x16\t8E\x06\x05\x08\x1aF\x07O\x1fYx~jb6\x0c\x1d\x0fA\rH\x06U\x1a\x1b\x00\x1dBt\x04\x1e\x01I\x1a\t\x11\x02Rz\x7fI\x00H:\x00\x1a\x13I\x1aOEH\x0f\x1d\rS\x04:\x01R\x19\x01\x0bA\x13\x06\x00L1_Sb\x15\x06\x07\t\x07T\x0b\x17A\x14\x16Iy35\x0b\x1b\x01\x05\x0fF\x07O\x1dNxNH\'R\x04\x07\x0cEXH\x08A\x00O T\x08t\x0b\x1d\x19I\x02\x00\x0e\x16\\\x00R0ie\x1fI\x02\x02T\x00\x01\x0b\x07N\x02\x10S\x01&\x10\x15M\x02\x07\x02\x1fO\x1bNx0i6R\n\x01\tT\x06\x07\tSN\x02\x10S\x08;\x10\x06\x05I\x0f\x0f\x10O;\x00:_G+\x1cId3OT\x02\x10S\x1aO\x05\x16\x11t\x0c\x06M\x0f\x02\x0e\x03CRL=N\x00/\x0bI\r\x08N\x17\r\x15T\x1dO\x0e\x1cE^(\x0bM\x19\x01\x12\x07\nUSxNOb\x06\x01\x0bGS\x1d\x0c\x00\x00\x17\n\x05\x1f\x0c:B^M.\x01A"\x0e\x1cI4VAb5\x06OG*~

In [11]:
def hamming_distance(a, b):
    return sum((x ^ y).bit_count() for x, y in zip_longest(a, b, fillvalue=0))

assert hamming_distance(b"this is a test", b"wokka wokka!!!") == 37

In [12]:
guesses = []

for keysize in range(2, 90):
    # Every complete keysize-sized chunk of the ciphertext, in order.
    usable_length = len(string) // keysize * keysize
    blocks = [string[pos:pos + keysize] for pos in range(0, usable_length, keysize)]

    # Average the per-byte Hamming distance over all adjacent chunk pairs.
    distance_total = 0
    pairs_processed = 0
    for chunk_a, chunk_b in zip(blocks, blocks[1:]):
        distance_total += hamming_distance(chunk_a, chunk_b) / keysize
        pairs_processed += 1
    avg = distance_total / pairs_processed
    guesses.append((avg, keysize))

# The smallest normalized distance marks the most likely key size.
guesses.sort()
best_guess = guesses[0][1]
print(guesses[:10])

best_guess

[(2.7593244194229416, 29), (2.783045977011494, 58), (2.785201149425288, 87), (3.1492537313432836, 67), (3.152560455192034, 38), (3.163059163059163, 84), (3.1643356643356664, 13), (3.1646471846044193, 61), (3.1652690426275334, 9), (3.165379213483146, 16)]


29

In [13]:
# Transpose the ciphertext: group `offset` holds every byte that was XORed
# with key byte `offset`.
groups = [string[offset::best_guess] for offset in range(best_guess)]

# Solve each group as a single-byte XOR and join the key characters.
key = "".join(chr(break_single_character_xor(g)[1]) for g in groups)
key

'Terminator X: Bring the noise'

In [14]:
from itertools import cycle

def decrypt_long_xor(s, key):
    """XOR ``s`` with ``key`` repeated as often as needed; return bytes.

    An empty ``key`` produces an empty result because there is nothing to
    pair the input bytes with.
    """
    out = bytearray()
    for cipher_byte, key_byte in zip(s, cycle(key)):
        out.append(cipher_byte ^ key_byte)
    return bytes(out)

# Apply the recovered key (as ASCII bytes) to the whole ciphertext and show the text.
decrypt_long_xor(string, bytes(key, "ascii")).decode("utf-8")

"I'm back and I'm ringin' the bell \nA rockin' on the mike while the fly girls yell \nIn ecstasy in the back of me \nWell that's my DJ Deshay cuttin' all them Z's \nHittin' hard and the girlies goin' crazy \nVanilla's on the mike, man I'm not lazy. \n\nI'm lettin' my drug kick in \nIt controls my mouth and I begin \nTo just let it flow, let my concepts go \nMy posse's to the side yellin', Go Vanilla Go! \n\nSmooth 'cause that's the way I will be \nAnd if you don't give a damn, then \nWhy you starin' at me \nSo get off 'cause I control the stage \nThere's no dissin' allowed \nI'm in my own phase \nThe girlies sa y they love me and that is ok \nAnd I can dance better than any kid n' play \n\nStage 2 -- Yea the one ya' wanna listen to \nIt's off my head so let the beat play through \nSo I can funk it up and make it sound good \n1-2-3 Yo -- Knock on some wood \nFor good luck, I like my rhymes atrocious \nSupercalafragilisticexpialidocious \nI'm an effect and that you can bet \nI can take a

## Challenge 7: AES in ECB mode

In [15]:
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

# Each line of the file is base64-decoded separately and the pieces are
# concatenated. The context manager closes the file handle when done.
with open("challenge-data/7.txt") as challenge_file:
    text = b''.join(base64.b64decode(line) for line in challenge_file)

key = b"YELLOW SUBMARINE"

def decrypt_aes(text, key):
    """Decrypt ``text`` with AES in ECB mode under ``key``.

    The raw decrypted bytes are returned; no padding is removed here.

    Raises:
        ValueError: from ``finalize()`` when ``len(text)`` is not a multiple
            of the cipher block length. Previously such trailing bytes were
            dropped silently.
    """
    decryptor = Cipher(algorithms.AES(key), modes.ECB()).decryptor()
    # update() may hold back an incomplete block; finalize() completes the
    # operation and reports misaligned input instead of hiding it.
    return decryptor.update(text) + decryptor.finalize()


decrypt_aes(text, key)

b"I'm back and I'm ringin' the bell \nA rockin' on the mike while the fly girls yell \nIn ecstasy in the back of me \nWell that's my DJ Deshay cuttin' all them Z's \nHittin' hard and the girlies goin' crazy \nVanilla's on the mike, man I'm not lazy. \n\nI'm lettin' my drug kick in \nIt controls my mouth and I begin \nTo just let it flow, let my concepts go \nMy posse's to the side yellin', Go Vanilla Go! \n\nSmooth 'cause that's the way I will be \nAnd if you don't give a damn, then \nWhy you starin' at me \nSo get off 'cause I control the stage \nThere's no dissin' allowed \nI'm in my own phase \nThe girlies sa y they love me and that is ok \nAnd I can dance better than any kid n' play \n\nStage 2 -- Yea the one ya' wanna listen to \nIt's off my head so let the beat play through \nSo I can funk it up and make it sound good \n1-2-3 Yo -- Knock on some wood \nFor good luck, I like my rhymes atrocious \nSupercalafragilisticexpialidocious \nI'm an effect and that you can bet \nI can take 

## Challenge 8: Detect AES in ECB mode

In [20]:
# One hex-encoded ciphertext per line. The context manager closes the file
# handle once every line has been decoded.
with open("challenge-data/8.txt") as challenge_file:
    lines = [bytes.fromhex(buf) for buf in challenge_file]

# Sanity check: show the distinct ciphertext lengths.
print(set(len(line) for line in lines))

# d maps "number of distinct 16-byte blocks in a line" -> how many lines have
# that count. A line with repeated blocks has a lower count than the rest.
d = Counter()
for line in lines:
    blocks = {line[start:start + 16] for start in range(0, len(line) // 16 * 16, 16)}
    unique_count = len(blocks)
    # Print one example line the first time each count is seen.
    if unique_count not in d:
        print(unique_count, line)
    d[unique_count] += 1
print(d)

{160}
10 b'\x8a\x10$\x7f\x90\xd0\xa0U8\x88\x8a\xd6 X\x82\x19o_m\x05\xc2\x1e\xc8\xdc\xa0\xcb\x0b\xe0,?\x8b\t\xe3\x82\x96?D:\xa5\x14\xda\xa5\x01%{\t\xa3k\xf8\xc4\xc3\x92\xd8\xca\x1b\xf49_\r_%B\x14\x8c~_\xf2"7\x96\x98t\xbff\xcb\x855~\xf9\x99V\xac\xcf\x13\xba\x1a\xf3l\xa7\xa9\x1aPS<M\x89\xb75?\x90\x8cZ\x16gt);\x0b\xf6$s\x91\xdfi\xc8}\xac\xc4\x12Z\x99\xecAr!\xb5\x81p\xe638\x1e8G\xc6\xb1\xc2\x8d\xda)\x13\xc0\x11\xe1?\xc4@o\x8f\xe7;\xbfx\xe8\x03\xe1\xd9\x95\xceM'
b'E\xfeU\xa71)n\x8c\x10=\x13\xa3\xe7\xfc\x7f\x88\xda\xef\x02\xf1\xb2q\xf2aFU\xe6\x04\xa0\xa4\x89b\xb8\x00]E\x90\x85oH\x12\x95M>9/\xbfd\x97\x03x\xc4\xf1\x1f\x8f\x949\xb6\xbd\x19\x06\xcb"\xa2\xb6/\x95\xbb\x02\\O\xc4\x8a\x9d\xf2\xeb\xc9\x12\x9f\xf4\xad\'\x8f\'\xd0\xafe\xbcm_\x16\x865e\x00\xc4\xb1?\x0bK\xb7\xd8\x85H\'\x02\x13\xc9H^\x98\x98G[\xe7B\x87\xafmo\xc8\xb1\xdb.\xbe\x10;\x13\x14Zb\xba\xfe\x04F\x02\xd7C$\x8e\x8bh\'\xf4v\xdb\x11\x00_}5\x8eI\xf1M\xcd\xd5<M\''
7 b'\xd8\x80a\x97@\xa8\xa1\x9bx@\xa8\xa3\x1c\x81\n=\x08d\x9a\xf7\r\xc0oO\xd