# 17. CBC padding oracle
https://cryptopals.com/sets/3/challenges/17

In [1]:
from Cryptodome.Cipher import AES
from cryptopals.utils import pkcs7_pad, pkcs7_strip, PaddingError
import os
import random

BLOCKSIZE = AES.block_size
KEYSIZE = 32

class Challenge17:
    """Encryption service and padding oracle for Cryptopals challenge 17.

    Each instance draws its own random AES key, which is used both to
    CBC-encrypt one of the challenge strings and to answer padding-validity
    queries on attacker-supplied (ciphertext, IV) pairs.
    """

    def __init__(self):
        # The ten base64-encoded plaintexts provided by the challenge.
        self.strings = [
            b"MDAwMDAwTm93IHRoYXQgdGhlIHBhcnR5IGlzIGp1bXBpbmc=",
            b"MDAwMDAxV2l0aCB0aGUgYmFzcyBraWNrZWQgaW4gYW5kIHRoZSBWZWdhJ3MgYXJlIHB1bXBpbic=",
            b"MDAwMDAyUXVpY2sgdG8gdGhlIHBvaW50LCB0byB0aGUgcG9pbnQsIG5vIGZha2luZw==",
            b"MDAwMDAzQ29va2luZyBNQydzIGxpa2UgYSBwb3VuZCBvZiBiYWNvbg==",
            b"MDAwMDA0QnVybmluZyAnZW0sIGlmIHlvdSBhaW4ndCBxdWljayBhbmQgbmltYmxl",
            b"MDAwMDA1SSBnbyBjcmF6eSB3aGVuIEkgaGVhciBhIGN5bWJhbA==",
            b"MDAwMDA2QW5kIGEgaGlnaCBoYXQgd2l0aCBhIHNvdXBlZCB1cCB0ZW1wbw==",
            b"MDAwMDA3SSdtIG9uIGEgcm9sbCwgaXQncyB0aW1lIHRvIGdvIHNvbG8=",
            b"MDAwMDA4b2xsaW4nIGluIG15IGZpdmUgcG9pbnQgb2g=",
            b"MDAwMDA5aXRoIG15IHJhZy10b3AgZG93biBzbyBteSBoYWlyIGNhbiBibG93"
        ]
        # Secret key, unknown to the attacker; fixed for the instance lifetime.
        self.key = os.urandom(KEYSIZE)

    def get_encrypted_data(self, quiet=True) -> tuple[bytes, bytes]:
        '''Select at random one of the 10 strings, pad the string out to the 16-byte AES block size and
        CBC-encrypt it under the key, providing the caller the ciphertext and IV.

        Parameters:
        - quiet: when False, print the selected (still base64-encoded) string.

        Returns:
        - (ciphertext, iv); the IV is the one generated by the cipher object.
        '''
        _string = random.choice(self.strings)
        if not quiet:
            print(f"{_string=}")
        # No IV is passed, so the cipher object supplies one (exposed as `.iv`).
        aes_cbc = AES.new(self.key, AES.MODE_CBC)
        cipher = aes_cbc.encrypt(pkcs7_pad(_string, BLOCKSIZE))
        return cipher, aes_cbc.iv

    def padding_valid(self, cipher: bytes, iv: bytes) -> bool:
        '''Consume the ciphertext produced by the first function, decrypt it, check its padding, and
        return true or false depending on whether the padding is valid.

        This is the padding oracle: it reveals nothing but the validity bit.
        '''
        aes_cbc = AES.new(self.key, AES.MODE_CBC, iv)
        plaintext = aes_cbc.decrypt(cipher)
        try:
            pkcs7_strip(plaintext)
        except PaddingError:
            return False
        else:
            return True

In [2]:
# Demo: encrypt one randomly chosen string (echoing which one was picked) and
# check that the oracle accepts the untouched ciphertext/IV pair.
challenge17 = Challenge17()
cipher, iv = challenge17.get_encrypted_data(quiet=False)
print(f"{cipher=}")
print(f"{iv=}")
print(f"{challenge17.padding_valid(cipher, iv)=}")

_string=b'MDAwMDA4b2xsaW4nIGluIG15IGZpdmUgcG9pbnQgb2g='
cipher=b'\xba\x87\xfd\xc5\xfd\x90\x07\xc2\xffr3\xf0\xb9\xc2i\xf5\xa0$\x18\x99x\x0b\xa5u\xdc\x81Fhw$1\x07e\x9e\xcd(\x8e\xfag\xe0\xaerECen&\x88'
iv=b' \xba\x874\x14\x17O1\x87<\xd2\xa6\xad\xdc}j'
challenge17.padding_valid(cipher, iv)=True


## Attack

Excellent explanation and tutorial at: https://www.nccgroup.com/us/research-blog/cryptopals-exploiting-cbc-padding-oracles/

* By making modifications to the IV, one can predictably modify the plaintext block. Flipping a bit in the IV will flip the corresponding bit in the plaintext.
* By exploiting this property, one can build a "zeroing" IV that sets some (and eventually all) of the plaintext’s bytes to zero.

In [3]:
from cryptopals.utils import bytes_xor

def single_block_attack(block, oracle):
    """Recover the "zeroing IV" of one ciphertext block through a padding oracle.

    The zeroing IV is the IV under which ``block`` decrypts to all-zero bytes.
    XOR-ing it with the real IV (or previous ciphertext block) yields the
    plaintext of ``block``.

    Parameters:
    - block: a single ciphertext block; its length is taken as the block size.
    - oracle: callable ``oracle(block, iv) -> bool`` telling whether ``block``
      decrypted under ``iv`` ends with valid padding.

    Returns:
    - The zeroing IV as bytes, with the same length as ``block``.

    Raises:
    - ValueError: if, for some position, none of the 256 candidate bytes is
      accepted by the oracle.
    """
    block_size = len(block)
    zeroing_iv = [0] * block_size  # zeroing IV starts out nulled
    for pad_value in range(1, block_size + 1):
        # Force the already-recovered tail of the plaintext to pad_value, so
        # only the byte at -pad_value is left to decide padding validity.
        padding_iv = [pad_value ^ b for b in zeroing_iv]
        for iv_candidate in range(256):
            padding_iv[-pad_value] = iv_candidate
            if not oracle(block, bytes(padding_iv)):
                continue
            if pad_value == 1 and block_size > 1:
                # A hit here may be a longer padding (e.g. \x02\x02) that just
                # happens to be valid. Disturb the penultimate byte on a copy:
                # a genuine one-byte padding must remain valid.
                probe_iv = list(padding_iv)
                probe_iv[-2] ^= 1
                if not oracle(block, bytes(probe_iv)):
                    continue  # false positive, try the next candidate byte
            break  # correct candidate found for this position
        else:
            raise ValueError(
                f"no IV byte yields valid padding of length {pad_value}"
            )
        zeroing_iv[-pad_value] = iv_candidate ^ pad_value
    return bytes(zeroing_iv)

def full_attack(iv, cipher, oracle):
    """Decrypt a whole CBC message block by block using the padding oracle.

    Parameters:
    - iv: the IV the message was encrypted with.
    - cipher: the ciphertext (split into BLOCKSIZE-byte blocks).
    - oracle: padding oracle passed through to ``single_block_attack``.

    Returns:
    - The recovered plaintext bytes, padding included.
    """
    stream = iv + cipher
    chunks = [stream[start:start + BLOCKSIZE] for start in range(0, len(stream), BLOCKSIZE)]
    # Every block is decrypted against the block preceding it; the IV plays
    # the role of the "previous block" for the first ciphertext block.
    previous = chunks[0]
    pieces = []
    for current in chunks[1:]:
        zeroing = single_block_attack(current, oracle)
        pieces.append(bytes_xor(zeroing, previous))
        previous = current
    return b''.join(pieces)

In [4]:
from base64 import b64decode

# New oracle instance (and therefore a new random key) for the attack demo.
challenge17 = Challenge17()
cipher, iv = challenge17.get_encrypted_data()
print(f"{cipher=}")
print(f"{iv=}")

# Recover the padded plaintext using nothing but the padding oracle.
result = full_attack(iv, cipher, challenge17.padding_valid)
print(f"{result=}")

# Strip the padding, then undo the base64 encoding of the challenge strings.
plaintext = b64decode(pkcs7_strip(result).decode())
print(f"{plaintext=}")

cipher=b'\xce\xabh"X\xc2\x879o\x10\x1e\xfag\xa4\xcbaW\x87\xf2\xabbM\x9dq\x11\xaaN\xfaf\xb7\xe9\xfa,!\xe4\xccy\x92t$u\x07\xe42\xbfVB}p\x7f\x08~)\xd1Q\xbc\x85\xfd\x04\xbc\xd5\xbd\xd0\x00\xd5\xed\x80\x13xE\x170o\xab\xb7\xbe\xb6\x87\x0e\xb5'
iv=b'o\xcf\x9en\xc1r\xc8\x1b\xd8\xe4\xb1A\xeda<@'
result=b'MDAwMDA0QnVybmluZyAnZW0sIGlmIHlvdSBhaW4ndCBxdWljayBhbmQgbmltYmxl\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10'
plaintext=b"000004Burning 'em, if you ain't quick and nimble"


In [5]:
# Repeat the attack on 20 fresh encryptions (same oracle/key, random string and
# IV each time) to show that it succeeds consistently.
for _ in range(20):
    cipher, iv = challenge17.get_encrypted_data()
    result = full_attack(iv, cipher, challenge17.padding_valid)
    plaintext = b64decode(pkcs7_strip(result).decode())
    print(plaintext)

b'000006And a high hat with a souped up tempo'
b'000005I go crazy when I hear a cymbal'
b'000002Quick to the point, to the point, no faking'
b"000001With the bass kicked in and the Vega's are pumpin'"
b"000007I'm on a roll, it's time to go solo"
b'000000Now that the party is jumping'
b"000001With the bass kicked in and the Vega's are pumpin'"
b"000007I'm on a roll, it's time to go solo"
b"000004Burning 'em, if you ain't quick and nimble"
b'000000Now that the party is jumping'
b'000000Now that the party is jumping'
b'000009ith my rag-top down so my hair can blow'
b"000001With the bass kicked in and the Vega's are pumpin'"
b"000004Burning 'em, if you ain't quick and nimble"
b"000004Burning 'em, if you ain't quick and nimble"
b'000005I go crazy when I hear a cymbal'
b"000007I'm on a roll, it's time to go solo"
b"000003Cooking MC's like a pound of bacon"
b'000005I go crazy when I hear a cymbal'
b"000003Cooking MC's like a pound of bacon"


# 18. Implement CTR, the stream cipher mode

https://cryptopals.com/sets/3/challenges/18

https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)

In [1]:
from cryptopals.utils import bytes_xor
from Cryptodome.Cipher import AES
import math

def generate_ctr_keystream(key: bytes, nonce: int, msglen: int) -> bytes:
    """Build ``msglen`` bytes of AES-CTR keystream.

    Each keystream block is the ECB encryption of the 8-byte little-endian
    nonce followed by the 8-byte little-endian block counter (starting at 0).
    """
    half_block = AES.block_size // 2
    ecb = AES.new(key, AES.MODE_ECB)
    # Enough whole blocks to cover the message; the excess is trimmed below.
    n_blocks = math.ceil(msglen / AES.block_size)
    encrypted_blocks = [
        ecb.encrypt(
            nonce.to_bytes(length=half_block, byteorder='little')
            + counter.to_bytes(length=half_block, byteorder='little')
        )
        for counter in range(n_blocks)
    ]
    return b"".join(encrypted_blocks)[:msglen]

def aes_ctr_decode_encode(b: bytes, key: bytes, nonce: int) -> bytes:
    """XOR ``b`` with the CTR keystream for (key, nonce).

    The same call both encrypts and decrypts, since XOR is its own inverse.
    """
    keystream = generate_ctr_keystream(key, nonce, len(b))
    return bytes_xor(b, keystream)

In [2]:
from base64 import b64decode

# Ciphertext (base64), key and nonce as given by the challenge statement.
message = b"L77na/nrFsKvynd6HzOoG7GHTLXsTVu9qvY/2syLXzhPweyyMTJULu/6/kXX0KSvoOLSFQ=="
key=b"YELLOW SUBMARINE"
nonce=0 # "number used once"

# Decryption is the same operation as encryption in CTR mode.
plaintext = aes_ctr_decode_encode(b64decode(message),key,nonce)
print(f"{plaintext=}")

plaintext=b"Yo, VIP Let's kick it Ice, Ice, baby Ice, Ice, baby "


# 19. Break fixed-nonce CTR mode using substitutions

https://cryptopals.com/sets/3/challenges/19

In [3]:
strings19 = [
b"SSBoYXZlIG1ldCB0aGVtIGF0IGNsb3NlIG9mIGRheQ==",
b"Q29taW5nIHdpdGggdml2aWQgZmFjZXM=",
b"RnJvbSBjb3VudGVyIG9yIGRlc2sgYW1vbmcgZ3JleQ==",
b"RWlnaHRlZW50aC1jZW50dXJ5IGhvdXNlcy4=",
b"SSBoYXZlIHBhc3NlZCB3aXRoIGEgbm9kIG9mIHRoZSBoZWFk",
b"T3IgcG9saXRlIG1lYW5pbmdsZXNzIHdvcmRzLA==",
b"T3IgaGF2ZSBsaW5nZXJlZCBhd2hpbGUgYW5kIHNhaWQ=",
b"UG9saXRlIG1lYW5pbmdsZXNzIHdvcmRzLA==",
b"QW5kIHRob3VnaHQgYmVmb3JlIEkgaGFkIGRvbmU=",
b"T2YgYSBtb2NraW5nIHRhbGUgb3IgYSBnaWJl",
b"VG8gcGxlYXNlIGEgY29tcGFuaW9u",
b"QXJvdW5kIHRoZSBmaXJlIGF0IHRoZSBjbHViLA==",
b"QmVpbmcgY2VydGFpbiB0aGF0IHRoZXkgYW5kIEk=",
b"QnV0IGxpdmVkIHdoZXJlIG1vdGxleSBpcyB3b3JuOg==",
b"QWxsIGNoYW5nZWQsIGNoYW5nZWQgdXR0ZXJseTo=",
b"QSB0ZXJyaWJsZSBiZWF1dHkgaXMgYm9ybi4=",
b"VGhhdCB3b21hbidzIGRheXMgd2VyZSBzcGVudA==",
b"SW4gaWdub3JhbnQgZ29vZCB3aWxsLA==",
b"SGVyIG5pZ2h0cyBpbiBhcmd1bWVudA==",
b"VW50aWwgaGVyIHZvaWNlIGdyZXcgc2hyaWxsLg==",
b"V2hhdCB2b2ljZSBtb3JlIHN3ZWV0IHRoYW4gaGVycw==",
b"V2hlbiB5b3VuZyBhbmQgYmVhdXRpZnVsLA==",
b"U2hlIHJvZGUgdG8gaGFycmllcnM/",
b"VGhpcyBtYW4gaGFkIGtlcHQgYSBzY2hvb2w=",
b"QW5kIHJvZGUgb3VyIHdpbmdlZCBob3JzZS4=",
b"VGhpcyBvdGhlciBoaXMgaGVscGVyIGFuZCBmcmllbmQ=",
b"V2FzIGNvbWluZyBpbnRvIGhpcyBmb3JjZTs=",
b"SGUgbWlnaHQgaGF2ZSB3b24gZmFtZSBpbiB0aGUgZW5kLA==",
b"U28gc2Vuc2l0aXZlIGhpcyBuYXR1cmUgc2VlbWVkLA==",
b"U28gZGFyaW5nIGFuZCBzd2VldCBoaXMgdGhvdWdodC4=",
b"VGhpcyBvdGhlciBtYW4gSSBoYWQgZHJlYW1lZA==",
b"QSBkcnVua2VuLCB2YWluLWdsb3Jpb3VzIGxvdXQu",
b"SGUgaGFkIGRvbmUgbW9zdCBiaXR0ZXIgd3Jvbmc=",
b"VG8gc29tZSB3aG8gYXJlIG5lYXIgbXkgaGVhcnQs",
b"WWV0IEkgbnVtYmVyIGhpbSBpbiB0aGUgc29uZzs=",
b"SGUsIHRvbywgaGFzIHJlc2lnbmVkIGhpcyBwYXJ0",
b"SW4gdGhlIGNhc3VhbCBjb21lZHk7",
b"SGUsIHRvbywgaGFzIGJlZW4gY2hhbmdlZCBpbiBoaXMgdHVybiw=",
b"VHJhbnNmb3JtZWQgdXR0ZXJseTo=",
b"QSB0ZXJyaWJsZSBiZWF1dHkgaXMgYm9ybi4="]

In [4]:
import os
# Random key, but a FIXED nonce: every string below is encrypted against the
# very same keystream, which is the weakness exploited in this challenge.
key19 = os.urandom(AES.block_size)
nonce = 0
ciphers19 = [ aes_ctr_decode_encode(b64decode(s),key19,nonce) for s in strings19 ]

## Attack

> Because the CTR nonce wasn't randomized for each encryption, each ciphertext has been encrypted against the same keystream. (...) the actual "encryption" of a byte of data boils down to a single XOR operation

I can try to reconstruct the keystream by using frequency analysis on all bytes in the same position from all 40 ciphers, using the same approach from challenge 3. One issue will be that this will work fine for the first bytes (for which I have a lot of ciphers) but fails for the last bytes of longer ciphers...

In [65]:
# Frequency table used to score the first byte of every message (see its use
# for column 0 in the keystream search). The literal is written in percent and
# converted to fractions on the fly; keys are uppercase letters.
# NOTE(review): presumably word-initial letter frequencies in English — source not recorded.
freqs_initials = {
    letter: percent * 0.01
    for letter, percent in {
        "A": 11.7,
        "B": 4.4,
        "C": 5.2,
        "D": 3.2,
        "E": 2.8,
        "F": 4,
        "G": 1.6,
        "H": 4.2,
        "I": 7.3,
        "J": 0.51,
        "K": 0.86,
        "L": 2.4,
        "M": 3.8,
        "N": 2.3,
        "O": 7.6,
        "P": 4.3,
        "Q": 0.22,
        "R": 2.8,
        "S": 6.7,
        "T": 16,
        "U": 1.2,
        "V": 0.82,
        "W": 5.5,
        "X": 0.045,
        "Y": 0.76,
        "Z": 0.045,
    }.items()
}

In [66]:
from cryptopals.utils import freqs_letters, score_text, crack_single_xor

def find_ctr_keystream(ciphers):
    """Guess the shared CTR keystream from ciphertexts encrypted with one nonce.

    The ciphertexts are transposed into columns (all bytes at the same offset).
    Each column was XOR-ed with a single keystream byte, so it is cracked as a
    single-byte XOR: offset 0 is scored with ``freqs_initials``, all other
    offsets with ``freqs_letters``.

    Returns:
    - The guessed keystream, as long as the longest ciphertext.
    """
    longest = max([len(c) for c in ciphers])

    # Column `pos` gathers, in input order, byte `pos` of every ciphertext
    # that is long enough to have one.
    columns = [
        bytes(c[pos] for c in ciphers if pos < len(c))
        for pos in range(longest)
    ]

    key_bytes = []
    for pos, column in enumerate(columns):
        table = freqs_initials if pos == 0 else freqs_letters
        # Element 2 of the result of crack_single_xor is the key guess.
        key_bytes.append(crack_single_xor(column, table)[2])

    return b"".join(key_bytes)

In [67]:
# Guess the keystream statistically, then decrypt every ciphertext with it.
# Bytes near the end of the longest lines are backed by very few samples, so
# the tail of those lines is expected to come out garbled.
keystream19 = find_ctr_keystream(ciphers19)
for c in ciphers19:
    print(bytes_xor(c,keystream19))

b'i have met them at close of&,a&'
b'coming with vivid faces'
b'from counter or desk among a:e&'
b'eighteenth-century houses.'
b'i have passed with a nod of&<h:\x80\x08\t\x00\x00'
b'or polite meaningless words*'
b'or have lingered awhile and&;a6\xc4'
b'polite meaningless words,'
b'and thought before I had doh-'
b'of a mocking tale or a gibe'
b'to please a companion'
b'around the fire at the club*'
b'being certain that they and&\x01'
b'but lived where motley is wi:ne'
b'all changed, changed utterl\x7fr'
b'a terrible beauty is born.'
b"that woman's days were spenr"
b'in ignorant good will,'
b'her nights in argument'
b'until her voice grew shrill('
b'what voice more sweet than n-r,'
b'when young and beautiful,'
b'she rode to harriers?'
b'this man had kept a school'
b'and rode our winged horse.'
b'this other his helper and ft!e1\xc4'
b'was coming into his force;'
b'he might have won fame in tn- :\xce\x04@'
b'so sensitive his nature seek-ds'
b'so daring and sweet his thos/h+\x8e'
b'this oth

### Notes

* Why is the first letter lowercase (when it is uppercase in the original strings)?

# 20. Break fixed-nonce CTR statistically

https://cryptopals.com/sets/3/challenges/20

> Take your collection of ciphertexts and truncate them to a common length (the length of the smallest ciphertext will work).

I could do this, or I could reuse the approach from challenge 19 and try to exploit the smaller statistics available for the longer messages as well: in many cases I can do better!

### Notes

* The first character is never recovered: why?!
    * The first character is always a letter (no space or punctuation!), so its frequency distribution is likely different from that of normal text. I tried to implement this, but it does not solve the issue (?)
    * The fact that the first letter in Challenge 19 (solved in the same way) is always lowercase adds to the weirdness of the result... 
* Even smaller statistics can lead to a correct single-XOR crack, so there is no real reason not to attempt the full-length ciphertexts rather than truncating the data to the smallest ciphertext!

In [68]:
key20 = os.urandom(AES.block_size)

# One base64-encoded plaintext per line of the challenge input file.
with open("input/20.txt") as f:
    strings20 = [ b64decode(l.strip()) for l in f.readlines() ]

# NOTE(review): `nonce` is not set in this cell; it is the value left over from
# the challenge 19 cell (0), so again every line shares one keystream.
ciphers20 = [ aes_ctr_decode_encode(p,key20,nonce) for p in strings20 ]

In [69]:
# Truncated variant suggested by the challenge text. It is computed here but
# not used below: the keystream is estimated from the full-length ciphertexts.
minlen = min([len(c) for c in ciphers20])
ciphers20_trunc = [c[:minlen] for c in ciphers20]

#keystream20 = find_ctr_keystream(ciphers20_trunc)
keystream20 = find_ctr_keystream(ciphers20)

In [70]:
# Decrypt every ciphertext with the guessed keystream and show it next to the
# known original for a visual comparison.
plaintext20 = [ bytes_xor(c,keystream20) for c in ciphers20]

for p,s in zip(plaintext20,strings20):
    print(f"Original  : {s.decode()}")
    # Wrongly guessed keystream bytes can produce byte sequences that are not
    # valid UTF-8; substitute them instead of aborting the whole comparison.
    print(f"Recovered : {p.decode(errors='replace')}")
    print()

Recovered : n'm rated "R"...this is a wasning, ya better void / Poets are paranoid,!DJ's&D-stroyed

Original  : Cuz I came back to attack others in spite- / Strike like lightnin', It's quite frightenin'!
Recovered : duz I came back to attack otiers in spite- / Strike like lightnin', It'r quire frightenin' 

Original  : But don't be afraid in the dark, in a park / Not a scream or a cry, or a bark, more like a spark;
Recovered : eut don't be afraid in the d`rk, in a park / Not a scream or a cry, or ` barm, more like a!pa$n

Original  : Ya tremble like a alcoholic, muscles tighten up / What's that, lighten up! You see a sight but
Recovered : ~a tremble like a alcoholic,!muscles tighten up / What's that, lighten tp! Yiu see a sight!nut

Original  : Suddenly you feel like your in a horror flick / You grab your heart then wish for tomorrow quick!
Recovered : tuddenly you feel like your hn a horror flick / You grab your heart theo wisn for tomorrow!}uin$t

Original  : Music's the clue, when

# 21. Implement the MT19937 Mersenne Twister RNG

https://cryptopals.com/sets/3/challenges/21

https://en.wikipedia.org/wiki/Mersenne_Twister

In [1]:
class MT19937:
    """Pure-Python 32-bit Mersenne Twister (MT19937) generator.

    The state is a circular array of ``n`` 32-bit words. Each call to
    ``random()`` regenerates the word at ``state_index``, advances the index
    and returns the tempered value of the regenerated word.

    Attributes:
    - state_array: list of ``n`` 32-bit state words.
    - state_index: position of the next word to regenerate (0 <= index < n).
    - seed: the seed value exactly as passed to the constructor.
    """

    # Coefficients for MT19937
    (w, n, m, r) = (32, 624, 397, 31)
    a = 0x9908B0DF
    (u, d) = (11, 0xFFFFFFFF)
    (s, b) = (7, 0x9D2C5680)
    (t, c) = (15, 0xEFC60000)
    l = 18
    f = 1812433253

    # UMASK keeps the bits above the low r bits (0x80000000 for r = 31),
    # LMASK keeps the low r bits (0x7FFFFFFF).
    UMASK = 0xFFFFFFFF & (0xFFFFFFFF << r)       # Limit to 32 bits
    LMASK = 0xFFFFFFFF & (0xFFFFFFFF >> (w - r)) # Limit to 32 bits

    def __init__(self, seed: int = 19650218):
        """Create a generator and initialise its state from ``seed``."""
        self.state_array = [0] * self.n  # Array for the state vector
        self.state_index = 0            # Index into state vector array, 0 <= state_index <= n-1
        self.initialize_state(seed)
        self.seed = seed

    def initialize_state(self, seed: int = 19650218):
        """(Re)fill the state array from ``seed`` and rewind the index to 0.

        Only the low 32 bits of ``seed`` are used.
        """
        # Reduce the seed to 32 bits BEFORE it enters the recurrence, so that
        # wider (or negative) seeds cannot leak high bits into state word 1.
        seed &= 0xFFFFFFFF
        self.state_array[0] = seed
        for i in range(1, self.n):
            seed = (self.f * (seed ^ (seed >> (self.w - 2))) + i) & 0xFFFFFFFF # Limit to 32 bits
            self.state_array[i] = seed
        self.state_index = 0

    def random(self):
        """Advance the generator by one step and return a 32-bit integer."""
        k = self.state_index               # Current state location
        j = (k - (self.n - 1)) % self.n    # n-1 iterations before

        # Combine the upper bits of word k with the lower bits of word k+1.
        x = (self.state_array[k] & self.UMASK) | (self.state_array[j] & self.LMASK)
        xA = x >> 1
        if x & 0x00000001:
            xA ^= self.a

        j = (k - (self.n - self.m)) % self.n  # n-m iterations before
        x = self.state_array[j] ^ xA          # Compute next value in the state

        self.state_array[k] = x & 0xFFFFFFFF  # Limit to 32 bits
        k = (k + 1) % self.n                  # Circular indexing
        self.state_index = k

        # Tempering
        y = x ^ (x >> self.u)
        y = y ^ ((y << self.s) & self.b)
        y = y ^ ((y << self.t) & self.c)
        z = y ^ (y >> self.l)
        return z & 0xFFFFFFFF  # Return 32-bit integer

In [2]:
# Print the first ten outputs of a generator created with the default seed.
mt_random = MT19937()

for _ in range(10):
    print(mt_random.random())

2325592414
482149846
4177211283
3872387439
1663027210
2005191859
666881213
3289399202
2514534568
3882134983


# 22. Crack an MT19937 seed

https://cryptopals.com/sets/3/challenges/22

In [3]:
import time
import random

def get_MT19937_int32(reallywait=True,tmin=40,tmax=1000):
    '''
    Produce the first output of an MT19937 seeded with a Unix timestamp.

    Two random delays are drawn uniformly from [tmin, tmax] seconds. With
    reallywait=True the function sleeps for the first delay, seeds the RNG with
    the current Unix time, sleeps for the second delay and returns the first
    32-bit output. With reallywait=False nothing is slept: the seed is simply
    the current time moved back by both delays.
    '''
    first_delay, second_delay = (random.uniform(tmin, tmax) for _ in range(2))
    print(f"Waiting {first_delay:2.2f} seconds...")
    if not reallywait:
        # Simulated wait: pretend the seeding happened in the past.
        seed = int(time.time() - first_delay - second_delay)
    else:
        time.sleep(first_delay)
        seed = int(time.time())
    print(f"Seedind MT19937 with current Unix time ({seed})...")
    generator = MT19937(seed)
    print(f"Waiting {second_delay:2.2f} seconds...")
    if reallywait:
        time.sleep(second_delay)
    output = generator.random()
    print(f"Serving random number {output}")
    return output

def crack_MT19937_seed(rnd32):
    """Find the Unix-timestamp seed whose first MT19937 output is ``rnd32``.

    Candidate seeds are tried from one second in the future backwards in time,
    down to 0.

    Returns:
    - The first (most recent) seed reproducing ``rnd32``.

    Raises:
    - ValueError: if no timestamp between now and 0 reproduces ``rnd32``
      (instead of searching forever).
    """
    print("Attempting to guess MT19937 seed...")
    newest = int(time.time()) + 1
    # go back in time, one second per candidate
    for candidate in range(newest, -1, -1):
        if MT19937(candidate).random() == rnd32:
            print(f"Guessed seed: {candidate}")
            return candidate
    raise ValueError(f"no timestamp seed in [0, {newest}] produces {rnd32}")

In [4]:
# Fast run: the waits are simulated (no sleeping), the seed is back-dated.
rnd32 = get_MT19937_int32(False)
print()
# The cell's last expression: the recovered seed is also shown as cell output.
crack_MT19937_seed(rnd32)

Waiting 666.76 seconds...
Seedind MT19937 with current Unix time (1736774352)...
Waiting 200.56 seconds...
Serving random number 2527403948

Attempting to guess MT19937 seed...
Guessed seed: 1736774352


1736774352

In [5]:
# Real run: actually sleeps, with both delays drawn from 40-100 seconds.
rnd32 = get_MT19937_int32(True,40,100)
print()
# The cell's last expression: the recovered seed is also shown as cell output.
crack_MT19937_seed(rnd32)

Waiting 41.39 seconds...
Seedind MT19937 with current Unix time (1736775261)...
Waiting 63.92 seconds...
Serving random number 2243879853

Attempting to guess MT19937 seed...
Guessed seed: 1736775261


1736775261

# 23. Clone an MT19937 RNG from its output

https://cryptopals.com/sets/3/challenges/23

### XOR inverting

Inverting Left-Shift AND XOR (e.g. `y = y ^ ((y << t) & c)`):

This operation involves left-shifting $y$ by $t$ bits, ANDing it with $c$, and XORing it with $y$. To invert it:

* Start with the given $z$.

* Begin with the least significant bits (LSBs), since these are not affected by the left shift.

* Gradually determine higher-order bits, propagating information from lower-order bits.
The loop `for _ in range(32): y = z ^ ((y << t) & c)` (always XOR-ing against the original $z$, starting from $y = z$) ensures the dependencies from lower to higher bits are fully resolved.

In [6]:
def temper(x, u, s, b, t, c, l):
    """
    Run the four MT19937-style tempering steps on x.

    Parameters:
    - x: The value to temper.
    - u, l: Right-shift amounts of the first and last step.
    - s, t: Left-shift amounts of the two middle steps.
    - b, c: AND masks paired with the s and t shifts respectively.

    Returns:
    - The tempered value, truncated to 32 bits.
    """
    value = x
    value ^= value >> u            # Step 1: fold high bits down
    value ^= (value << s) & b      # Step 2: masked left shift
    value ^= (value << t) & c      # Step 3: masked left shift
    value ^= value >> l            # Step 4: fold high bits down
    return value & 0xFFFFFFFF

def invert_xor_shift(value, shift, mask, direction):
    """
    Inverts an XOR combined with a bitwise shift operation.

    Solves ``value == x ^ (x >> shift)`` ('right') or
    ``value == x ^ ((x << shift) & mask)`` ('left') for ``x`` by fixed-point
    iteration: every pass makes ``shift`` more bits of the guess correct.

    Parameters:
    - value: The output value from the tempering step to invert.
    - shift: The number of bits to shift.
    - mask: The mask applied in the original operation (ignored for 'right').
    - direction: The direction of the shift, either 'left' or 'right'.

    Returns:
    - The inverted value.

    Raises:
    - ValueError: if direction is neither 'left' nor 'right'.
    """
    # Pick the shifted term once instead of re-testing `direction` every pass.
    if direction == 'right':
        def shifted(current):
            return current >> shift
    elif direction == 'left':
        def shifted(current):
            return (current << shift) & mask
    else:
        raise ValueError(f"direction must be 'left' or 'right', got {direction!r}")

    result = value
    for _ in range(32):  # Iteratively resolve dependencies for all bits
        result = value ^ shifted(result)
    return result

def inverse_temper(z, u, s, b, t, c, l):
    """
    Undo the tempering function, recovering the untempered value from z.

    Parameters:
    - z: The tempered output value.
    - u, s, t, l: Shift parameters from the tempering function.
    - b, c: Masks used in the tempering function.

    Returns:
    - The original input value x (32 bits).
    """
    # The tempering steps are undone last-to-first:
    # (shift, mask, direction) for steps 4, 3, 2, 1.
    undo_plan = (
        (l, 0, 'right'),
        (t, c, 'left'),
        (s, b, 'left'),
        (u, 0, 'right'),
    )
    value = z
    for shift, mask, direction in undo_plan:
        value = invert_xor_shift(value, shift, mask, direction)
    return value & 0xFFFFFFFF

In [7]:
# Coefficients for MT19937
# (module-level copies of the values defined on the MT19937 class, so the
# standalone temper / inverse_temper helpers can be called directly)
(w, n, m, r) = (32, 624, 397, 31)
a = 0x9908B0DF
(u, d) = (11, 0xFFFFFFFF)
(s, b) = (7, 0x9D2C5680)
(t, c) = (15, 0xEFC60000)
l = 18
f = 1812433253

# Round-trip sanity check: inverse_temper(temper(x)) must give back x.
x = 2934351
z = temper(x, u, s, b, t, c, l)
x1 = inverse_temper(z, u, s, b, t, c, l)
print(x,x1)

2934351 2934351


In [8]:
# Initialize target MT19937 generator.
# Use random seed (time, even if time should not be used ;-) )

import time
seed = int(time.time())
mt_random = MT19937(seed)

# let generator run for a while...
# (discard a random number of outputs so the target sits at an unknown
# position in its output stream)
import random
for _ in range(int(random.uniform(1,1000))):
    _ = mt_random.random()

In [9]:
# listen for next 624 random numbers, invert them, store results in state array 
state_array = [ inverse_temper(mt_random.random(), u, s, b, t, c, l) for _ in range(n) ] # n = 624, size of state_array

# create clone generator, use seed=0 to guarantee it'd be different from target one
# align to target using state array
# Only state_array is replaced; the clone's state_index stays at 0. The array is
# therefore a rotated copy of the target's, which is harmless because random()
# only addresses words relative to state_index, modulo n.
mt_random_clone = MT19937(seed=0)
mt_random_clone.state_array = state_array

# check generators are aligned
print(f"SEED T = {mt_random.seed:10}  SEED C = {mt_random_clone.seed:10}")
print("="*40)
for _ in range(10):
    print(f"Target = {mt_random.random():10}   Clone = {mt_random_clone.random():10}")

SEED T = 1736775325  SEED C =          0
Target = 3854118812   Clone = 3854118812
Target =  380392037   Clone =  380392037
Target = 2839737078   Clone = 2839737078
Target = 1038171841   Clone = 1038171841
Target = 4053706524   Clone = 4053706524
Target =  945699746   Clone =  945699746
Target = 2665857380   Clone = 2665857380
Target =  166914768   Clone =  166914768
Target =  672230279   Clone =  672230279
Target =  727876151   Clone =  727876151


In [10]:
# numpy has a MT implementation, it can be cloned with the same technique

import numpy as np

mt_np = np.random.MT19937()

# Same recipe as above: untemper 624 consecutive raw outputs and install them
# as the clone's state (the clone's own seed is irrelevant once overwritten).
# NOTE(review): assumes random_raw() yields the plain 32-bit tempered outputs — confirm against numpy docs.
state_array_np = [ inverse_temper(mt_np.random_raw(), u, s, b, t, c, l) for _ in range(n) ] # n = 624, size of state_array
mt_random_clone_np = MT19937(seed=123456789)
mt_random_clone_np.state_array = state_array_np

for _ in range(10):
    print(f"Target = {mt_np.random_raw():10}   Clone = {mt_random_clone_np.random():10}")

Target = 1028434106   Clone = 1028434106
Target = 2806997165   Clone = 2806997165
Target = 2716032945   Clone = 2716032945
Target =  841160340   Clone =  841160340
Target = 2405049579   Clone = 2405049579
Target = 1117596065   Clone = 1117596065
Target = 1429809184   Clone = 1429809184
Target = 2725643703   Clone = 2725643703
Target = 4200412251   Clone = 4200412251
Target = 2644740582   Clone = 2644740582


### Discussion

> How would you modify MT19937 to make this attack hard? What would happen if you subjected each tempered output to a cryptographic hash?

Some ideas:

**Adding a Cryptographic Hash to Outputs**

By applying a cryptographic hash (like SHA-256 or SHA-3) to each tempered output, one effectively obscures the relationship between the internal state and the output. The cryptographic hash is designed to be one-way, so it becomes computationally infeasible for an attacker to reverse the hash and recover $z$.

**Introduce a Mixing Layer**

After generating the random output $z$, one could pass it through a mixing layer that makes inversion infeasible. For example:

- Use a keyed cryptographic function like AES:
  $\text{output} = \text{AES}_{\text{key}}(z)$
  Without the key, the attacker cannot reverse the output back to $z$.

- Combine multiple outputs before releasing them:
  $
  \text{output} = (z_1 \oplus z_2) \oplus z_3
  $
  This adds complexity to the recovery process.

**Periodic State Reseeding**

- Periodically reseed the RNG state with entropy from a secure source (e.g., hardware RNG).
- Even if an attacker partially recovers the state, the reseeding process disrupts their ability to predict future outputs.

# 24. Create the MT19937 stream cipher and break it

https://cryptopals.com/sets/3/challenges/24

> Use MT19937 with a 16-bit seed to generate a sequence of 8 bit outputs. Use those outputs as a keystream: XOR each byte of plaintext with each successive byte of keystream. This code should look similar to your CTR code.

> From the ciphertext, recover the "key" (the 16 bit seed). Use the same idea to generate a random "password reset token" using MT19937 seeded from the current time.

Since the seed space is relatively small ($2^{16}$), it can easily be brute-forced if plaintext and cipher are known.

In [11]:
from cryptopals.random import MT19937
from cryptopals.utils import bytes_xor
import math

def convert_int32_4int8(n: int) -> list:
    '''Convert a 32-bit integer into a list of 4 8-bit integers.

    The bytes are listed most-significant first (big-endian). The byte order is
    passed explicitly so the function does not depend on the default
    ``byteorder`` argument that ``int.to_bytes`` only gained in Python 3.11.

    Raises:
    - OverflowError: if n is negative or does not fit in 4 bytes.
    '''
    return list(n.to_bytes(4, byteorder="big"))

def generate_MT19937_keystream(seed: int, msglen: int) -> bytes:
    """Build ``msglen`` keystream bytes from an MT19937 seeded with ``seed``.

    Every 32-bit generator output contributes its 4 bytes, in the order given
    by ``convert_int32_4int8``; the surplus of the last word is dropped.
    """
    generator = MT19937(seed)
    words_needed = math.ceil(msglen / 4)
    stream = bytearray()
    for _ in range(words_needed):
        stream.extend(convert_int32_4int8(generator.random()))
    return bytes(stream[:msglen])

def MT19937_stream_decode_encode(b: bytes, seed: int) -> bytes:
    """XOR ``b`` with the MT19937 keystream for ``seed``.

    Encryption and decryption are the same operation.
    """
    keystream = generate_MT19937_keystream(seed, len(b))
    return bytes_xor(b, keystream)

In [12]:
import random

seed = random.getrandbits(16) # 16-bit seed

plaintext = b"This is a secret message"

# Round trip: encrypting and then decrypting with the same seed must give the
# original message back.
cipher = MT19937_stream_decode_encode(plaintext, seed)
plaintext = MT19937_stream_decode_encode(cipher, seed)

print(plaintext)

b'This is a secret message'


In [13]:
def recover_seed_cipher_plain(cipher: bytes, plaintext: bytes) -> int:
    """Brute-force the 16-bit seed from a known plaintext/ciphertext pair.

    Returns the smallest seed that decrypts ``cipher`` to ``plaintext``, or
    None when none of the 2**16 seeds does.
    """
    matching_seeds = (
        candidate
        for candidate in range(2**16)
        if MT19937_stream_decode_encode(cipher, candidate) == plaintext
    )
    return next(matching_seeds, None)

In [14]:
from string import ascii_letters
import time

# Plaintext = random-length prefix (10 to 29 random letters) followed by 14 'A's.
plaintext = str.encode("".join([random.choice(ascii_letters) 
                                for _ in range(int(random.uniform(10,30)))]) + 14*"A" ) 
seed = random.getrandbits(16)
cipher = MT19937_stream_decode_encode(plaintext, seed)

# Time the brute-force search over all 16-bit seeds.
start = time.time()
seed_guess = recover_seed_cipher_plain(cipher, plaintext)
end = time.time()

print(f"seed = {seed:5} | seed guess = {seed_guess:5}")
print(f"seed guessed in {end-start:3.1f} s")

seed =  5994 | seed guess =  5994
seed guessed in 1.1 s


In [15]:
from cryptopals.utils import freqs_letters, score_text

def recover_seed_cipher(cypher: bytes, freqs=freqs_letters) -> tuple:
    """Brute-force the 16-bit seed from the ciphertext alone.

    Every seed is tried; the decryption whose ``score_text`` score is lowest
    wins.

    Parameters:
    - cypher: the ciphertext to attack.
    - freqs: frequency table handed to ``score_text``.

    Returns:
    - (score, plaintext guess, seed guess) of the best candidate; on equal
      scores the smallest seed is kept.
    """
    best_guess = (float('inf'), None, None) # score, plaintext guess, key guess
    for seed_guess in range(2**16):
        # Decrypt the ARGUMENT, not whatever global `cipher` happens to exist.
        plaintext_guess = MT19937_stream_decode_encode(cypher, seed_guess)
        score = score_text(plaintext_guess, freqs=freqs)
        # Compare scores only: comparing whole tuples would fall through to
        # the plaintext field on a tie and may compare None with bytes.
        if score < best_guess[0]:
            best_guess = (score, plaintext_guess, seed_guess)
    return best_guess

In [16]:
plaintext = b"This is a longer secret message, that I could possibly crack with frequency analysis"
seed = random.getrandbits(16)
cipher = MT19937_stream_decode_encode(plaintext, seed)

# Ciphertext-only attack: the plaintext is not given to the cracker.
start = time.time()
score, plaintext_guess, seed_guess = recover_seed_cipher(cipher)
end = time.time()

print(f"seed = {seed:5} | seed guess = {seed_guess:5}")
print(f"seed guessed in {end-start:3.1f} s")
print(f"{plaintext_guess=}")

seed = 28379 | seed guess = 28379
seed guessed in 14.8 s
plaintext_guess=b'This is a longer secret message, that I could possibly crack with frequency analysis'
