In [7]:
import pandas as pd

In [12]:
import math

## Codigo de md5

In [8]:
def F(B,C,D):
    return (B & C) | (~B & D)

def G(B,C,D):
    return (B & D) | (C & ~D)

def H(B,C,D):
    return B ^ C ^ D

def I(B,C,D):
    return C ^ (B | ~D)

In [9]:
def custom_md5(m: str, h0: int) -> str:
    # Argumentos:
    #    m:   str - mensaje
    #    h0: int - constante  inicial  H_0
    # Retorna:
    #    str - hash  MD5  correcto  del  mensaje  en  formato  hexadecimal
    
    # PADDING
    m = bytearray(m, encoding='utf-8')
    orig_len_in_bits = (8 * len(m)) & 0xffffffffffffffff
    m.append(0x80)
    while len(m) % 64 != 56:
        m.append(0)
    m += orig_len_in_bits.to_bytes(8, byteorder='little')
    chunks = [m[i:i+64] for i in range(0, len(m), 64)]
    
    # Define s and K
    s = [0 for i in range(64)] 
    s[ 0:16] = [ 7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22 ]
    s[16:32] = [ 5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20 ]
    s[32:48] = [ 4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23 ]
    s[48:64] = [ 6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21 ]
    
    K = [0 for i in range(64)]
    for i in range(64):
        K[i] = math.floor(2**32 * abs(math.sin(i + 1)))
    
    A = (h0 // 2**(32*3)) & 0xFFFFFFFF
    B = (h0 // 2**(32*2) - A * 2**32) & 0xFFFFFFFF
    C = (h0 // 2**(32) - A * 2**(32*2) - B * 2**32) & 0xFFFFFFFF
    D = (h0 - A * 2**(32*3) - B * 2**(32*2) - C * 2**32) & 0xFFFFFFFF
    
    for chunk in chunks:
        M = [chunk[chk:chk+4] for chk in range(0, 64, 4)]
        a,b,c,d = A,B,C,D
        # Main loop
        for i in range(64):
            f = ''
            g = 0
            if 0 <= i <= 15:
                f = F(b,c,d)
                g = i
            elif 16 <= i <= 31:
                f = G(b,c,d)
                g = (5*i + 1) % 16
            elif 32 <= i <= 47:
                f = H(b,c,d)
                g = (3*i + 5) % 16
            elif 48 <= i <= 63:
                f = I(b,c,d)
                g = (7*i) % 16
            f = (f + a + K[i] + int.from_bytes(M[g], byteorder='little')) & 0xFFFFFFFF
            a = d
            d = c
            c = b
            b = (b + (((f << s[i]) & 0xFFFFFFFF) | (f >> (32-s[i])))) & 0xFFFFFFFF
        A = (A + a) & 0xFFFFFFFF
        B = (B + b) & 0xFFFFFFFF
        C = (C + c) & 0xFFFFFFFF
        D = (D + d) & 0xFFFFFFFF
    final_value = A + (B << 32) + (C << 32*2) + (D << 32*3)
    digest = final_value.to_bytes(16, byteorder='little')
    return f'{int.from_bytes(digest, byteorder="big"):032x}'

## Pregunta 3

In [1]:
def get_h0(num: int) -> int:
    return (num << 32*3) + 74215557203200330836762317942

In [3]:
def _check_strings(*args):
    for arg in args:
        if not isinstance(arg, str):
            raise AttributeError("Expected a string")


def xor(k, m):
    _check_strings(k, m)

    result = ""
    for i in range(len(m)):
        result += chr((ord(k[i % len(k)]) ^ ord(m[i])) % 128)

    return result

In [5]:
def binary_string_to_string(bin_str: str) -> str:
    out_str = ''
    for i in range(0,len(bin_str),8):
        out_str += chr(int(bin_str[i:i+8],2))
    return out_str

def string_to_binary_string(string: str) -> str:
    return ''.join([str(f'{ord(c):07b}') for c in string])

In [10]:
channel = pd.read_csv('mensajes_pregunta_3.csv', header=None)

In [13]:
i = 0
msgs = []
while(True):
    value = custom_md5('jpchacon@uc.cl', get_h0(16641590 *100 + i))
    if len(channel.loc[channel[0] == value]) == 0:
        break
    msgs.append(channel.loc[channel[0] == value].iloc[0][1])
    i += 1
    
msgs = list(map(lambda x: binary_string_to_string(x), msgs))

In [22]:
msgs

['\x7f\x08P\\!\x07|>\x05\x12',
 's\x08_Ps\x04:=\x08\x12',
 '>XVF \x0e8i\x04\x11',
 '>\\_Ps\x083;\x1f\x17',
 'zGEF}K\x14,M\x1d',
 'wL\x17[<\x1f|"\x03\x17',
 'i\x08_P!K2(\x00\x1e',
 "2\x08U@'K4,M\x11",
 "pM@\x15'\x03==M\x06",
 'vM\x17B<\x197,\tT',
 'wF\x17A;\x0e|\x0f\x05\x1e',
 'jAX[s/99\r\x0e',
 "jER['E|\x19\x1e\x1a",
 'm]ZT1\x07%i\x1f\x17',
 'pKR\x15;\x0e|!\r\x1d',
 '>[XX6\x1f5$\t\x0b',
 '>[RP=K4,\x1e[',
 'iAC]s\x045%\x15Z',
 "vIYQ K='\x08U",
 '}IEG*\x022.L\x15',
 'C\x0bw$yNNuCX',
 "\x0b\x1d'-vD\x0bt\x0cG",
 '\x06Xj tHJi\nJ',
 "\x02\x14'/xB\x0bh\r\x08",
 '\x0c\x16bexF\x0bs\x0bJ',
 'C\x16h3rL\x06p\x11G',
 '\x17\x11i"7MJd\x0bD',
 '\r\x1dtk7sCbC[',
 "\x02\x0b'$7BDk\x07\x0e",
 "\x0f\x17h.~NL'\x04K",
 '\x11\x14+exF\x0bf\x00D',
 "\x16\x0c'1`EEs\x1b\x07",
 '\x10\x1dq y\x0c\x0bp\x0b]',
 "\x0bXs-~C@'\nI",
 '\n\n+ev\x00Mu\x07L',
 '\x08\x14b!7FJd\x07\x02',
 'C\x19i!7S\\n\x04Y',
 'OXf1\x7fLNs\x0bO',
 'C\x15h3rMNi\x16P',
 'MXFeyAYu\rU',
 '}%\x14(/yp1w+',
 "<%\x1fe}px';<",
 '0v\x18/}a} w\x10',
 '(8\

In [23]:
def tap(encrypted_messages, words_by_key, min_len=0):
    probable_set = {msg: [0] * len(words_by_key) for msg in encrypted_messages}
    
    for msg in encrypted_messages:
        for idx, words in enumerate(words_by_key):
            if len(words_by_key[idx]) < min_len:
                continue
            length = len(words)
            match = 0
            for word in words:
                count_rare = 0
                count_common = 0
                for i in range(0,70,7):
                    if string_to_binary_string(xor(msg,word))[i:i+2] == '11':
                        count_rare += 1
                    elif string_to_binary_string(xor(msg,word))[i:i+2] == '00':
                        count_common += 1
                if count_rare <= 0 and count_common >= 5:
                    match += 1
            probable_set[msg][idx] = round(match/length, 4)
            
    for msg in encrypted_messages:
        max_value = max(probable_set[msg])
        max_index = probable_set[msg].index(max_value)
        if msg not in words_by_key[max_index]:
            for words in words_by_key:
                if msg in words:
                    words.remove(msg)
            words_by_key[max_index].add(msg)

In [24]:
def probability_space(word, words):
    length = len(word)
    count = [0] * length
    
    for i in words:
        cypher = string_to_binary_string(xor(word, i))
        for j in range(0,70,7):
            if cypher[j] == '1':
                count[int(j/7)] += 1
    return [round(i/len(words), 4) for i in count]

In [25]:
def get_max_index(i, l):
    result = 0
    max_prob = 0
    for idx, m in enumerate(l):
        if m[i] > max_prob:
            max_prob = m[i] 
            result = idx
    return result

In [26]:
def decript_set(encrypted_messages: list[str], decriptd_list: list[str]) -> list[str]:
    l = []
    for word in encrypted_messages:
        l.append(probability_space(word, encrypted_messages))
    
    max_indices = [get_max_index(i, l) for i in range(len(l[0]))]
    
    encrypted_spaces = ""
    for i in range(len(max_indices)):
        encrypted_spaces += encrypted_messages[max_indices[i]][i]
        
    probable_key = xor(encrypted_spaces, ' ' * 10)
    
    for msg in encrypted_messages:
        decriptd_list.append(xor(probable_key, msg))

In [27]:
def  break_random_otp(encrypted_messages: list[str]) -> list[str]:
    # Argumentos:
    #    encrypted_messages:   list[str] - lista de  mensajes  encriptados.
    # Retorna:
    #    list[str] - lista de  mensajes  decriptados.
    
    words_by_key = []
    words_classified = set()
    for idx, msg_1 in enumerate(encrypted_messages):
        if msg_1 not in words_classified:
            words_classified.add(msg_1)
            new_set = set()
            new_set.add(msg_1)
            words_by_key.append(new_set)
        for msg_2 in encrypted_messages[idx+1:]:
            if msg_2 in words_classified:
                continue
            count_rare = 0
            count_common = 0
            for i in range(0,70,7):
                if string_to_binary_string(xor(msg_1,msg_2))[i:i+2] == '11':
                    count_rare += 1
                elif string_to_binary_string(xor(msg_1,msg_2))[i:i+2] == '00':
                    count_common += 1
            if count_rare <= 0 and count_common >= 6:
                # search msg1 and add msg
                for words in words_by_key:
                    if msg_1 in words:
                        words.add(msg_2)
                        words_classified.add(msg_2)
    
    for _ in range(3):
        tap(encrypted_messages, words_by_key)
    tap(encrypted_messages, words_by_key, 10)
    tap(encrypted_messages, words_by_key, 15)
    
    words_by_key = list(filter(lambda x: len(x) > 0,words_by_key))
    
    # decript each set
    decripted_messages = []
    for cjto in words_by_key: 
        decript_set(list(cjto), decripted_messages)
    return decripted_messages

In [29]:
result = break_random_otp(msgs)

In [31]:
result

['ed not knm',
 'bce he h`g',
 '  but he k',
 'en the Fhd',
 'xment. Ps`',
 ',passed ik',
 ',sometimdq',
 '{ith oilx ',
 ',the corrm',
 'bew that |',
 ',seen hes!',
 'hors. He g',
 'dands ane/',
 'm girl whh',
 '\x7fumably rm',
 'oarrying!o',
 'de worked.',
 '{ her namd',
 'a he ofteh',
 'xion Dep`t',
 '}even  whu',
 'ol jon on ',
 '  A nmrrn}',
 'f thiok ia',
 '{t twintx/',
 '" ath`ethg',
 'eled jacd*',
 'ane oj thb',
 'os a nold&',
 '`es. _he s',
 'gr, a,frdd',
 'zing aachl',
 '.and \x7fwigq',
 'bookibg gc',
 '.nove`-wro',
 '|l, oj acl',
 '.moveaenux',
 'fe hah soo',
 '.spanber p',
 'k mecdanib',
 ' Set Le gv',
 'cd xhe 6af',
 'k hir h(qr',
 '# Wenst.o ',
 '-scmrle5 t',
 'h, {as 6ow',
 "bve~all2-'",
 '` oj th$ O',
 'xt xhe 2ib',
 '~t cf h$r.',
 'xnicr A/tm',
 'cd \x7feve3am',
 'ay inou&i%',
 'yo nrin&!k',
 '}elenes2!m',
 'lsh  em#lc',
 'ead,dis-hd',
 'gusx ti&ir',
 '-tiaes 3ou',
 "hd der 'sa",
 'y field} h',
 'icg her !B',
 'e-of homje',
 'ni cold.bi',
 'hh manaiee',
 'nhral cbem',
 