# Task 11 - SHA-1
Felix Kleinsteuber

Matrikelnummer: 185 709

In [19]:
import numpy as np

## 1. Preprocessing

In [20]:
# helper functions https://stackoverflow.com/a/30375198/6600660

def int_to_bytes(x: int) -> bytes:
    """Encode an unsigned integer as the shortest big-endian byte string.

    Zero needs no bits and therefore encodes to the empty byte string.
    """
    n_bytes = -(-x.bit_length() // 8)  # ceiling of bit_length / 8
    return x.to_bytes(n_bytes, byteorder='big')

def int_from_bytes(xbytes: bytes) -> int:
    """Decode a big-endian byte string into an unsigned integer."""
    return int.from_bytes(xbytes, byteorder='big', signed=False)

# new: fixed-width conversion (not part of the Stack Overflow helpers above)

def int_to_nbytes(i: int, n: int) -> bytearray:
    """Interpret i as an n-byte unsigned integer and convert it to n big-endian bytes.

    Only the lowest 8 * n bits of i are kept; higher bits are dropped.
    """
    out = bytearray(n)
    remaining = i
    # Fill from the last (least significant) position towards the front.
    for pos in reversed(range(n)):
        out[pos] = remaining & 0xFF
        remaining = remaining >> 8
    return out

In [21]:
# Sanity check: the single byte 0xFF decodes to 255
int_from_bytes(b"\xff")

255

In [22]:
def sha1_preprocess(m: bytes) -> list:
    """Pad a message for SHA-1 and split it into 512-bit blocks.

    Padding: one 0x80 byte, then zero bytes until the length is congruent to
    56 mod 64, then the original bit length as a 64-bit big-endian integer.

    Args:
        m: The message. The caller's object is never modified, even if it is
            a mutable ``bytearray``.

    Returns:
        A list of blocks; each block is a list of sixteen 32-bit words
        (Python ints, decoded big-endian).

    Side effects:
        Prints the message length in bits.
    """
    # Length of the message in bits
    mL = len(m) * 8
    print(f"message length: {mL} bits")
    # Work on an immutable copy: `+=` on a bytearray argument would otherwise
    # append the padding to the caller's buffer.
    padded = bytes(m)
    # Append a single 1 bit and fill up to a whole byte. Because we work on
    # bytes, 8 | mL holds, so this is just the byte 0x80 (0b1000 0000).
    padded += b"\x80"
    # Append 0x00 bytes until the length is congruent to 56 mod 64
    # (i.e. 448 mod 512 in bits).
    padded += bytes((56 - len(padded) % 64) % 64)
    # Append mL as a 64-bit big-endian integer
    padded += mL.to_bytes(8, "big")
    assert len(padded) % 64 == 0
    # Split into 512-bit (64-byte) blocks of sixteen 32-bit (4-byte) words
    return [
        [int.from_bytes(padded[off:off + 4], "big") for off in range(start, start + 64, 4)]
        for start in range(0, len(padded), 64)
    ]


In [23]:
# sha1_preprocess test: the 31-byte message plus padding must fit in a single block
test_msg = b"Hello hash world, it's me - SHA"
test_preprocessed = sha1_preprocess(test_msg)
print(test_preprocessed)
# Exactly one block, and that block holds sixteen words
assert [len(block) for block in test_preprocessed] == [16]

message length: 248 bits
[[1214606444, 1864394849, 1936203895, 1869769828, 740321652, 661856365, 1696607520, 1397244288, 0, 0, 0, 0, 0, 0, 0, 248]]


## 2. Verarbeitung der Blöcke

In [31]:
def left_rotate(i: int, n: int = 1) -> int:
    """Left rotate (cyclic left shift) a 32-bit unsigned integer by n bits.

    Args:
        i: The value to rotate. For n >= 1 only its low 32 bits are used.
        n: Number of bit positions; values of 32 or more wrap around.

    Returns:
        The rotated 32-bit value. For n <= 0 the input is returned unchanged.
    """
    if n <= 0:
        # Nothing to rotate; hand the value back untouched.
        return i
    shift = n % 32
    word = i & 0xFFFFFFFF
    # Single shift/or instead of rotating one bit per loop iteration.
    return ((word << shift) | (word >> (32 - shift))) & 0xFFFFFFFF

# left_rotate test: rotating by one bit must wrap bit 31 around into bit 0
lr_test = 0x80808080
lr_res = left_rotate(lr_test, n=1)
assert (lr_res & 1) == 1
print(hex(lr_res))

def sha1_process_block(w: list, h: list):
    """Run the SHA-1 compression function on one 512-bit block.

    Args:
        w: The block as sixteen 32-bit words. The caller's list is left
            untouched; the 80-word message schedule is built on a copy.
        h: The five 32-bit chaining values. Updated in place.

    Returns:
        The same ``h`` object, holding the chaining values after this block.

    Side effects:
        Prints the expanded 80-word message schedule.
    """
    assert len(w) == 16
    assert len(h) == 5
    # Expand to 80 words on a copy: appending to the caller's list would leave
    # it with 80 entries and break the length check on any later call.
    w = list(w)
    for i in range(16, 80):
        w.append(left_rotate(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16]))
    print("words:", w)
    # Initialise the working variables
    a, b, c, d, e = h
    # 80 rounds
    for i in range(80):
        if i < 20:
            # f = (b and c) or ((not b) and d)
            # Python's bitwise NOT yields a signed (negative) result, so the
            # 32-bit complement is computed by XOR with all ones instead.
            f = (b & c) | ((b ^ 0xFFFFFFFF) & d)
            k = 0x5A827999
        elif i < 40:
            # f = b xor c xor d
            f = b ^ c ^ d
            k = 0x6ED9EBA1
        elif i < 60:
            # f = (b and c) or (b and d) or (c and d)
            f = (b & c) | (b & d) | (c & d)
            k = 0x8F1BBCDC
        else:
            # f = b xor c xor d
            f = b ^ c ^ d
            k = 0xCA62C1D6
        # 32-bit addition (mod 2**32)
        new_a = (left_rotate(a, 5) + f + e + k + w[i]) & 0xFFFFFFFF
        e = d
        d = c
        c = left_rotate(b, 30)
        b = a
        a = new_a
    # Add this block's result to the chaining values (mod 2**32)
    h[0] = (h[0] + a) & 0xFFFFFFFF
    h[1] = (h[1] + b) & 0xFFFFFFFF
    h[2] = (h[2] + c) & 0xFFFFFFFF
    h[3] = (h[3] + d) & 0xFFFFFFFF
    h[4] = (h[4] + e) & 0xFFFFFFFF
    return h
    

0x1010101


## 3. Finaler Output

In [32]:
def h_to_hash(h: list) -> bytes:
    """Concatenate the five 32-bit chaining values into the final digest.

    The result is h_0 || h_1 || h_2 || h_3 || h_4, each value encoded as
    four bytes by int_to_nbytes.
    """
    assert len(h) == 5
    return b"".join(int_to_nbytes(h[idx], 4) for idx in range(5))

## 4. Alles zusammen - SHA-1

In [33]:
def sha1(m: bytes) -> bytes:
    """Compute the SHA-1 digest of a message.

    Args:
        m: The message to hash.

    Returns:
        The 20-byte digest.
    """
    blocks = sha1_preprocess(m)
    # Initial chaining values as plain Python ints. Every addition in
    # sha1_process_block is already reduced with `& 0xFFFFFFFF`, so no
    # fixed-width NumPy type is needed; using one would make the arithmetic
    # depend on NumPy's scalar promotion and overflow behaviour.
    h = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0]
    for block in blocks:
        h = sha1_process_block(block, h)
    return h_to_hash(h)

In [34]:
# Test against the expected SHA-1 digest of the empty message b""
empty_sha = sha1(b"")
print(empty_sha.hex())
print(hex(int_from_bytes(empty_sha)))
assert int_from_bytes(empty_sha) == 0xda39a3ee5e6b4b0d3255bfef95601890afd80709

message length: 0 bits
words: [2147483648, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 4, 0, 2, 8, 0, 0, 16, 0, 10, 32, 6, 0, 64, 8, 40, 128, 8, 0, 264, 0, 160, 512, 100, 0, 1032, 136, 668, 2048, 128, 40, 4232, 0, 2624, 8192, 1640, 128, 16520, 2176, 10440, 32768, 2216, 128, 67816, 0, 40960, 131200, 25600, 0, 264192, 34816, 171024, 524288, 32896, 10272, 1083584, 0, 671936, 2097280]
da39a3ee5e6b4b0d3255bfef95601890afd80709
0xda39a3ee5e6b4b0d3255bfef95601890afd80709


In [35]:
import hashlib

def reference_sha1(input: bytes) -> bytes:
    """Return the SHA-1 digest of ``input`` as computed by hashlib (reference value)."""
    hasher = hashlib.sha1()
    hasher.update(input)
    return hasher.digest()

In [36]:
# Test with a longer message against the hashlib reference
test_msg_sha = sha1(test_msg)
print(test_msg_sha.hex())
test_msg_ref_sha = reference_sha1(test_msg)
print(test_msg_ref_sha.hex())
assert test_msg_sha == test_msg_ref_sha

message length: 248 bits
words: [1214606444, 1864394849, 1936203895, 1869769828, 740321652, 661856365, 1696607520, 1397244288, 0, 0, 0, 0, 0, 0, 0, 248, 1981454390, 10749962, 3197146102, 2084022398, 2471008444, 2505565239, 841943740, 2147595145, 3332044803, 1695941996, 2099396862, 1968873723, 3971824545, 3505080419, 1658741986, 3627090356, 3187111745, 4139222004, 295080961, 1134245862, 1994020011, 2824279993, 2883666807, 2549729268, 1546760203, 2587150888, 803230036, 1949753067, 3306914688, 517611125, 33365118, 4173958248, 3709115005, 1573569672, 1623866157, 2245839250, 2345968040, 2206067242, 3890367776, 4255137848, 1534140944, 2824157910, 4012069762, 1753372217, 3469609217, 345659243, 2803055156, 752485299, 3832260695, 2929728497, 1372558696, 345095085, 414317808, 1997590562, 505666898, 3275557722, 1314001870, 3769193624, 1715439971, 1303859810, 596326075, 1390287666, 567818918, 3315092294]
5bed0937094eef90922400f41a24492acdf70801
5bed0937094eef90922400f41a24492acdf70801


In [37]:
# Test with a long message (spans multiple 512-bit blocks) against the hashlib reference
long_test_msg = b"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
long_test_msg_sha = sha1(long_test_msg)
print(long_test_msg_sha.hex())
long_test_msg_ref_sha = reference_sha1(long_test_msg)
print(long_test_msg_ref_sha.hex())
assert long_test_msg_sha == long_test_msg_ref_sha

message length: 4728 bits
words: [1282372197, 1830840688, 1937075488, 1685023855, 1914729321, 1948279149, 1702112288, 1668247155, 1702126964, 1970413683, 1633970544, 1935894894, 1730176364, 1769239084, 544433508, 543451489, 1714688058, 3097264144, 2158640304, 170977452, 2446423147, 4246897413, 1418168632, 1330685461, 1041055351, 3558113387, 2474454387, 1549114162, 86894608, 1319070326, 2636828328, 2768981737, 739832087, 4146351359, 1310348930, 247333657, 1857991544, 1710109641, 4064081405, 2716055518, 3373114293, 447956151, 4082255998, 2868028369, 3625296623, 4199595600, 3556299559, 1471295054, 2730456288, 1638117612, 143198101, 3325808578, 1248373004, 1839758244, 1575261530, 1293215831, 3935422751, 403539361, 3719563942, 4161812152, 3004218058, 1006486072, 2842036299, 2418776191, 4139053220, 770172296, 519862400, 1248781557, 318162473, 192565910, 2835046902, 2944952129, 2513879831, 3389157070, 3215023963, 950773444, 2231645389, 1041936148, 2667872187, 780164343]
words: [1830841967, 18