# Lecture 1 hashes and datastructures

In [None]:
#Import statements
import hashlib as hasher

def hashbits(input):
    """Return the SHA-256 digest of a string as a string of '0' and '1' characters.

    The text is converted to bytes with ``str.encode()`` before hashing.
    Every digest byte contributes exactly 8 characters, so leading zero
    bits are preserved in the result.
    """
    digest = hasher.sha256(input.encode()).digest()
    # Read the digest as one big-endian integer and zero-pad it to the full
    # bit width; this yields the same bits as formatting byte by byte.
    return format(int.from_bytes(digest, "big"), f"0{8 * len(digest)}b")



def mdbits(input):
    """Return the MD5 digest of a string as a string of '0' and '1' characters.

    The text is converted to bytes with ``str.encode()`` before hashing.
    Each digest byte is rendered as exactly 8 binary digits.
    """
    digest = hasher.md5(input.encode()).digest()
    # Iterating over bytes yields ints; format each one as a zero-padded octet.
    return "".join(map("{:08b}".format, digest))

def hash(input):
    """Return the SHA-256 digest of a string as a hexadecimal string.

    The text is converted to bytes with ``str.encode()`` before hashing.

    NOTE: this definition shadows Python's built-in ``hash()`` for all code
    that runs after it in the same namespace.
    """
    return hasher.sha256(input.encode()).hexdigest()

In [None]:
# Show the same message hashed with SHA-256: first as bits, then as hex,
# one result per line.
print(hashbits("hello world!"), hash("hello world!"), sep="\n")

## Exercise 1

### Hashes look random but are deterministic:
* Try out different hash functions, find out how many bytes/bits they return (sha1, md5, sha512)
* Try out hashing the same value twice. Does it give the same result?
* Try concatenating two strings in different orders. Do they hash to the same value?
* Hash 10 different strings, how many have the first bit 0?
* Hash 160 different strings, how many have the first character 0 in hexadecimal representation?
* Hash 50 different strings and count the number of 1s and 0s in the binary representation of each hash. Are the numbers of 1s and 0s roughly equal?
* Find a different test to check if the hashes are random?

## Exercise 2

### Avalanche Effect
1. Hash a string of your choice (e.g., "blockchain") and note the output. Then, change a single character in the string (e.g., "Blockchain") and hash it again. Compare the two hashes. How many bits differ between them?

2. Try this with different hash functions and different small changes in the input (e.g., changing one letter, adding a space). How consistent is the avalanche effect across different functions?

## Exercise 3

### Collision resistance
1. Compute hashes of different strings until you find one that ends with the same hexadecimal digit as the hash of "*hello world*". How many did you have to try?
2. Compute hashes of different strings until you have found 2 that end with the same hexadecimal digit. How many did you have to try?

## Exercise 4

### Efficiency
Write a small program that hashes a large number of strings (e.g., 100000) using different hash functions (e.g., SHA-1, MD5, SHA-256). Measure and compare the time taken by each function.

In [None]:
import time

# Base text to hash; the loop counter is appended below so that every
# hashed input is different.
string = "Hello, World!"

# Hash constructors to compare
hash_functions = [hasher.md5, hasher.sha1, hasher.sha256, hasher.sha512, hasher.sha3_256]

# Time each hash function over the same sequence of inputs
for hash_func in hash_functions:
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() is wall-clock time and can jump if the
    # system clock is adjusted during the run.
    start_time = time.perf_counter()

    # Hash each string using the current hash function.
    # NOTE: building and encoding the input string happens inside the timed
    # region, so that cost is included in every measurement.
    for i in range(1000000):
        hash_obj = hash_func()
        hash_obj.update((string+str(i)).encode())
        hash_result = hash_obj.hexdigest()
        #print(hash_result)  # Uncomment this line to print the hash result

    # Calculate the elapsed time
    elapsed_time = time.perf_counter() - start_time

    # Print the elapsed time for the current hash function
    print(f"Hash function: {hash_func.__name__}, Elapsed time: {elapsed_time} seconds")

## Exercise 5

### Hash chain
Below is a stub for Block and Hashlist classes.
1. Complete `hash_block`, `add` and `check` functions.
2. Create a hash list with 3 or more blocks and show that check works as expected.
3. Verify that check detects changes in the data in the list.
4. Verify that `check` detects if a new Block is inserted somewhere in the list.
5. Update the HashList and Block classes:
    * Create a dictionary where blocks are stored indexed by their hash
    * Update `self.last` to only store the hash of the last block
    * Remove the `previous` pointer from the `Block`
    * Update the `check` and `add` function to use the dictionary.
        Make sure it handles cases where the block is not present.


In [None]:
class Block:
    """One entry of the hash list: a payload plus a link to the previous block.

    Attributes set by the constructor:
        data: the payload passed in.
        previous: the preceding Block, or None when there is none.
        previous_hash: ``previous.hash``, or "" when there is no predecessor.
        hash: whatever ``hash_block()`` returns for this block.
    """

    def __init__(self, data, previous=None):
        self.data = data
        self.previous = previous
        # A block without a predecessor links to the empty string.
        self.previous_hash = "" if previous is None else previous.hash
        self.hash = self.hash_block()

    def hash_block(self):
        """Exercise stub: return the hash of the block as a hexadecimal string.

        TODO: add this function. Until it is implemented it returns None.
        """
        return None

class HashList:
    """Hash list that keeps a reference to its last Block in ``self.last``."""

    def __init__(self, genesis_data):
        # The list starts with a single block that has no predecessor.
        genesis_block = Block(genesis_data)
        self.last = genesis_block

    def add(self, data):
        """Exercise stub: create a new block at the end of the chain and update ``self.last``.

        Until it is implemented it does nothing and returns None.
        """
        return None

    def check(self):
        """Exercise stub: check if all the previous hashes in the list are correct.

        Should return True or False; the unimplemented stub always returns False.
        """
        return False

## Exercise 6 Merkle tree
Complete the methods below, to 
- generate the merkle root, 
- generate a merkle proof and 
- check a merkle proof

In [None]:
class MerkleTree:
    """Merkle tree over a fixed number of leaves (exercise skeleton).

    The constructor pads the data with empty strings up to ``length`` leaves
    and stores one hash per leaf in ``self.hashes``. Computing the root and
    generating proofs are left as exercises.
    """

    def __init__(self, datalist, length):
        """Hash the data items, padding with "" up to ``length`` leaves.

        Args:
            datalist: the data items to store; it is not modified.
            length: the number of leaves of the tree.
        """
        # NOTE: if length is not a power of 2, this will not work.
        self.length = length
        if len(datalist) > length:
            print("Too many data items")
        # Pad a copy rather than appending to the argument, so the caller's
        # list is left untouched. A negative count yields no padding.
        leaves = list(datalist) + [""] * (length - len(datalist))
        # hash() resolves to whatever `hash` is bound to in this namespace;
        # in this notebook that is the SHA-256 hex helper defined earlier.
        self.hashes = [hash(d) for d in leaves]
        self.root = ""  # TODO: compute merkle root

    def getroot(self):
        """Return the stored merkle root ("" until the root is computed)."""
        return self.root

    def getproof(self, index):
        """Exercise stub: return the proof for the data item at ``index``.

        The proof should be a list of hashes: the sibling hashes on the path
        to the root. If the index is too large, return an empty list.
        The unimplemented stub always returns an empty list.
        """
        return []
        
    
def checkproof(root, index, proof, length, data):
    """Exercise stub: check if the proof is correct for the data item at index.

    Should return True or False; the unimplemented stub always returns False.
    """
    return False


In [None]:
# Build a tree with four leaves from four one-character items and print the
# stored leaf hashes.
mt = MerkleTree(["a","b","c","d"], 4)
print(mt.hashes)

# Request the proof for the item at index 0 (an empty list until getproof
# is implemented).
p = mt.getproof(0)
print(p)

# Check that proof against the tree's root for index 0 and data "a"
# (prints False until checkproof is implemented).
print(checkproof(mt.getroot(), 0, p, 4, "a"))