## Part III: Code Project


In [None]:
!pip install graphviz


### 1 - Merkle Tree Implementation
- Implement Merkle trees using SHA256 or some other hashing algorithms. Students can utilize existing crypto packages for hashing functions.
- Each leaf node references a plaintext file.
- Conduct a test with four leaf nodes, displaying the tree structure and hashes.
- Conduct a second test with six leaf nodes, displaying the tree structure and hashes.


### Conduct a test with four leaf nodes, displaying the tree structure and hashes.

In [21]:
import hashlib
from graphviz import Digraph

def sha256(data):
    return hashlib.sha256(data.encode('utf-8')).hexdigest()

class MerkleTree:
    def __init__(self, data_list):
        self.leaves = [sha256(data) for data in data_list]
        self.tree = self.build_tree(self.leaves)

    def build_tree(self, leaves):
        tree = [leaves]
        while len(leaves) > 1:
            if len(leaves) % 2 != 0:
                leaves.append(leaves[-1])
            leaves = [sha256(leaves[i] + leaves[i + 1]) for i in range(0, len(leaves), 2)]
            tree.append(leaves)
        return tree

    def get_root(self):
        return self.tree[-1][0] if self.tree else None

    def display_tree(self):
        for level in self.tree:
            print(level)

    def visualize_tree(self, filename='merkle_tree'):
        dot = Digraph()
        for i, level in enumerate(reversed(self.tree)):
            for j, node in enumerate(level):
                dot.node(f'{len(self.tree) - 1 - i}-{j}', node[:8])  # Display only the first 8 characters of the hash
                if i > 0:
                    parent_index = j // 2
                    dot.edge(f'{len(self.tree) - i}-{parent_index}', f'{len(self.tree) - 1 - i}-{j}')
        dot.render(filename, format='png', cleanup=True)
        return filename + '.png'

    def save_markdown(self, md_filename='merkle_tree.md', img_filename='merkle_tree.png'):
        with open(md_filename, 'w') as f:
            f.write(f'![Merkle Tree]({img_filename})\n')



In [31]:
# Part 3 - Merkle Tree

import hashlib
import os
from graphviz import Digraph

# Node class for Merkle tree
class Node:
    def __init__(self, left=None, right=None, hashValue=None, fileName=None):
        self.left = left
        self.right = right
        self.hash = hashValue  # store hash as bytes
        self.fileName = fileName  # non-null for leaf nodes

# Function to compute SHA256 hash of given data (in bytes)
def computeHash(data: bytes) -> bytes:
    return hashlib.sha256(data).digest()

# Build the Merkle tree from a list of file paths.
def buildMerkleTree(fileName):
    print('Here is file paths: ', fileName)
    # Create leaf nodes by reading file contents and computing their hashes.
    leaves = []
    for x in fileName:
        with open(x, 'rb') as f:
            data = f.read()
        hashData = computeHash(data)
        print('Here is hash value: \n', hashData)
        node = Node(hashValue=hashData, fileName=x)
        leaves.append(node)

    # Build the tree level by level until only the root remains.
    while len(leaves) > 1:
        # If the number of nodes is odd, duplicate the last node.
        if len(leaves) % 2 == 1:
            leaves.append(leaves[-1])
        temp = []
        # Pair nodes and compute parent node hash as SHA256(left.hash + right.hash)
        for i in range(0, len(leaves), 2):
            left = leaves[i]
            right = leaves[i+1]
            combined = left.hash + right.hash
            parentHash = computeHash(combined)
            parent = Node(left=left, right=right, hashValue=parentHash)
            temp.append(parent)
        leaves = temp
    return leaves[0]

# Recursively print the Merkle tree structure and hashes.
def printTree(node, level=0):
    if node.fileName:
        # This is a leaf node.
        print(f"Leaf - {node.fileName}: {node.hash.hex()}")
    else:
        # This is an internal node.
        print(f"Node: {node.hash.hex()}")
        if node.left:
            printTree(node.left, level+1)
        if node.right:
            printTree(node.right, level+1)

# Visualize the Merkle tree using graphviz
def visualizeTree(node, filename='merkle_tree'):
    dot = Digraph()
    def addNodesEdges(node, parent=None):
        if node.fileName:
            node_id = node.fileName
            label = f"{node.fileName}\n{node.hash.hex()[:8]}"
        else:
            node_id = node.hash.hex()
            label = node.hash.hex()[:8]
        dot.node(node_id, label)
        if parent:
            dot.edge(parent, node_id)
        if node.left:
            addNodesEdges(node.left, node_id)
        if node.right:
            addNodesEdges(node.right, node_id)
    addNodesEdges(node)
    dot.render(filename, format='png', cleanup=True)
    return filename + '.png'

# Create sample files based on the object key and values.
def savefile(fileObject):
    for fileName, content in fileObject.items():
        with open(fileName, 'w') as f:
            f.write(content)
        print("File saved: ", fileName)

# Define file names and contents
fileData = {
    "sampleFile1.txt": "Sample Data 1.",
    "sampleFile2.txt": "Sample Data 2.",
    "sampleFile3.txt": "Sample Data 3.",
    "sampleFile4.txt": "Sample Data 4."
}

savefile(fileData)

# Build the Merkle tree with the fileData Object keys(Name)
merkTree = buildMerkleTree(list(fileData.keys()))
print('\n')
print("Here is the Merkle Tree: ")
printTree(merkTree)

# Visualize the Merkle tree
img_file = visualizeTree(merkTree, 'merkle_tree')
print(f"Merkle tree visualization saved as {img_file}")

File saved:  sampleFile1.txt
File saved:  sampleFile2.txt
File saved:  sampleFile3.txt
File saved:  sampleFile4.txt
Here is file paths:  ['sampleFile1.txt', 'sampleFile2.txt', 'sampleFile3.txt', 'sampleFile4.txt']
Here is hash value: 
 b'\xe6\x9bh.\xc3\xa1\xcd\x19\x0e\x86\x19\x97 b\x95Pb\r\x04\xa36\xc0) \xbf\xa25\x9e{\xa1\xc6$'
Here is hash value: 
 b'\x11qI\xc2k\xa5_\xf6\xcc\xcf\x91;\xaf.\xa8\xcf\xb9F["\xd7D\t\xd3\xb2\xec\xc8\xa0g\xd1(x'
Here is hash value: 
 b'\xf4\x1c\x04\xaa\x04\x0b\xdd)n\xa1\x8f\xd6\xe9x\x81-t\x1ba\x9f!\x97W\x1d\xb0\x03uZ\xa7 \xcej'
Here is hash value: 
 b'\xef\xfb\xa6f\x0f&\xc0\xeb\xdb-Y\x06\xf5/\x8d`\x8e\t\x07\x14\x95H\x98\x04 O\xa2\x95\xc1\xec;\xd3'


Here is the Merkle Tree: 
Node: ddf61bf634b0b650ff4441dca2478a126789aabbce062457dc4dbc8b24534bee
Node: bb86d04919c8fb4b512041626ecbc06a1cc91e0af6265cfddd3f3e3e64dd79f7
Leaf - sampleFile1.txt: e69b682ec3a1cd190e86199720629550620d04a336c02920bfa2359e7ba1c624
Leaf - sampleFile2.txt: 117149c26ba55ff6cccf913baf2ea8cfb9

### Visual representation of tree:
![Merkle Tree](merkle_tree.png)

In [25]:
# Example usage
# Generate 4 text files with different content each
data_list = []
for i in range(1, 5):
    with open(f'file{i}.txt', 'w') as f:
        f.write(f'This is the content of file {i}.\n')
    with open(f'file{i}.txt', 'r') as f:
        data_list.append(f.read())

merkle_tree = MerkleTree(data_list)
merkle_tree.display_tree()
print(f"Root Hash: {merkle_tree.get_root()}")
img_file = merkle_tree.visualize_tree('merkle_tree_orig_4_nodes')

['b5589cac35442c4ac06e4ea1749a9c5f9bdf158081abd94fe4700df4beb2d99d', '5af94e431a67ac051e9ebe6cbffba1929b05aabf6334eb0d8b5d391154d27d00', '3e7dfac18d844fca9f8895416e8fd5b1a3b8bd21cfed79483deede6594222f24', 'c17106bd1d5efd752aba18667d29461169cdfe6969cdfec86c3b5b63c85d7acb']
['312c56be398bdd9fb52ff109358c2dc9663e12f311cb0fd3145151d470024f49', 'cf391a952852f5cd5f6df2bf89c66cbe488cd31b371b9aa1ce1feb869821e97f']
['8e909b5d2fd4df2d058a74b42e44d4f522c3dab96cc76cbb595b7f044e4c074b']
Root Hash: 8e909b5d2fd4df2d058a74b42e44d4f522c3dab96cc76cbb595b7f044e4c074b


### Visual representation of tree:
![Merkle Tree](merkle_tree_orig_4_nodes.png)

### Conduct a second test with six leaf nodes, displaying the tree structure and hashes.

In [28]:
# Example usage
# Generate 6 text files with different content each (duplicating the last 2 files to balance the tree)
data_list = []
for i in range(1, 7):
    with open(f'file{i}.txt', 'w') as f:
        f.write(f'This is the content of file {i}.\n')
    with open(f'file{i}.txt', 'r') as f:
        data_list.append(f.read())

data_list.append(data_list[-1])
data_list.append(data_list[-1])
merkle_tree = MerkleTree(data_list)
merkle_tree.display_tree()
print(f"Root Hash: {merkle_tree.get_root()}")
img_file = merkle_tree.visualize_tree('merkle_tree_orig_6_nodes')

['b5589cac35442c4ac06e4ea1749a9c5f9bdf158081abd94fe4700df4beb2d99d', '5af94e431a67ac051e9ebe6cbffba1929b05aabf6334eb0d8b5d391154d27d00', '3e7dfac18d844fca9f8895416e8fd5b1a3b8bd21cfed79483deede6594222f24', 'c17106bd1d5efd752aba18667d29461169cdfe6969cdfec86c3b5b63c85d7acb', 'b14677e78354c132d2126ca6abbac366422ff5199009868422549dac0c20c4a7', '2592df5550a8214c14487868b93233499a200bc137c1ff72257ac506e4eb7be0', '2592df5550a8214c14487868b93233499a200bc137c1ff72257ac506e4eb7be0', '2592df5550a8214c14487868b93233499a200bc137c1ff72257ac506e4eb7be0']
['312c56be398bdd9fb52ff109358c2dc9663e12f311cb0fd3145151d470024f49', 'cf391a952852f5cd5f6df2bf89c66cbe488cd31b371b9aa1ce1feb869821e97f', '2aed9c99cae6d2d8d241d165954ae521d17f7157f35bcf78356ab3034b866ef6', '668b15a36b7d85b648bc299672f332370971e6bdbd2b8ef4e3a8929d4b8ca0d5']
['8e909b5d2fd4df2d058a74b42e44d4f522c3dab96cc76cbb595b7f044e4c074b', 'e0ca8dcd2998bd47e8e40a9ba3a5574c46b0f2a4b65c9c475395b634b1179d66']
['bae910a7129fdb0574d6ba1e840747f6c415db3269e

### Visual representation of tree:
![Merkle Tree](merkle_tree_orig_6_nodes.png)

In [30]:
# Modify the contents of 'file2.txt'
with open('file2.txt', 'w') as f:
    f.write('This is the modified content of file 2.\n')

# Read the updated contents of the files
data_list = []
for i in range(1, 5):
    with open(f'file{i}.txt', 'r') as f:
        data_list.append(f.read())

# Rebuild the Merkle tree with the updated data
merkle_tree = MerkleTree(data_list)
merkle_tree.display_tree()
print(f"Root Hash: {merkle_tree.get_root()}")
img_file = merkle_tree.visualize_tree('merkle_tree_updated_4_nodes')

['b5589cac35442c4ac06e4ea1749a9c5f9bdf158081abd94fe4700df4beb2d99d', '1d68662e61e5b506da002b064ed5d8f9cf1f50df38cda86f04e4ce5f2a7fe5b0', '3e7dfac18d844fca9f8895416e8fd5b1a3b8bd21cfed79483deede6594222f24', 'c17106bd1d5efd752aba18667d29461169cdfe6969cdfec86c3b5b63c85d7acb']
['9cd28de5b24362d42d507fde8509f53c2c36146ab284ea01f845db3f2fd2e1c6', 'cf391a952852f5cd5f6df2bf89c66cbe488cd31b371b9aa1ce1feb869821e97f']
['bba9f404e9cdf0efc2ca6a63dd9626699f67fc73dce6919c68a6771675f6d44c']
Root Hash: bba9f404e9cdf0efc2ca6a63dd9626699f67fc73dce6919c68a6771675f6d44c


### Updated contents of file 2 representation of tree:
#### Original 4 nodes
![Merkle Tree](merkle_tree_orig_4_nodes.png)
#### Updated file 2
![Merkle Tree](merkle_tree_updated_4_nodes.png)