In [157]:
import random
import numpy as np
import hashlib
import web3
import json

RQ 1 To what extend can pooled object hashes increase the transaction throughput and reduce cost for a fixity information storage service on the Ethereum blockchain?

RQ 2 What is the optimal pool size based on the corruption rates of digital objects in the archive in terms of transaction throughput and cost?

RQ 3 Given that metadata has a higher corruption rate, what effect has the split of metadata and objects on the operation cost?

In [1]:

# creation of N = 10k objects and assigning them corruption rate from 1% to 20% 
encoding = "utf-8"
N = 100
percent = 10
prevalence = (percent * N) /100.0
p = percent/100.0

class Object:
    def __init__(self,id,corruption_rate,pool_id=0):
        self.id=id
        # sha256 string
        self.pool_id=pool_id
        # discuss if the pool_id should be hashed with the object in a real case
        self.hash=hashlib.sha256(str(id).encode(encoding)).hexdigest()
        # corruption rate from 0.01 to 0.2
        self.corruption_rate=corruption_rate

        self.is_corruped = False
    def to_string(self):
        return "id: " + str(self.id) + " sha256: " + self.hash + " corruption_rate: " + str(self.corruption_rate)
        
objects = [Object(i,random.uniform(0,p)) for i in range(0,N)]

for i in range(0,3):
    print(objects[i].to_string())

assert objects[2].hash == hashlib.sha256("2".encode(encoding)).hexdigest()


NameError: name 'random' is not defined

In [159]:
# RQ 2
# TODO finding the optimal pool size "k" with a bernoulli experiment based on the corruption rate
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8242460/
import math
def optimal_size(prevalence,N):
    # https://www.sciencedirect.com/science/article/pii/S1201971220306925
    # poolsize = 1.24* p/N ^-0.466
    #return 100
    return round(1.24 * math.pow(prevalence / N,-0.466)) 
k = optimal_size(prevalence,N)
print("Optimal poolsize {} with prevalence {} in N={}".format(k,prevalence,N))
print("Net analyses required for one object: {}".format(1/k-math.pow((1-p),k)+1))

Optimal poolsize 4 with prevalence 10.0 in N=100
Net analyses required for one object: 0.5939


In [160]:
# creation of J pools, fill them with objects and assign each object to a pool
class Pool:
    def __init__(self,objects,id=-1,transaction=""):
        # list of objects in the pool
        self.objects=objects
        # sha256 root hash of the hash-list
        self.hash=hashlib.sha256("".join([obj.hash for obj in objects]).encode(encoding)).hexdigest()
        # reference to the pool, integer from 1 - inf 
        self.id=id
        # transaction hash on the ethereum blockchain
        self.transaction=transaction
    def to_string(self):
        return "PoolId: " + str(self.id) + " with " + str(len(self.objects)) +" objects in pool"

test_n = 9
testpool = Pool(objects[0:test_n])
hashlist =""
for i in range(test_n):
    hashlist+=hashlib.sha256(str(i).encode(encoding)).hexdigest()

assert testpool.hash == hashlib.sha256(hashlist.encode(encoding)).hexdigest()

In [161]:
# Pool Creation
# https://www.geeksforgeeks.org/break-list-chunks-size-n-python/
# split the list of pools into equal chunks of size k, the last pool is filled with remainders e.g. N= 14 k=3 = [3,3,3,3,2]
#pools = [Pool(i,objects[i * k:(i + 1) * k]) for i in range((len(objects) + k - 1) // k )]

pools=[]
for i in range((len(objects) + k - 1) // k ):
    # assign pool id to each object in the pool
    for obj in objects[i * k:(i + 1) * k]:
        obj.pool_id=i
    pools.append(Pool(objects[i * k:(i + 1) * k],i))

print("{} objects distributed in {} pools with size={} last pool with size={}".format(N,len(pools),k,len(pools[len(pools)-1].objects)))
assert pools[0].id == 0
assert len(pools[0].objects) == len(pools[1].objects)
assert pools[len(pools)-1].hash

100 objects distributed in 25 pools with size=4 last pool with size=4


In [162]:
ganache_url = 'http://127.0.0.1:8545'
contract_address="0x2C3c7752cE837bB97D6C31B4f883EAAA92BC3Ce5"
w3 = web3.Web3(web3.HTTPProvider(ganache_url))

sender = w3.eth.accounts[0]
balance = w3.fromWei(w3.eth.get_balance(sender),"ether")
tx_count = w3.eth.getTransactionCount(sender)
print("Transaction Count {}".format(tx_count))
print("ETH Balance: {}".format(balance))
print("Sender Account: {}".format(sender))
compiled_contract_path = '../sol/build/contracts/FixityStorage.json'
# check contract address if this cell fails
deployed_contract_address = w3.toChecksumAddress(contract_address)
print("Contract Deployed at: {}".format(deployed_contract_address))

with open(compiled_contract_path) as file:
    contract_json = json.load(file)  # load contract info as JSON
    contract_abi = contract_json['abi']  # fetch contract's abi - necessary to call its functions

# Fetch deployed contract reference
contract = w3.eth.contract(address=deployed_contract_address, abi=contract_abi)
print("Contract Functions: {}".format(contract.all_functions()))

Transaction Count 191
ETH Balance: 999.713875679871284508
Sender Account: 0xAF8725604990d46042A50EfD9e2cB118141Bb140
Contract Deployed at: 0x2C3c7752cE837bB97D6C31B4f883EAAA92BC3Ce5
Contract Functions: [<Function getPoolHash(uint32)>, <Function setPoolHash(uint32,bytes32)>]


In [163]:
# TODO get real world data 
gas = 2000000
gasPrice = w3.toWei('50', 'gwei')

metaTx = {
    #"nonce":w3.eth.getTransactionCount(sender) nonce is set on transaction call
    "from":sender,
    "to":deployed_contract_address,
    "gas": gas,
    "gasPrice": gasPrice
}
print(metaTx)
print(w3.eth.getTransactionCount(sender))

{'from': '0xAF8725604990d46042A50EfD9e2cB118141Bb140', 'to': '0x2C3c7752cE837bB97D6C31B4f883EAAA92BC3Ce5', 'gas': 2000000, 'gasPrice': 50000000000}
191


In [164]:
# persist each pool on the blockchain, for each pool perform a transaction
# RQ 1 write_tx_count * gasPrice * gas or just read it from ganache
from tqdm.notebook import tqdm, trange
import time
write_tx_count = 0
read_tx_count = 0

for pool in tqdm(pools):
    metaTx.update({"nonce":w3.eth.getTransactionCount(sender)})
    tx_hash = contract.functions.setPoolHash(pool.id,pool.hash).transact(metaTx)
    write_tx_count = write_tx_count + 1 
    #print("Persisting pool "+str(pool.id)+" with hash: "+str(pool.hash)+" in transaction " + tx_hash.hex() + " succeeded")

  0%|          | 0/25 [00:00<?, ?it/s]

In [165]:
pools[0].hash

'44e7a7f7eae93fd3e0d52cfd81347de28c7f9312c6ec662617320e269a4243d9'

In [166]:
# test if persistence on the blockchain was successfull
pool_id = pools[0].id
poolHashBytes = contract.functions.getPoolHash(pool_id).call()
read_tx_count = read_tx_count + 1
assert poolHashBytes.hex() == pools[pool_id].hash

In [167]:
# "ingest" objects into the "archive"
import random

class Archive:
    def __init__(self,objects):
        self.objects=objects

    def retrieveObj(self,id):
        return next(obj for obj in objects if obj.id == id)

    def get_objects_by_pool_id(self,pool_id):
        return [obj for obj in self.objects if obj.pool_id == pool_id]
    
    def get_sample(self,n):
        return random.sample(self.objects,n)

    def corrupt(self,p):
        for obj in self.objects:
            if(random.uniform(0, 1)<p):
                obj.hash=hashlib.sha256((str(obj.id) + "x").encode(encoding)).hexdigest()
                obj.is_corruped=True

    def repair(self,pool_id):
        global write_tx_count
        write_tx_count=write_tx_count+1
        global metaTx
        metaTx.update({"nonce":w3.eth.getTransactionCount(sender)})
        contract.functions.setPoolHash(pool.id,pool.hash).transact(metaTx)
        # TODO what happens if a corrupt pool was found
        return 0
    def clean(self):
        print("Cleanup archive")

archive = Archive(objects)

assert archive.objects[k*2].pool_id == 2 
assert Pool(archive.get_objects_by_pool_id(2)).hash == pools[2].hash
assert objects[2].hash==archive.retrieveObj(2).hash
# write transactions have to be exactly the number of pools at this stage
assert write_tx_count == len(pools)

In [168]:
print("Write transactions after ingest: " + str(write_tx_count))

Write transactions after ingest: 25


In [169]:
sample = archive.retrieveObj(0)
assert sample.pool_id==0
pool_of_sample = Pool(archive.get_objects_by_pool_id(sample.pool_id))
assert pool_of_sample.hash == pools[0].hash
pool_in_blockchain = contract.functions.getPoolHash(sample.pool_id).call()
assert pool_of_sample.hash == pool_in_blockchain.hex()

In [170]:
# update, zuerst schauen ob de rpool passt
id = 0
original_hash = archive.objects[id].hash
archive.objects[id].hash=original_hash

archive_pool = Pool(archive.get_objects_by_pool_id(archive.objects[id].pool_id),id=id)
pool_in_blockchain = contract.functions.getPoolHash(archive.objects[id].pool_id).call()
assert archive_pool.hash == pool_in_blockchain.hex()

# falls ja das objekt ändern
archive.objects[id].hash=original_hash+"x"
# erneut die objekte aus dem pool im archiv holen und auf der blockchain persistieren
updated_archive_pool = Pool(archive.get_objects_by_pool_id(archive.objects[id].pool_id),id=id)
assert updated_archive_pool.hash!=pool_in_blockchain.hex()
assert updated_archive_pool.id==id

metaTx.update({"nonce":w3.eth.getTransactionCount(sender)})
tx_hash = contract.functions.setPoolHash(updated_archive_pool.id,updated_archive_pool.hash).transact(metaTx)
write_tx_count = write_tx_count + 1 

# get the hash with pool id
updated_pool_in_blockchain = contract.functions.getPoolHash(archive.objects[id].pool_id).call()
assert pool_in_blockchain.hex() != updated_pool_in_blockchain.hex()

In [171]:
archive.corrupt(p)

In [172]:
print("Write transactions before cleaning: " + str(write_tx_count))
# repair every object in the archive
already_cleaned_pool_ids = set()
corrupted_objects_count = 0
for obj in tqdm(archive.objects):
    pool_of_sample = Pool(archive.get_objects_by_pool_id(obj.pool_id))
    pool_in_blockchain = contract.functions.getPoolHash(obj.pool_id).call()

    # is the local pool hash the same as the one in the blockchain? and make sure to not double repair a pool
    if(pool_of_sample.hash != pool_in_blockchain.hex() and obj.pool_id not in already_cleaned_pool_ids):
        write_tx_count=write_tx_count+1
        metaTx.update({"nonce":w3.eth.getTransactionCount(sender)})
        contract.functions.setPoolHash(pool.id,pool.hash).transact(metaTx)
        already_cleaned_pool_ids.add(obj.pool_id)

print("Number of Distinct Cleaned Pools: {}".format(len(already_cleaned_pool_ids)))
print("Write transactions after cleaning: {}".format(write_tx_count))


Write transactions before cleaning: 26


  0%|          | 0/100 [00:00<?, ?it/s]

Number of Distinct Cleaned Pools: 14
Write transactions after cleaning: 40


In [173]:
fin_balance = w3.fromWei(w3.eth.get_balance(sender),"ether")
fin_tx_count = w3.eth.getTransactionCount(sender)
print("Optimal poolsize {} with prevalence {} in N={}".format(k,prevalence,N))
print("{} objects distributed in {} pools with size={} last pool with size={}".format(N,len(pools),k,len(pools[len(pools)-1].objects)))
print("Transaction Count: {} ".format(fin_tx_count - tx_count))
print("Total Cost=ETH {} for {} transactions ".format((balance - fin_balance),(fin_tx_count - tx_count)))
print("Theoretical amount of write transactions: {} with N={} + prevalence={}".format((N+prevalence),N,prevalence))
print("Repairing transactions={}, versus naive reparing transactions prevalence={}".format(fin_tx_count-tx_count-len(pools),prevalence))
print("Number of Distinct Cleaned Pools: {}".format(len(already_cleaned_pool_ids)))



Optimal poolsize 4 with prevalence 10.0 in N=100
100 objects distributed in 25 pools with size=4 last pool with size=4
Transaction Count: 40 
Total Cost=ETH 0.052997200000000000 for 40 transactions 
Theoretical amount of write transactions: 110.0 with N=100 + prevalence=10.0
Repairing transactions=15, versus naive reparing transactions prevalence=10.0
Number of Distinct Cleaned Pools: 14


In [174]:
# TODO optimal pool size!!!!
# TODO make a pretty result table 
# TODO experiment on online network 

In [175]:
# TODO nächstes mal auf ropsten testnet deployen und schauen was ich für einen throughput habe ud die kosten logge
# simulieren soll ich den throughput und die kosten kann ich vorberechnen
# rauber intressiert auch wie der thorughput vorberechnet ist und danach wie groß der unterschied zum tatsächliche throughput ist