In [5]:
import random
import numpy as np
import hashlib
# get contract from blockchain in order to interact with it
import json
import web3
import math
from tqdm.notebook import tqdm, trange

In [6]:
# --- simulation parameters --------------------------------------------------
# Text encoding used whenever a string is turned into bytes before hashing.
encoding = "utf-8"
# Number of objects in the simulated archive.
N = 100
# Share of corrupted objects in percent. Both `prevalence` and `p` are derived
# from this value so the two can never drift apart.
percent = 5
# `percent` of N expressed as an absolute number of objects.
prevalence = (percent * N) /100.0
# Per-object corruption probability (was the duplicated literal 5/100.0).
p = percent / 100.0

# --- blockchain connection --------------------------------------------------
# HTTP endpoint handed to the web3 provider.
ganache_url = 'http://127.0.0.1:7545'
# Address of the deployed contract; must be updated after every redeployment.
contract_address="0xf6cE8fAE2E0b3c30Ec5597ff6AB4c5C16B21dBcc"

In [7]:
class Object:
    """One archived object together with its fixity information.

    Attributes:
        id: identifier of the object; its string form is what gets hashed.
        pool_id: id of the pool the object is assigned to (0 by default).
        hash: SHA-256 hex digest of ``str(id)`` encoded with the module-level
            ``encoding``.
        corruption_rate: value supplied by the caller (the original note says
            0.01 to 0.2); it is only stored here.
        is_corruped: corruption flag, False on creation. The spelling is kept
            because code outside this class sets the attribute by this name.
    """

    def __init__(self, id, corruption_rate, pool_id=0):
        self.id = id
        self.pool_id = pool_id
        # discuss if the pool_id should be hashed with the object in a real case
        id_bytes = str(id).encode(encoding)
        self.hash = hashlib.sha256(id_bytes).hexdigest()
        self.corruption_rate = corruption_rate
        self.is_corruped = False

    def to_string(self):
        """Return a one-line description with id, digest and corruption rate."""
        pieces = (
            "id: ", str(self.id),
            " sha256: ", self.hash,
            " corruption_rate: ", str(self.corruption_rate),
        )
        return "".join(pieces)
        
class Pool:
    """A group of objects that is represented by a single root hash.

    Attributes:
        objects: the objects that belong to the pool, in order.
        hash: SHA-256 hex digest of the concatenated object digests
            (the hash list), computed once at construction time.
        id: integer reference to the pool; -1 when none was given.
        transaction: transaction hash on the Ethereum blockchain; empty
            string when none was given.
    """

    def __init__(self, objects, id=-1, transaction=""):
        self.objects = objects
        # Root hash: hash over the ordered list of member digests.
        hash_list = "".join(member.hash for member in objects)
        self.hash = hashlib.sha256(hash_list.encode(encoding)).hexdigest()
        self.id = id
        self.transaction = transaction

    def to_string(self):
        """Return a one-line description with the pool id and member count."""
        member_count = len(self.objects)
        return "".join(
            ("PoolId: ", str(self.id), " with ", str(member_count), " objects in pool")
        )

class Archive:
    """In-memory mock of an archive holding a list of objects.

    The objects are expected to expose ``id``, ``pool_id``, ``hash`` and
    ``is_corruped`` attributes.
    """

    def __init__(self,objects):
        self.objects=objects

    def retrieveObj(self,id):
        """Return the first object of this archive whose ``id`` matches.

        Raises:
            StopIteration: if no object with that id is stored.
        """
        # Search the archive's own objects, not a module-level list.
        return next(obj for obj in self.objects if obj.id == id)

    def get_objects_by_pool_id(self,pool_id):
        """Return all objects assigned to ``pool_id``, in archive order."""
        return [obj for obj in self.objects if obj.pool_id == pool_id]

    def get_sample(self,n):
        """Return ``n`` distinct objects chosen at random."""
        return random.sample(self.objects,n)

    def corrupt(self,p):
        """Corrupt each object independently with probability ``p``.

        A corrupted object gets the digest of ``str(id) + "x"`` and its
        ``is_corruped`` flag is set.
        """
        for obj in self.objects:
            if random.uniform(0, 1) < p:
                obj.hash = hashlib.sha256((str(obj.id) + "x").encode(encoding)).hexdigest()
                obj.is_corruped = True

    def repair(self,pool_id):
        """Repair the pool ``pool_id`` and re-anchor its hash on the chain.

        Every object of the pool gets its digest re-derived from its id (the
        same formula ``Object`` uses on creation) and its corruption flag
        cleared; the pool hash is then recomputed from those objects and
        written with ``setPoolHash``. Uses the module-level ``w3``,
        ``sender``, ``contract``, ``metaTx`` and ``Pool`` and increments the
        module-level ``write_tx_count``.

        Returns:
            0, always.
        """
        global write_tx_count
        # Simulated restore of the pool that was actually reported as
        # corrupt, instead of whichever pool a global loop variable held.
        members = self.get_objects_by_pool_id(pool_id)
        for obj in members:
            obj.hash = hashlib.sha256(str(obj.id).encode(encoding)).hexdigest()
            obj.is_corruped = False
        repaired_pool = Pool(members, pool_id)

        write_tx_count = write_tx_count + 1
        # The nonce has to be refreshed before every transaction.
        metaTx.update({"nonce":w3.eth.getTransactionCount(sender)})
        contract.functions.setPoolHash(repaired_pool.id,repaired_pool.hash).transact(metaTx)
        # TODO what happens if a corrupt pool was found
        return 0

    def clean(self):
        """Placeholder: only prints a message."""
        print("Cleanup archive")
    
def optimal_size(prevalence,N):
    """Return the pool size for ``N`` objects of which ``prevalence`` are corrupt.

    Uses the approximation ``1.24 * (prevalence / N) ** -0.466`` from
    https://www.sciencedirect.com/science/article/pii/S1201971220306925
    and limits the result to the range ``1 .. N``: a pool cannot hold fewer
    than one or more than all objects.

    Args:
        prevalence: absolute number of corrupted objects among the N objects.
        N: total number of objects.

    Returns:
        The pool size as an int, at least 1 and at most ``max(1, int(N))``.
        With a prevalence of zero the largest allowed size is returned.

    Raises:
        ZeroDivisionError: if ``N`` is 0.
        ValueError: if ``prevalence / N`` is negative.
    """
    largest_pool = max(1, int(N))
    ratio = prevalence / N
    if ratio == 0:
        # The formula diverges for a zero ratio (math.pow would raise);
        # without expected corruption one pool holding everything is enough.
        return largest_pool
    size = round(1.24 * math.pow(ratio, -0.466))
    # Keep the size usable: never 0 and never larger than the archive.
    return min(max(size, 1), largest_pool)

# Connect to the node reachable at `ganache_url`.
w3 = web3.Web3(web3.HTTPProvider(ganache_url))

# The first account of the node pays for all transactions in this notebook.
sender = w3.eth.accounts[0]
# Starting balance (in ether) and transaction count; both are compared with
# the final values to report the cost of the run.
balance = w3.fromWei(w3.eth.get_balance(sender), "ether")
tx_count = w3.eth.getTransactionCount(sender)
print(f"Transaction Count {tx_count}")
print(f"ETH Balance: {balance}")
print(f"Sender Account: {sender}")

# Build artefact that contains the ABI of the FixityStorage contract.
compiled_contract_path = '../sol/build/contracts/FixityStorage.json'
# check contract address if this cell fails
deployed_contract_address = w3.toChecksumAddress(contract_address)
print(f"Contract Deployed at: {deployed_contract_address}")

with open(compiled_contract_path) as file:
    contract_json = json.load(file)  # load contract info as JSON
# The ABI is necessary to call the contract's functions.
contract_abi = contract_json['abi']

# Fetch deployed contract reference
contract = w3.eth.contract(
    address=deployed_contract_address,
    abi=contract_abi,
)
print(f"Contract Functions: {contract.all_functions()}")

# TODO get real world data
gas = 2_000_000
gasPrice = w3.toWei('50', 'gwei')

# Template for every transaction; the nonce is added right before each
# transaction call.
metaTx = {
    "from": sender,
    "to": deployed_contract_address,
    "gas": gas,
    "gasPrice": gasPrice,
}

Transaction Count 29
ETH Balance: 99.96515696
Sender Account: 0x18648B486Bd6B771DB957590E988A2464F22BfCd
Contract Deployed at: 0xf6cE8fAE2E0b3c30Ec5597ff6AB4c5C16B21dBcc
Contract Functions: [<Function getPoolHash(uint32)>, <Function setPoolHash(uint32,bytes32)>]


In [8]:
# create objects, each with a random corruption rate between 0 and p
objects = [Object(i,random.uniform(0,p)) for i in range(0,N)]

# Sanity check on the first few objects: the digest is the sha256 of the id.
# The sample is bounded by the number of objects so small N also works.
test_n = min(9, len(objects))
assert all(
    obj.hash == hashlib.sha256(str(obj.id).encode(encoding)).hexdigest()
    for obj in objects[:test_n]
)

# Sanity check on Pool: its hash is the sha256 of the concatenated digests.
testpool = Pool(objects[0:test_n])
hashlist = ""
for i in range(test_n):
    hashlist += hashlib.sha256(str(i).encode(encoding)).hexdigest()

assert testpool.hash == hashlib.sha256(hashlist.encode(encoding)).hexdigest()

k = optimal_size(prevalence,N)

# create pools of k consecutive objects; the last one may be smaller
pools = []
for i in range((len(objects) + k - 1) // k):
    members = objects[i * k:(i + 1) * k]
    # assign pool id to each object in the pool
    for obj in members:
        obj.pool_id = i
    pools.append(Pool(members, i))

assert pools[0].id == 0
# Every pool except the last must be full; the last holds between 1 and k
# objects. (Comparing pools[0] with pools[1] breaks with a single pool and
# with exactly two pools of unequal size.)
assert all(len(full_pool.objects) == k for full_pool in pools[:-1])
assert 1 <= len(pools[-1].objects) <= k
assert pools[len(pools)-1].hash

# Counters for chain writes (transactions) and chain reads (calls).
write_tx_count = 0
read_tx_count = 0
# persist pools on the blockchain
for pool in tqdm(pools):
    # The nonce has to be refreshed before every transaction.
    metaTx.update({"nonce":w3.eth.getTransactionCount(sender)})
    tx_hash = contract.functions.setPoolHash(pool.id,pool.hash).transact(metaTx)
    write_tx_count = write_tx_count + 1

# test if persistence on the blockchain was successful
pool_id = pools[0].id
poolHashBytes = contract.functions.getPoolHash(pool_id).call()
read_tx_count = read_tx_count + 1
assert poolHashBytes.hex() == pools[pool_id].hash

# create a mock archive
archive = Archive(objects)

# The checks on index 2 only make sense when that pool / object exists.
if len(pools) > 2:
    assert archive.objects[k*2].pool_id == 2
    assert Pool(archive.get_objects_by_pool_id(2)).hash == pools[2].hash
if len(objects) > 2:
    assert objects[2].hash==archive.retrieveObj(2).hash
# write transactions have to be exactly the number of pools at this stage
assert write_tx_count == len(pools)

# Round trip for one sample: local pool hash == stored pool == chain value.
sample = archive.retrieveObj(0)
assert sample.pool_id==0
pool_of_sample = Pool(archive.get_objects_by_pool_id(sample.pool_id))
assert pool_of_sample.hash == pools[0].hash
pool_in_blockchain = contract.functions.getPoolHash(sample.pool_id).call()
# This read was previously not counted.
read_tx_count = read_tx_count + 1
assert pool_of_sample.hash == pool_in_blockchain.hex()

# corrupt the archive
archive.corrupt(p)
# Number of objects that were actually hit (previously always left at 0).
corrupted_objects_count = sum(1 for obj in archive.objects if obj.is_corruped)

# clean up the archive
already_cleaned_pool_ids = set()
# Pools whose hash was already compared with the chain. A pool is verified
# once, not once per member object, which saves a pool rebuild and a chain
# read for every further object of the same pool.
checked_pool_ids = set()
for obj in tqdm(archive.objects):
    if obj.pool_id in checked_pool_ids:
        continue
    checked_pool_ids.add(obj.pool_id)

    pool_of_sample = Pool(archive.get_objects_by_pool_id(obj.pool_id))
    pool_in_blockchain = contract.functions.getPoolHash(obj.pool_id).call()
    read_tx_count = read_tx_count + 1

    # is the local pool hash the same as the one in the blockchain?
    if pool_of_sample.hash != pool_in_blockchain.hex():
        archive.repair(obj.pool_id)
        already_cleaned_pool_ids.add(obj.pool_id)

# Summary of the run: compares the final account state with the values
# recorded before any pool was written.
fin_balance = w3.fromWei(w3.eth.get_balance(sender), "ether")
fin_tx_count = w3.eth.getTransactionCount(sender)
# Transactions sent by `sender` since the start of the run.
sent_tx_count = fin_tx_count - tx_count
last_pool_size = len(pools[-1].objects)

print(f"Optimal poolsize {k} with prevalence {prevalence} in N={N}")
print(f"{N} objects distributed in {len(pools)} pools with size={k} last pool with size={last_pool_size}")
print(f"Transaction Count: {sent_tx_count} ")

# p_: probability that a pool of k objects contains at least one corrupted
# object; T: one write per pool plus one repair per affected pool.
p_ = 1 - math.pow((1-p),k)
T = N/k + p_*(N/k)
print(f"Expected number of transactions: {T}")
print(f"Total Cost=ETH {balance - fin_balance} for {sent_tx_count} transactions ")
print(f"Theoretical amount of write transactions: {N + prevalence} with N={N} + prevalence={prevalence}")
print(f"Repairing transactions={sent_tx_count - len(pools)}, versus naive reparing transactions prevalence={prevalence}")
print(f"Number of Distinct Cleaned Pools: {len(already_cleaned_pool_ids)}")




  0%|          | 0/20 [00:00<?, ?it/s]

BadFunctionCallOutput: Could not transact with/call contract function, is contract deployed correctly and chain synced?