In [1]:
import mdptoolbox
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as ss

In [56]:
def getAdoptMatrices(rho, underpaying=True):
    """Build the transition and reward matrices for the `adopt` action.

    From every state, adopt moves to (1, 0, 'irrelevant') with probability
    `alpha` and to (0, 1, 'irrelevant') with probability `1 - alpha`.

    Reads the module-level `num_states`, `states`, `state_mapping`, `alpha`
    and `T`.

    Args:
        rho: candidate relative revenue used to weight the rewards.
        underpaying: when False, states at the maximum fork length T (with
            a != h) use the overpay reward formulas instead of the plain
            adopt reward.

    Returns:
        Tuple `(transitions, rewards)` of scipy CSR matrices, each of shape
        (num_states, num_states).
    """
    # creating the adopt transition & reward matrices
    adopt_transitions = np.zeros(shape=(num_states, num_states))
    adopt_rewards = np.zeros(shape=(num_states, num_states))

    # each adopt row only can map to (1,0,irrelevant) or (0,1,irrelevant)
    adopt_new_state_1_index = state_mapping[(1, 0, 'irrelevant')]
    adopt_new_state_2_index = state_mapping[(0, 1, 'irrelevant')]

    for state_index in range(num_states):
        a, h, _ = states[state_index]
        adopt_transitions[state_index, adopt_new_state_1_index] = alpha
        adopt_transitions[state_index, adopt_new_state_2_index] = 1 - alpha

        at_max_fork = ((a == T) or (h == T)) and (a != h)
        if at_max_fork and not underpaying:
            # overpaying: attacker ahead vs. honest ahead
            if a > h:
                reward = overpayAttackerAhead(a, h, rho)
            else:
                reward = overpayHonestAhead(a, h, rho)
        else:
            # Adopting hands the h honest blocks to the honest network, which
            # costs rho per block. The sign must be negative, consistent with
            # the -rho*h term used in overpayHonestAhead; a positive sign
            # would make giving up profitable.
            reward = -rho * h

        # the reward does not depend on who mines the next block
        adopt_rewards[state_index, adopt_new_state_1_index] = reward
        adopt_rewards[state_index, adopt_new_state_2_index] = reward

    # making matrices sparse
    return ss.csr_matrix(adopt_transitions), ss.csr_matrix(adopt_rewards)

In [57]:
def getOverrideMatrices(rho, underpaying=True):
    """Build the transition and reward matrices for the `override` action.

    Three kinds of rows are produced:
      * states at the maximum fork length T (with a != h) are forced back to
        (1, 0, 'irrelevant') / (0, 1, 'irrelevant'), like an adopt;
      * states with a > h perform the override and earn (1 - rho) * (h + 1);
      * all remaining states treat override as illegal: they jump to state 0
        with a large negative reward.

    Reads the module-level `num_states`, `states`, `state_mapping`, `alpha`
    and `T`.

    Args:
        rho: candidate relative revenue used to weight the rewards.
        underpaying: selects the reward used at the maximum fork length;
            True charges the honest blocks, False uses the overpay formulas.

    Returns:
        Tuple `(transitions, rewards)` of scipy CSR matrices, each of shape
        (num_states, num_states).
    """
    # creating the override transition & reward matrices
    override_transitions = np.zeros(shape=(num_states, num_states))
    override_rewards = np.zeros(shape=(num_states, num_states))

    # restart states used when the fork is truncated (loop-invariant)
    adopt_new_state_1_index = state_mapping[(1, 0, 'irrelevant')]
    adopt_new_state_2_index = state_mapping[(0, 1, 'irrelevant')]

    for state_index in range(num_states):
        a, h, _ = states[state_index]

        # checking if we are at the max fork length.
        if ((a == T) or (h == T)) and (a != h):
            override_transitions[state_index, adopt_new_state_1_index] = alpha
            override_transitions[state_index, adopt_new_state_2_index] = 1 - alpha
            if underpaying:
                # The h honest blocks are accepted, costing rho each; negative
                # sign is consistent with the -rho*h term in overpayHonestAhead.
                reward = -rho * h
            elif a > h:
                # attacker ahead
                reward = overpayAttackerAhead(a, h, rho)
            else:
                # honest ahead
                reward = overpayHonestAhead(a, h, rho)
            override_rewards[state_index, adopt_new_state_1_index] = reward
            override_rewards[state_index, adopt_new_state_2_index] = reward
            continue

        # a > h, which must be true for override to succeed
        if a > h:
            # (a-h, 0, irrelevant)
            new_state_1 = (a - h, 0, 'irrelevant')
            # (a-h-1, 1, relevant)
            new_state_2 = (a - h - 1, 1, 'relevant')
            override_transitions[state_index, state_mapping[new_state_1]] = alpha
            override_transitions[state_index, state_mapping[new_state_2]] = 1 - alpha
            override_rewards[state_index, state_mapping[new_state_1]] = (1 - rho) * (h + 1)
            override_rewards[state_index, state_mapping[new_state_2]] = (1 - rho) * (h + 1)
        else:
            # override is not allowed here: send to state 0 with a penalty so
            # the solver never picks it.
            override_transitions[state_index, 0] = 1
            override_rewards[state_index, 0] = -1*rho*1000

    # making matrices sparse
    return ss.csr_matrix(override_transitions), ss.csr_matrix(override_rewards)

In [95]:
def getWaitMatrices(rho, underpaying=True):
    """Build the transition and reward matrices for the `wait` action.

    Row types:
      * states at the maximum fork length T (with a != h) are forced back to
        (1, 0, 'irrelevant') / (0, 1, 'irrelevant');
      * 'irrelevant' / 'relevant' states below T grow one of the two chains
        with no immediate reward;
      * 'active' states below T with a >= h > 0 split the honest block
        between the two branches according to `gamma`;
      * every other state treats wait as illegal: it jumps to state 0 with a
        large negative reward.

    Reads the module-level `num_states`, `states`, `state_mapping`, `alpha`,
    `gamma` and `T`.

    Args:
        rho: candidate relative revenue used to weight the rewards.
        underpaying: selects the reward used at the maximum fork length;
            True charges the honest blocks, False uses the overpay formulas.

    Returns:
        Tuple `(transitions, rewards)` of scipy CSR matrices, each of shape
        (num_states, num_states).
    """
    # creating the wait transition & reward matrices
    wait_transitions = np.zeros(shape=(num_states, num_states))
    wait_rewards = np.zeros(shape=(num_states, num_states))

    # restart states used when the fork is truncated (loop-invariant)
    adopt_new_state_1_index = state_mapping[(1, 0, 'irrelevant')]
    adopt_new_state_2_index = state_mapping[(0, 1, 'irrelevant')]

    for state_index in range(num_states):
        a, h, fork = states[state_index]
        below_cap = (a < T) and (h < T)

        # checking if we are at the max fork length.
        if ((a == T) or (h == T)) and (a != h):
            wait_transitions[state_index, adopt_new_state_1_index] = alpha
            wait_transitions[state_index, adopt_new_state_2_index] = 1 - alpha
            if underpaying:
                # The h honest blocks are accepted, costing rho each; negative
                # sign is consistent with the -rho*h term in overpayHonestAhead.
                reward = -rho * h
            elif a > h:
                # attacker ahead
                reward = overpayAttackerAhead(a, h, rho)
            else:
                # honest ahead
                reward = overpayHonestAhead(a, h, rho)
            wait_rewards[state_index, adopt_new_state_1_index] = reward
            wait_rewards[state_index, adopt_new_state_2_index] = reward

        # irrelevant or relevant
        elif ((fork == 'irrelevant') or (fork == 'relevant')) and below_cap:
            # (a+1, h, irrelevant)
            new_state_1 = (a + 1, h, 'irrelevant')
            # (a, h+1, relevant)
            new_state_2 = (a, h + 1, 'relevant')
            wait_transitions[state_index, state_mapping[new_state_1]] = alpha
            wait_transitions[state_index, state_mapping[new_state_2]] = 1 - alpha

        # active, and a >= h
        elif (fork == 'active') and below_cap and (h > 0) and (a >= h):
            # (a+1, h, active)
            new_state_1 = (a + 1, h, 'active')
            # (a-h, 1, relevant)
            new_state_2 = (a - h, 1, 'relevant')
            # (a, h+1, relevant)
            new_state_3 = (a, h + 1, 'relevant')
            wait_transitions[state_index, state_mapping[new_state_1]] = alpha
            wait_transitions[state_index, state_mapping[new_state_2]] = gamma * (1 - alpha)
            wait_transitions[state_index, state_mapping[new_state_3]] = (1 - gamma) * (1 - alpha)
            # only the branch where the honest block lands on the attacker's
            # chain pays out immediately
            wait_rewards[state_index, state_mapping[new_state_2]] = (1 - rho) * h

        else:
            # wait is not allowed here (including active states with a < h):
            # send to state 0 with a penalty so the solver never picks it.
            wait_transitions[state_index, 0] = 1
            wait_rewards[state_index, 0] = -1*rho*1000

    # making matrices sparse
    return ss.csr_matrix(wait_transitions), ss.csr_matrix(wait_rewards)

In [68]:
def getMatchMatrices(rho, underpaying=True):
    """Build the transition and reward matrices for the `match` action.

    Row types:
      * states at the maximum fork length T (with a != h) are forced back to
        (1, 0, 'irrelevant') / (0, 1, 'irrelevant');
      * 'relevant' states below T with a >= h > 0 perform the match, with the
        honest block split between the branches according to `gamma`;
      * every other state treats match as illegal: it jumps to state 0 with a
        large negative reward.

    Reads the module-level `num_states`, `states`, `state_mapping`, `alpha`,
    `gamma` and `T`.

    Args:
        rho: candidate relative revenue used to weight the rewards.
        underpaying: selects the reward used at the maximum fork length;
            True charges the honest blocks, False uses the overpay formulas.

    Returns:
        Tuple `(transitions, rewards)` of scipy CSR matrices, each of shape
        (num_states, num_states).
    """
    # creating the match transition & rewards matrices
    match_transitions = np.zeros(shape=(num_states, num_states))
    match_rewards = np.zeros(shape=(num_states, num_states))

    # restart states used when the fork is truncated (loop-invariant)
    adopt_new_state_1_index = state_mapping[(1, 0, 'irrelevant')]
    adopt_new_state_2_index = state_mapping[(0, 1, 'irrelevant')]

    for state_index in range(num_states):
        a, h, fork = states[state_index]

        # checking if we are at the max fork length.
        if ((a == T) or (h == T)) and (a != h):
            match_transitions[state_index, adopt_new_state_1_index] = alpha
            match_transitions[state_index, adopt_new_state_2_index] = 1 - alpha
            if underpaying:
                # The h honest blocks are accepted, costing rho each; negative
                # sign is consistent with the -rho*h term in overpayHonestAhead.
                reward = -rho * h
            elif a > h:
                # attacker ahead
                reward = overpayAttackerAhead(a, h, rho)
            else:
                # honest ahead
                reward = overpayHonestAhead(a, h, rho)
            match_rewards[state_index, adopt_new_state_1_index] = reward
            match_rewards[state_index, adopt_new_state_2_index] = reward
            continue

        # a >= h and relevant
        if (a >= h) and (fork == 'relevant') and (a < T) and (h < T) and (h > 0):
            # (a+1, h, active)
            new_state_1 = (a + 1, h, 'active')
            # (a-h, 1, relevant)
            new_state_2 = (a - h, 1, 'relevant')
            # (a, h+1, relevant)
            new_state_3 = (a, h + 1, 'relevant')
            match_transitions[state_index, state_mapping[new_state_1]] = alpha
            match_transitions[state_index, state_mapping[new_state_2]] = gamma * (1 - alpha)
            match_transitions[state_index, state_mapping[new_state_3]] = (1 - gamma) * (1 - alpha)
            # only the branch where the honest block lands on the attacker's
            # chain pays out immediately
            match_rewards[state_index, state_mapping[new_state_2]] = (1 - rho) * h
        else:
            # match is not allowed here: send to state 0 with a penalty so
            # the solver never picks it.
            match_transitions[state_index, 0] = 1
            match_rewards[state_index, 0] = -1*rho*1000

    # making matrices sparse
    return ss.csr_matrix(match_transitions), ss.csr_matrix(match_rewards)

In [60]:
def overpayAttackerAhead(a, h, rho):
    """Overpaying reward for a truncated state in which the attacker leads.

    Reads the module-level `alpha`; the expression divides by (1 - 2*alpha),
    so it is undefined for alpha == 0.5.

    Args:
        a: attacker chain length (must exceed `h`).
        h: honest chain length.
        rho: candidate relative revenue used to weight the reward.

    Returns:
        (1 - rho) times the sum of the two block-count terms below.
    """
    assert(a > h)
    expr1 = (alpha * (1 - alpha)) / ((1 - 2 * alpha)**2)
    expr2 = (1/2) * ((a - h) / (1 - 2 * alpha) + a + h)
    # Both terms count blocks credited to the attacker, so (1 - rho) has to
    # scale their sum -- the same way overpayHonestAhead applies (1 - rho) to
    # its whole block-count bracket. Previously only expr1 was scaled.
    # NOTE(review): confirm against the truncation bound in the reference paper.
    return (1 - rho) * (expr1 + expr2)

def overpayHonestAhead(a, h, rho):
    """Overpaying reward for a truncated state in which the honest chain leads.

    Reads the module-level `alpha`; the expression divides by (1 - 2*alpha),
    so it is undefined for alpha == 0.5.

    Args:
        a: attacker chain length.
        h: honest chain length (must exceed `a`).
        rho: candidate relative revenue used to weight the reward.

    Returns:
        A mix of the honest-wins cost (-rho * h) and a (1 - rho)-scaled
        attacker block count, weighted by (alpha / (1 - alpha)) ** (h - a).
    """
    assert(h > a)
    deficit = h - a
    # weight applied to the attacker-side term; its complement weights -rho*h
    catch_up = np.power(alpha/(1-alpha), deficit)

    honest_part = (1 - catch_up) * (-rho * h)
    attacker_weight = catch_up * (1 - rho)
    attacker_blocks = (alpha * (1-alpha)) / (np.power(1-2*alpha, 2)) + deficit / (1 - 2 * alpha)
    return honest_part + attacker_weight * attacker_blocks

In [61]:
def getAllMatrices(rho, underpaying=True):
    """Assemble the per-action matrices in the order adopt, override, wait, match.

    Args:
        rho: candidate relative revenue forwarded to each builder.
        underpaying: forwarded to each builder unchanged.

    Returns:
        Tuple `(transitions, rewards)`: two lists of four matrices each,
        index-aligned by action.
    """
    builders = (getAdoptMatrices, getOverrideMatrices, getWaitMatrices, getMatchMatrices)
    transitions = []
    rewards = []
    for build in builders:
        pair = build(rho, underpaying)
        transitions.append(pair[0])
        rewards.append(pair[1])
    return transitions, rewards

In [69]:
# Model parameters read as globals by the matrix builders and the search loop.
epsilon = 10e-5   # note: 10e-5 == 1e-4
T = 70            # maximum fork length before truncation
gamma = 0
alpha = 0.4

# Each chain length ranges over 0..T and there are 3 fork labels,
# giving (T+1)*(T+1)*3 states in total.
num_states = (T+1)*(T+1)*3

# Enumerate states in (a, h, fork) order; a state's list position is its index.
states = [
    (a_len, h_len, fork_label)
    for a_len in range(T+1)
    for h_len in range(T+1)
    for fork_label in ['irrelevant', 'relevant', 'active']
]
# reverse lookup: state tuple -> integer index
state_mapping = {state: index for index, state in enumerate(states)}
count = len(states)

In [102]:
# Bisection on rho: solve the underpaying MDP at the midpoint and move the
# bracket according to the sign of the average reward.
low = 0
high = 1
while (high - low) >= epsilon / 8:
    print(high - low, epsilon / 8)
    rho = (low + high) / 2
    matrices = getAllMatrices(rho, underpaying=True)
    rvi = mdptoolbox.mdp.RelativeValueIteration(matrices[0], matrices[1], epsilon/8)
    rvi.run()
    if rvi.average_reward > 0:
        low = rho
    else:
        high = rho
lower_bound = rho - epsilon

# Clamp at zero with the builtin max: np.max(x, 0) interprets 0 as the `axis`
# argument rather than a second operand, so it never clamped anything.
rho_prime = max(low - epsilon/4, 0)

# Solve the overpaying MDP once at rho_prime; its average reward is the cell output.
matrices = getAllMatrices(rho_prime, underpaying=False)
rvi = mdptoolbox.mdp.RelativeValueIteration(matrices[0], matrices[1], epsilon)
rvi.run()
rvi.average_reward

1 1.25e-05
0.5 1.25e-05
0.25 1.25e-05
0.125 1.25e-05
0.0625 1.25e-05
0.03125 1.25e-05
0.015625 1.25e-05
0.0078125 1.25e-05
0.00390625 1.25e-05
0.001953125 1.25e-05
0.0009765625 1.25e-05
0.00048828125 1.25e-05


KeyboardInterrupt: 