In [1]:
import copy 
import matplotlib.pyplot as plt
import mdptoolbox as mdpt
import numpy as np

In [159]:
class State:
    def __init__(self, num_agents, length=2):
        self.num_agents = num_agents
        self.blocks = np.zeros((num_agents, num_agents), dtype=int)
        self.honest_fork = 0
        self.length = length
        self.adopted = np.zeros(num_agents, dtype=int) # bit vector indicating if an agent has adopted
        
    # Creates the first block and updates the honest fork.
    def initializeState(self):
        first_block = np.random.randint(self.num_agents)
        self.blocks[first_block, first_block] = 1
        self.honest_fork = first_block
        
    # returns the state from the perspective of the agent.
    # the state is a string with the following format:
    # XXX,XXX,X,X. 
    # - The first 3 numbers are the forks lengths
    # - The next 3 numbers are the amount of blocks owned in each fork
    # - The next number is the honest fork
    # - The last is a bit indicating if the agent has adopted (1) or not (0)
    def getState(self, agent_index):
        lens = np.sum(self.blocks, axis=0)
        temp_blocks = copy.deepcopy(self.blocks[agent_index])
        
        # update the lens and the blocks so agent_index is seen as zero index.
        lens[0], lens[agent_index] = lens[agent_index], lens[0]
        temp_blocks[0], temp_blocks[agent_index] = temp_blocks[agent_index], temp_blocks[0]
        
        # get honest fork from perspective of agent index.
        if self.honest_fork == 0:
            temp_honest_fork = agent_index
        elif self.honest_fork == agent_index:
            temp_honest_fork = 0
        else:
            temp_honest_fork = self.honest_fork
        
        return '{}{}{},{}{}{},{},{}'.format(
            *lens, *temp_blocks, temp_honest_fork, self.adopted[agent_index])
    
    def updateHonestFork(self):
        lens = np.sum(self.blocks, axis=0)
        if any(lens > lens[self.honest_fork]):
            self.honest_fork = np.argmax(lens)

    # Prints state from perspective of agent_index in human readable form.
    def prettyPrintState(self, agent_index):
        temp_state = self.getState(agent_index)
        print('fork lengths={}'.format(temp_state[:3]))
        print('owned blocks={}'.format(temp_state[4:7]))
        print('honest fork={}'.format(temp_state[8]))
        print('adopted={}'.format(bool(int(temp_state[10]))))
        
    # Define equality operator.
    def __eq__(self, other):
        eq = True
        if not np.all(self.blocks == other.blocks):
            eq = False
        if not np.all(self.adopted == other.adopted):
            eq = False
        if not self.honest_fork == other.honest_fork:
            eq = False
        return eq
    
    # Determines if the state is terminal.
    def isTerminal(self):
        lens = np.sum(self.blocks, axis=0)
        return self.length in lens

In [160]:
state = State(num_agents=3)
state.initializeState()
state.prettyPrintState(0)
print()
state.prettyPrintState(1)
print()
state.prettyPrintState(2)
print()
state.getState(0), state.getState(1), state.getState(2)

fork lengths=010
owned blocks=000
honest fork=1
adopted=False

fork lengths=100
owned blocks=100
honest fork=0
adopted=False

fork lengths=010
owned blocks=000
honest fork=1
adopted=False



('010,000,1,0', '100,100,0,0', '010,000,1,0')

In [161]:
# How to construct T and R from a set of policies for agents 1 & 2. agent 0 will optimize based on T, R
#
# 1. Get total possible states.
# 2. Get reachable states given agents 1 & 2 policies.
# 3. Construct T and R based on the size of the reachable states.
# 4. Find the probabilities of transitioning to each state.

In [162]:
# Helper class to enumerate all possible first person states.
class FirstPersonState:
    def __init__(self, state, length=2):
        self.state = np.array(state)
        self.terminal_length = length
    
    def __str__(self):
        res = ''
        for i in range(3):
            res += str(self.state[i])
        res += ','
        for i in range(3,6):
            res += str(self.state[i])
        res += ','
        res += str(self.state[6])
        res += ','
        res += str(self.state[7])
        return res
    
    def __setitem__(self, ind, val):
        self.state[ind] = val
        
    def __getitem__(self, ind):
        return self.state[ind]
    
    def isTerminal(self):
        return self.terminal_length in self.state[:3]

    def getHonestFork(self):
        return self.state[-2]
    
    def getAdopted(self):
        return self.state[-1]

    def updateHonestFork(self):
        if any(self.state[:3] > self.state[self.getHonestFork()]):
            self.state[-2] = np.argmax(self.state[:3])
    
    def __eq__(self, other):
        return all(self.state == other.state)

In [163]:
# Helper function to enumerate next states given a specific first person state.
def nextFirstPersonStates(state):
    new_states = []
    
    if state[-1] == 0:
        # me mining on my own fork
        temp = copy.deepcopy(state)
        temp[0] += 1
        temp[3] += 1
        temp.updateHonestFork()
        new_states.append(temp)
        
        # me adopting onto one of the other forks.
        for i in range(1,3):
            temp = copy.deepcopy(state)
            temp[i] += 1
            temp[3+i] += 1
            temp[-1] = 1
            temp.updateHonestFork()
            new_states.append(temp)
    else: # i already adopted, so only can mine on one of the other forks.
        for i in range(1,3):
            temp = copy.deepcopy(state)
            temp[i] += 1
            temp[3+i] += 1
            temp.updateHonestFork()
            new_states.append(temp)
    
    # anyone else mining on their own fork
    for i in range(1,3):
        temp = copy.deepcopy(state)
        temp[i] += 1
        temp.updateHonestFork()
        new_states.append(temp)
        
    # anyone else mining on another fork.
    for i in range(3):
        if state[i] != 0:
            for j in range(1,3):
                if j != i:
                    temp = copy.deepcopy(state)
                    temp[i] += 1
                    temp.updateHonestFork()
                    new_states.append(temp)
    return new_states

In [164]:
# Function to get all first person states possible for a given length.
def getAllPossibleFirstPersonStates(num_players=3, length=2):
    # initial 4 states.
    init_state = FirstPersonState([0,0,0,0,0,0,0,0], length)
    oneblock_state_a = FirstPersonState([1,0,0,1,0,0,0,0], length)
    oneblock_state_b = FirstPersonState([0,1,0,0,0,0,1,0], length)
    oneblock_state_c = FirstPersonState([0,0,1,0,0,0,2,0], length)
    
    # Setting up standard BFS.
    total_states = [init_state, oneblock_state_a, oneblock_state_b, oneblock_state_c]
    states_to_process = [oneblock_state_a, oneblock_state_b, oneblock_state_c]
    explored = [str(init_state)]
    
    while states_to_process:
        elem = states_to_process.pop()
        explored.append(str(elem))
        next_states = nextFirstPersonStates(elem)
        for s in next_states:
            if s not in total_states:
                total_states.append(s)
            if not s.isTerminal() and str(s) not in explored:
                states_to_process.append(s)
    return total_states

In [165]:
total_states = getAllPossibleFirstPersonStates()

In [166]:
len(total_states)

67

In [167]:
# Helper class to map state strings to indeces. 
class Ind:
    def __init__(self, total_states):
        self.stringToInd = {}
        self.indToString = {}
        for i in range(len(total_states)):
            self.stringToInd[str(total_states[i])] = i
            self.indToString[i] = str(total_states[i])
    
    def i(self, string):
        return self.stringToInd[string]

    def s(self, ind):
        return self.indToString[ind]

In [168]:
si = Ind(total_states)

In [169]:
# Now define fully selfish policy. 
selfish = np.ones(len(total_states))

In [174]:
# Finding reachable states for agent 0, given both agents 1 and 2 policies.

# initial 4 states.
init_state = State(num_agents=3)
oneblock_state_a = State(num_agents=3)
oneblock_state_a.blocks[0,0] = 1
oneblock_state_a.updateHonestFork()
oneblock_state_b = State(num_agents=3)
oneblock_state_b.blocks[1,1] = 1
oneblock_state_b.updateHonestFork()
oneblock_state_c = State(num_agents=3)
oneblock_state_c.blocks[2,2] = 1
oneblock_state_c.updateHonestFork()

In [182]:
# Setting up standard BFS.
reachable_states = [init_state.getState(0), oneblock_state_a.getState(0), 
                   oneblock_state_b.getState(0), oneblock_state_c.getState(0)]
states_to_process = [oneblock_state_a, oneblock_state_b, oneblock_state_c]
explored = [init_state]

In [183]:
while states_to_process:
    cur_state = states_to_process.pop(0)
    explored.append(cur_state)
    reachable_states.append(cur_state.getState(0))

    # Determine agents 1 and 2 actions. In this case they will only be selfish
    actions = [(1,1), (2,2)]
    
    actions.append((0, cur_state.honest_fork))
    if cur_state.adopted[0] == 0: # not adopted
        # Agent 0 can be selfish.
        actions.append((0,0))
        
    next_states = []
    for a in actions:
        tmp_state = copy.deepcopy(cur_state)
        tmp_state.blocks[a] += 1
        tmp_state.updateHonestFork()
        if a[0] != a[1]: # adopting
            print(tmp_state.adopted)
            tmp_state.adopted[a[0]] = 1
            print(tmp_state.adopted)
        next_states.append(tmp_state)
        
    for s in next_states:
        if not s.isTerminal() and s not in explored:
            states_to_process.append(s)
reachable_states = list(set(reachable_states))

[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]
[0 0 0]
[1 0 0]


In [177]:
reachable_states

['110,100,1,0',
 '111,100,2,0',
 '110,100,0,0',
 '100,100,0,0',
 '011,000,1,0',
 '101,100,0,0',
 '111,100,0,0',
 '011,000,2,0',
 '111,100,1,0',
 '010,000,1,0',
 '000,000,0,0',
 '001,000,2,0',
 '101,100,2,0']

In [None]:
def constructTR(agent1_policy, agent2_policy)