In [1]:
import pandas as pd
import numpy as np
import random
import copy
import math

In [2]:
# Human-readable label for every move, keyed by its numeric id (0-10).
actions_map = dict(enumerate([
    'take 1 coin',
    'coup',
    'take 2 coins',
    'take 3 coins',
    'steal 2 coins',
    'assassinate',
    'exchange',
    'challenge',
    'block foreign aid',
    'block stealing',
    'block assassination',
]))

In [3]:
class Action:
    """A single move together with the numbers used to score and resolve it.

    Attributes:
        name: display label of the move.
        challengeable: whether another player may challenge the move.
        response_card: card (or list of cards) associated with the response.
        response_action: the move an opponent may answer with.
        p1_net_coins / p2_net_coins: coin change for the acting player and
            for the second player involved.
        p1_net_cards / p2_net_cards: card change for the same two players.
        base_utility: constant added when the move is scored.
        p_bluff: bluff probability attached to the move.
    """

    def __init__(self, name, challengeable, response_card, response_action, 
                 p1_net_coins, p2_net_coins, p1_net_cards, p2_net_cards, base_utility, p_bluff):
        self.name, self.challengeable = name, challengeable
        self.p1_net_coins, self.p2_net_coins = p1_net_coins, p2_net_coins
        self.p1_net_cards, self.p2_net_cards = p1_net_cards, p2_net_cards
        self.base_utility, self.p_bluff = base_utility, p_bluff
        # The response fields have their own setter; reuse it here.
        self.update_responses(response_card, response_action)

    def update_responses(self, response_card, response_action):
        """Replace the card and the move an opponent may respond with."""
        self.response_card, self.response_action = response_card, response_action
        

In [4]:
# One Action object per move. Keyword arguments label every number.
# 'challenge' (actions_map[7]) has no Action of its own; it only appears
# below as a response_action label.
take_1 = Action(
    name=actions_map[0], challengeable=False,
    response_card=None, response_action=None,
    p1_net_coins=1, p2_net_coins=0, p1_net_cards=0, p2_net_cards=0,
    base_utility=1, p_bluff=0,
)

coup = Action(
    name=actions_map[1], challengeable=False,
    response_card=None, response_action=None,
    p1_net_coins=-7, p2_net_coins=0, p1_net_cards=0, p2_net_cards=-1,
    base_utility=4, p_bluff=0,
)

take_2 = Action(
    name=actions_map[2], challengeable=True,
    response_card='Duke', response_action=actions_map[8],
    p1_net_coins=2, p2_net_coins=0, p1_net_cards=0, p2_net_cards=0,
    base_utility=1, p_bluff=0.1,
)

take_3 = Action(
    name=actions_map[3], challengeable=True,
    response_card=None, response_action=actions_map[7],
    p1_net_coins=3, p2_net_coins=0, p1_net_cards=0, p2_net_cards=0,
    base_utility=1, p_bluff=0.15,
)

steal_2 = Action(
    name=actions_map[4], challengeable=True,
    response_card=['Captain', 'Ambassador'], response_action=actions_map[9],
    p1_net_coins=2, p2_net_coins=-2, p1_net_cards=0, p2_net_cards=0,
    base_utility=1, p_bluff=0.5,
)

assassinate = Action(
    name=actions_map[5], challengeable=True,
    response_card='Contessa', response_action=actions_map[10],
    p1_net_coins=-3, p2_net_coins=0, p1_net_cards=0, p2_net_cards=-1,
    base_utility=1, p_bluff=0.5,
)

exchange = Action(
    name=actions_map[6], challengeable=True,
    response_card=None, response_action=actions_map[7],
    p1_net_coins=0, p2_net_coins=0, p1_net_cards=0, p2_net_cards=0,
    base_utility=1.34, p_bluff=0.5,
)

block_take_2 = Action(
    name=actions_map[8], challengeable=True,
    response_card=None, response_action=actions_map[7],
    p1_net_coins=0, p2_net_coins=-2, p1_net_cards=0, p2_net_cards=0,
    base_utility=1.5, p_bluff=0.1,
)

block_steal = Action(
    name=actions_map[9], challengeable=True,
    response_card=None, response_action=actions_map[7],
    p1_net_coins=2, p2_net_coins=-2, p1_net_cards=0, p2_net_cards=0,
    base_utility=2, p_bluff=0.5,
)

block_assassination = Action(
    name=actions_map[10], challengeable=True,
    response_card=None, response_action=actions_map[7],
    p1_net_coins=0, p2_net_coins=0, p1_net_cards=1, p2_net_cards=0,
    base_utility=3, p_bluff=0.5,
)

# Action objects by position.
# NOTE(review): keys 7-9 here hold the three block actions, whereas
# actions_map uses 8-10 for the same labels (7 is 'challenge' there).
actions = dict(enumerate([
    take_1,
    coup,
    take_2,
    take_3,
    steal_2,
    assassinate,
    exchange,
    block_take_2,
    block_steal,
    block_assassination,
]))

# Actions each influence card lets its holder take without bluffing.
# The key order (Duke, Captain, Assassin, Contessa, Ambassador) is also the
# order of the entries in every belief vector built from this dict.
influences = {
    'Duke': [take_3, block_take_2, take_1, coup],
    'Captain': [steal_2, block_steal, take_2, take_1, coup],
    'Assassin': [assassinate, take_2, take_1, coup],
    'Contessa': [take_2, block_assassination, take_1, coup],
    'Ambassador': [exchange, block_steal, take_2, take_1, coup],
}

# Inverse of `influences`: for every action (in the order of `actions`), the
# cards that grant it, listed in the key order of `influences`.
influences_reverse = {
    act: [card for card, granted in influences.items() if act in granted]
    for act in actions.values()
}

In [5]:
# risk = {
#     take_1: 0,
#     coup: 0,
#     take_2: take_2.p_bluff * (3 * 1),
#     take_3: take_3.p_bluff * (3 * 1),
#     steal_2: steal_2.p_bluff * (3 * 1),
#     assassinate: assassinate.p_bluff * (3 * 1),
#     exchange: exchange.p_bluff * (3 * 1),
#     block_take_2: block_take_2.p_bluff * (3 * 1),
#     block_steal: block_steal.p_bluff * (3 * 1),
#     block_assassination: block_assassination.p_bluff * (3 * 2)
# }

# reward = {
#     take_1: 1,
#     coup: 3,
#     take_2: (1 - take_2.p_bluff) * 2,
#     take_3: (1 - take_3.p_bluff) * 3,
#     steal_2: (1 - steal_2.p_bluff) * 2,
#     assassinate: (1 - assassinate.p_bluff) * (3 * 2),
#     exchange: (1 - exchange.p_bluff) * exchange.base_utility,
#     block_take_2: (1 - block_take_2.p_bluff) * (2),
#     block_steal: (1 - block_steal.p_bluff) * (2),
#     block_assassination: (1 - block_assassination.p_bluff) * (3 * 2)
# }

def utility(influences, action, p_block, p_challenge, enabling_cards=None):
    """Score how attractive ``action`` is for the player described by ``influences``.

    The score is the sum of:
      * 3 points per card the second player loses (``-3 * p2_net_cards``),
      * the coin swing (``p1_net_coins - p2_net_coins``),
      * the chance of not being blocked (``1 - p_block``),
      * ``1 - p_challenge`` when the action is a bluff, or
        ``3 * p_challenge`` when the player really can take the action,
      * the action's ``base_utility``.

    Args:
        influences: what the player holds. Accepted forms are a collection of
            Action objects, a collection of card names, or a mapping of card
            name -> list of Action objects.
        action: the Action being scored.
        p_block: probability the action is blocked.
        p_challenge: probability the action is challenged.
        enabling_cards: optional mapping Action -> card names that grant it,
            used to resolve card names. Defaults to the notebook-level
            ``influences_reverse`` table when that is defined.

    Returns:
        The score as a plain number.

    The previous ``np.isin(action, influences)`` test compared an Action with
    card-name strings, so it never matched and every action was scored as a
    bluff.
    """
    if enabling_cards is None:
        enabling_cards = globals().get("influences_reverse", {})

    if isinstance(influences, str):
        influences = [influences]  # a lone card name, not a sequence of characters

    held = list(influences)
    if isinstance(influences, dict):
        # card -> actions mapping: the holder may take any listed action
        granted = [act for acts in influences.values() for act in acts]
    else:
        granted = held

    cards_for_action = enabling_cards.get(action, ())
    honest = action in granted or any(
        isinstance(card, str) and card in cards_for_action for card in held
    )

    bluff_term = 0 if honest else (1 - p_challenge)
    honest_term = 3 * p_challenge if honest else 0

    return ((-3 * action.p2_net_cards) +
            action.p1_net_coins -
            action.p2_net_coins +
            (1 - p_block) +
            bluff_term +
            honest_term +
            action.base_utility)


In [6]:
class Bot:
    """A computer-controlled player.

    Attributes:
        bluff_degree: number compared against an action's utility when the
            bot decides whether to respond without holding the card.
        cards: list of card names currently held.
        num_coins: current coin count.
        hostility: suspicion threshold at which the bot challenges.
        kb: knowledge base, a mapping of other player -> list of beliefs
            (one per influence, in the key order of the influences table).
            ``None`` until ``build_kb`` is called.
    """

    def __init__(self, bluff_degree, cards, num_coins, hostility):
        self.bluff_degree = bluff_degree
        self.cards = cards
        self.num_coins = num_coins
        self.hostility = hostility
        self.kb = None

    def num_coins_adj(self, n):
        """Add ``n`` coins (negative ``n`` spends coins)."""
        self.num_coins += n

    def cards_adj(self, card):
        """Remove one copy of ``card`` from the hand."""
        self.cards.remove(card)

    def build_kb(self, kb):
        """Install ``kb`` as this bot's knowledge base."""
        self.kb = kb

    def update_kb(self, influences, influence, action, kb, base_or_response, i, ):
        """Re-weight the beliefs held about player ``i`` after they used ``action``.

        The belief for ``influence`` is multiplied by ``exp(-alpha / 5)``,
        where ``alpha`` is the utility of ``action``, and the vector is then
        normalised and stored under ``self.kb[i]``.

        Args:
            influences: card -> actions table; its key order defines the
                positions in ``kb``.
            influence: the influence whose belief is re-weighted, given as a
                card name or as an integer position.
            action: the Action that was observed.
            kb: current belief list about player ``i`` (left unmodified).
            base_or_response: unused.
            i: key under which the updated beliefs are stored.

        NOTE(review): the earlier loop multiplied the whole list by the same
        factor (a TypeError for a list, and a no-op after normalisation for
        an array). Scaling only the ``influence`` entry is the reading
        implemented here; confirm that this is the intended update rule.
        """
        # utility() needs block/challenge probabilities; 0 and 0 are the
        # values used everywhere else that a card is scored.
        alpha = utility(influences, action, 0, 0)
        likelihood = math.exp(-alpha / 5)

        names = list(influences)
        if isinstance(influence, int) and not isinstance(influence, bool):
            target = influence if 0 <= influence < len(kb) else None
        elif influence in names:
            target = names.index(influence)
        else:
            target = None

        weights = list(kb)
        if target is None:
            # Unknown influence: scale everything (cancels on normalisation).
            weights = [w * likelihood for w in weights]
        else:
            weights[target] *= likelihood

        total = sum(weights)
        if self.kb is None:
            self.kb = {}
        # Avoid dividing by zero when every belief is zero.
        self.kb[i] = [w / total for w in weights] if total else weights
        
        

In [11]:
def initial_beliefs(own_cards, influence_names, copies=3, hand_size=2):
    """Starting belief that another player holds each influence.

    For every influence, computes the probability that a hand of
    ``hand_size`` cards drawn from the cards this player cannot see contains
    at least one copy, then normalises the values so they sum to 1.

    Args:
        own_cards: the card names in this player's own hand.
        influence_names: iterable of all influence names; fixes the order of
            the returned list.
        copies: copies of each influence in the deck.
        hand_size: number of cards each player is dealt.

    Returns:
        A list of normalised beliefs, one per influence.
    """
    names = list(influence_names)
    unseen = copies * len(names) - hand_size           # 13 for the standard deck
    all_hands = math.comb(unseen, hand_size)

    raw = []
    for name in names:
        still_out = copies - own_cards.count(name)     # copies not in our hand
        raw.append(1 - (math.comb(unseen - still_out, hand_size) / all_hands))

    total = sum(raw)
    return [value / total for value in raw]


bag = ['Duke', 'Captain', 'Assassin', 'Contessa', 'Ambassador'] * 3
random.shuffle(bag)

bots = []
bluff_degree = 0

# Deal two cards to each of the five bots; hostility is left unset (None).
for i in range(5):
    cards = random.sample(bag, 2)
    for card in cards:
        bag.remove(card)

    bots.append(Bot(bluff_degree, cards, 2, None))
    bluff_degree += 0.25

# Every bot starts with the same prior about each of the other four bots,
# derived from the two cards it can see in its own hand.
for bot in bots:
    prior = initial_beliefs(bot.cards, influences)
    bot.build_kb({other: list(prior) for other in bots if other is not bot})


for bot in bots:
    print(bot.cards)
print(bots[0].kb)

['Captain', 'Contessa']
['Assassin', 'Duke']
['Duke', 'Ambassador']
['Contessa', 'Assassin']
['Assassin', 'Ambassador']
[[0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998], [0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998], [0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998], [0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998, 0.19999999999999998]]


In [None]:
def _card_value(card, influences):
    """Worth of holding ``card``: the utility of the best action it grants."""
    return max((utility(influences, act, 0, 0) for act in influences[card]), default=0)


def _lose_influence(player, influences):
    """Remove the least valuable card from ``player``'s hand, if any remain."""
    if player.cards:
        player.cards_adj(min(player.cards, key=lambda card: _card_value(card, influences)))


def _response_action(action, influences_reverse):
    """Action object named by ``action.response_action``, or None.

    ``response_action`` is stored as a label; the keys of
    ``influences_reverse`` are the known Action objects, so the label is
    looked up there. A label with no Action (e.g. 'challenge') gives None.
    """
    response = action.response_action
    if hasattr(response, 'name'):
        return response
    for known in influences_reverse:
        if known.name == response:
            return known
    return None


def _resolve_challenge(action, claimant, doubter, actions_inf, influences, influences_reverse):
    """Let ``doubter`` decide whether to challenge ``claimant``'s use of ``action``.

    Returns 'none' when no challenge is made, 'won' when the claimant holds
    an enabling card (the doubter loses a card) and 'lost' when the claimant
    is caught (the claimant loses a card).
    """
    names = list(influences)
    # Positions, inside the belief vectors, of the influences that
    # actions_inf lists for this action.
    watched = [names.index(name) for name in actions_inf.get(action, ())]
    suspicion = sum(1 - doubter.kb[claimant][j] for j in watched)

    # A bot without a hostility threshold never challenges.
    if doubter.hostility is None or suspicion < doubter.hostility:
        print(f"{doubter} will not challenge.")
        return 'none'

    print(f"{doubter} is challenging {claimant}.")
    if any(card in influences_reverse[action] for card in claimant.cards):
        print(f"{claimant} has the card. {doubter} lost the challenge.")
        _lose_influence(doubter, influences)
        return 'won'

    print(f"{claimant} does not have the card. {claimant} lost the challenge.")
    _lose_influence(claimant, influences)
    return 'lost'


def _exchange_cards(player, bag, influences):
    """Swap the player's least valuable card for a better one drawn from ``bag``."""
    drawn = random.sample(bag, min(2, len(bag)))
    if not drawn or not player.cards:
        return
    incoming = max(drawn, key=lambda card: _card_value(card, influences))
    outgoing = min(player.cards, key=lambda card: _card_value(card, influences))
    if _card_value(incoming, influences) > _card_value(outgoing, influences):
        bag.remove(incoming)
        player.cards.remove(outgoing)
        bag.append(outgoing)
        random.shuffle(bag)
        player.cards.append(incoming)


def perform_action(action, p_1, p_2, bots, actions_inf, influences, influences_reverse, claim, bag=None):
    """Play ``action`` for ``p_1`` (optionally against ``p_2``) and apply the outcome.

    Order of play:
      1. A cost (negative ``p1_net_coins``) is paid up front, exactly once.
      2. If the action is challengeable and has a target, ``p_2`` may
         challenge it; a caught bluff costs ``p_1`` a card and ends the turn.
      3. ``p_2`` may then answer with the action's response (a block) when it
         holds a card listed in ``actions_inf[action]``, or as a bluff when
         its ``bluff_degree`` is below the response's utility. ``p_1`` may
         challenge the block; an unchallenged or proven block cancels the action.
      4. Otherwise the effects are applied: gains for ``p_1``, coin and card
         changes for ``p_2``, and the card swap for 'exchange'.

    Args:
        action: the Action being played.
        p_1: acting player.
        p_2: targeted player, or None.
        bots: all players (unused; kept for signature compatibility).
        actions_inf: Action -> list of influence names tied to that action.
        influences: card name -> list of Actions the card grants.
        influences_reverse: Action -> list of card names that grant it.
        claim: the influence ``p_1`` claims; only used in the log line.
        bag: remaining deck as a list of card names; needed for 'exchange'.

    Returns:
        1 when ``p_1`` was challenged and proved the claim, otherwise None.

    NOTE(review): this block did not parse (``if winner = 1``) and used
    several undefined names, so this is a reconstruction. Two points need
    confirmation: block actions are treated as pure cancellations (their
    coin/card numbers are used for scoring only), and a challenge is resolved
    before a block is offered.
    """
    print(f"{p_1} is performing {action.name} as {claim} against {p_2}")

    if action.p1_net_coins < 0:
        p_1.num_coins_adj(action.p1_net_coins)

    won_challenge = False

    if action.challengeable and p_2 is not None:
        outcome = _resolve_challenge(action, p_1, p_2, actions_inf, influences, influences_reverse)
        if outcome == 'lost':
            return None
        won_challenge = outcome == 'won'

        response = _response_action(action, influences_reverse)
        if response is not None and p_2.cards:
            holds_blocker = any(card in actions_inf.get(action, ()) for card in p_2.cards)
            if holds_blocker or p_2.bluff_degree < utility(influences, response, 0, 0):
                print(f"{p_2} is performing {response.name} as {claim} against {p_1}")
                block = _resolve_challenge(response, p_2, p_1, actions_inf, influences, influences_reverse)
                if block != 'lost':
                    # The block stands, so the action has no effect.
                    return 1 if won_challenge else None
                print(f"{p_1} won the challenge, so their original action is fulfilled.")

    # The action goes through. Costs were already paid above.
    if action.p1_net_coins > 0:
        p_1.num_coins_adj(action.p1_net_coins)
    if p_2 is not None:
        p_2.num_coins_adj(action.p2_net_coins)
        if action.p2_net_cards < 0:
            _lose_influence(p_2, influences)

    if action.name == 'exchange' and bag:
        _exchange_cards(p_1, bag, influences)

    return 1 if won_challenge else None

    

In [None]:
# actions = {
#     0: take_1,
#     1: coup,
#     2: take_2,
#     3: take_3,
#     4: steal_2,
#     5: assassinate,
#     6: exchange,
#     7: block_take_2,
#     8: block_steal,
#     9: block_assassination
# }

def game_loop(bots):
    """Draft of the main turn loop: scores every action for each bot in turn.

    For each bot it builds a table of block probabilities, scores the ten
    actions against four opponents with ``utility``, and looks up the
    position of the highest score. Nothing is played yet: the result
    (``index``) is not used and ``perform_action`` is never called.

    NOTE(review): unfinished. The inline notes below mark statements that
    raise or cannot do what the surrounding code expects.
    """
    
    # Influences listed against each action; only actions with a non-empty
    # list get a non-zero block probability below.
    actions_inf = {
        take_1: [],
        coup: [],
        take_2: ['Duke'],
        take_3: [],
        steal_2: ['Captain', 'Ambassador'],
        assassinate: ['Contessa'],
        exchange: [],
        block_take_2: [],
        block_steal: [],
        block_assassination: []
    }
    
    # Loop guard. NOTE(review): never changed below, so `while b > 1` does
    # not terminate as written.
    b = 5
#     base_actions = []
#     for influence, base in influences.items():
#         for action in base['base']:
#             if action not in [coup, take_1]
#                 base_actons.append(action)
    
    while b > 1:
        
        for i in range(len(bots)):
            
            # NOTE(review): 9 entries, but indexed below with j in range(10).
            p_challenge = [0, 0, 0.1, 0.2, 0.3, 0.3, 0.2, 0.2, 0.3]
            # NOTE(review): both tables start with an empty row, so appended
            # rows are offset by one and np.array() receives ragged input.
            p_blocks = [[]]
            
            action_utilities = [[]]
            
            for j in range(len(actions_inf)):
                # NOTE(review): dict views cannot be indexed in Python 3;
                # presumably list(actions_inf)[j] was intended.
                inf_list = actions_inf[actions_inf.keys()[j]]
                # Rebinding the loop variable does not change inf_list.
                for inf in inf_list:
                    inf = list(influences).index(inf)
                
                if len(inf_list) == 0:
                    p_blocks.append([0] * 4)
                    continue
                    
                p_block = []
                
                for player in bots[i].kb.keys():
                    p = 0
                    for inf in inf_list:
                        # NOTE(review): the setup cell keys each kb by Bot
                        # objects, while inf is an influence name here;
                        # bots[i].kb[player][<position>] looks intended.
                        p += player.kb[inf]
                    p_block.append(p)
                    
                p_blocks.append(p_block)
                
            for j in range(10):
                u_row = []
                for k in range(4):
                    # NOTE(review): p_block is the flat list left over from
                    # the loop above; p_blocks looks intended.
                    u_row.append(utility(bots[i].cards, actions_inf.keys()[j], p_block[j][k], p_challenge[j]))
                action_utilities.append(u_row)
            
            action_utilities = np.array(action_utilities)
            max_utility = np.max(action_utilities)
            # Maps each utility value to its (row, column); equal values
            # overwrite earlier positions.
            d = dict( (j,(x, y)) for x, i in enumerate(action_utilities) for y, j in enumerate(i) )
            
            # Position of the best score; not used further yet.
            index = d[max_utility]
            
                    
            
            