### Goal
Use deep Q-learning to train a bot that plays Secret Hitler.

In [14]:
import random
from numpy.random import permutation
import numpy as np

In [99]:
INPUT_SIZE = 100  # length of every event vector handed to a player


class input_maker():
    """Builds the fixed-width event vectors that are fed to the players.

    Every method returns a fresh ``np.zeros(INPUT_SIZE)`` array in which only
    the slots belonging to that event are filled in:

        0-6    seats known to share the viewer's role (see ``roles``)
        7-13   president seat (one-hot)
        14-20  chancellor seat (one-hot)
        21-27  votes, -1 for nein / +1 for ja
        28-34  executed seat (one-hot)
        35-41  special-election seat (one-hot)
        42-48  investigated seat (one-hot)
        49-50  publicly stated investigation result (lib / fas)
        51-52  card the investigating president actually saw
        53-55  president's claimed hand, -1 lib / +1 fas
        56-57  chancellor's claimed hand, -1 lib / +1 fas
        58-59  policy that was enacted (lib / fas)
    """

    @staticmethod
    def roles(seat, rs):
        """Encode what the player in `seat` knows about roles at game start.

        `rs` is the per-seat role list (0 = lib, 1 = fas, 2 = hitler).
        Liberals and Hitler only get their own seat marked; a fascist gets
        every seat holding the same role value marked.
        """
        inpts = np.zeros(INPUT_SIZE)
        my_role = rs[seat]
        if my_role in [0, 2]:
            inpts[seat] = 1
            return inpts

        # inpts[:7] is a view, so the masked assignment writes into inpts.
        inpts[:7][np.array(rs) == my_role] = 1
        return inpts

    @staticmethod
    def cabinet(pres, chance):
        """Encode the proposed government: president and chancellor seats."""
        inpts = np.zeros(INPUT_SIZE)
        inpts[7+pres] = 1
        inpts[14+chance] = 1
        return inpts

    @staticmethod
    def votes(vs):
        """Encode the seven votes in `vs` (0 = nein, 1 = ja)."""
        inpts = np.zeros(INPUT_SIZE)
        inpts[21:28] = np.array(vs) * 2 - 1 # -1 for nein, 1 for ja, better for the NN
        return inpts

    @staticmethod
    def kill(kill):
        """Encode the seat that was executed."""
        inpts = np.zeros(INPUT_SIZE)
        inpts[28 + kill] = 1
        return inpts

    @staticmethod
    def se(se):
        """Encode the seat chosen by a special election."""
        inpts = np.zeros(INPUT_SIZE)
        inpts[35 + se] = 1
        return inpts

    @staticmethod
    def inv(i, role):
        """Encode an investigation result that is visible to everyone.

        `i` is the investigated seat. `role` is 0 for lib and anything
        non-zero for fas/hitty; non-zero values are collapsed onto the single
        fascist slot so a raw Hitler role (2) cannot spill into slot 51, which
        belongs to ``inv_card``.
        """
        inpts = np.zeros(INPUT_SIZE)
        inpts[42 + i] = 1
        inpts[49 + (1 if role else 0)] = 1
        return inpts

    @staticmethod
    def inv_card(i, card):
        """Encode the card the president privately saw when investigating `i`.

        Meant for the president only (who will also see the regular ``inv``
        vector afterwards).
        """
        inpts = np.zeros(INPUT_SIZE)
        inpts[42 + i] = 1
        inpts[51 + card] = 1
        return inpts

    @staticmethod
    def claim_cards(pres_claim, chance_claim, policy_passed):
        """Encode both card claims and the policy that ended up enacted.

        `pres_claim` holds three cards and `chance_claim` two (0 = lib,
        1 = fas); `policy_passed` is 0 or 1.
        """
        inpts = np.zeros(INPUT_SIZE)
        inpts[53:56] = np.array(pres_claim)*2 - 1
        inpts[56:58] = np.array(chance_claim)*2 - 1
        inpts[58 + policy_passed] = 1
        # The vector used to be built and then dropped, so callers got None.
        return inpts

    # few more needed:
    # pres_pick, chance_pick, ...
        

In [125]:
class Game():
    """Runs one seven-player game of Secret Hitler between player objects.

    Each player object must provide ``feed_inputs``, ``start_game``,
    ``pick_chance``, ``vote``, ``pick_card_pres``, ``claim_cards_pres``,
    ``pick_card_chance``, ``claim_cards_chance``, ``pick_inv``,
    ``claim_inv``, ``pick_se`` and ``pick_kill``.

    Roles are 0 = liberal, 1 = fascist, 2 = Hitler; policy cards are
    0 = liberal, 1 = fascist. Call ``play_turn`` repeatedly until it returns
    something other than ``None``.
    """

    def __init__(self, players, roles=None, verbose=0):
        """Deal roles, tell every player its role and seat, shuffle the deck.

        players -- seven player objects, indexed by seat
        roles   -- optional per-seat role list; shuffled at random if omitted
        verbose -- 0 is silent, 1 prints the main events, 2 also prints votes
        """
        self.verbose = verbose
        self.players = players
        if roles is None:
            roles = permutation([0, 0, 0, 0, 1, 1, 2])

        if self.verbose >= 1:
            print(*map(lambda x: ["Lib", "Fas", "Hit"][x], roles))

        self.roles = roles
        self.lib_count = 0
        self.fas_count = 0
        self.last_pres = -1    # -1 never matches a seat, so nobody starts term-limited
        self.last_chance = -1
        self.pres = 0
        self.chance = 0
        self.on_se = False     # True while a special-election president is in office

        self.dead = [False for i in range(7)]

        for i, player in enumerate(self.players):
            player.feed_inputs(input_maker.roles(i, roles))
            player.start_game(roles[i], i)

        self.deck = permutation([0 for i in range(6)] + [1 for i in range(11)]).tolist()

    def filter_(self, lis):
        """Return a copy of `lis` with the entries of dead seats replaced by 0."""
        return [0 if self.dead[i] else e for i, e in enumerate(lis)]

    def _broadcast(self, inpts):
        """Feed the same input vector to every player."""
        for player in self.players:
            player.feed_inputs(inpts)

    def _next_living(self, seat):
        """Return the first living seat after `seat`, wrapping around the table."""
        n = len(self.dead)
        for _ in range(n):
            seat = (seat + 1) % n
            if not self.dead[seat]:
                break
        return seat

    def next_pres(self, se_pres=None):
        """Advance the presidency and reshuffle the deck if it is running out.

        se_pres -- seat picked by a special election. When given, that seat
                   becomes president; the following call (without `se_pres`)
                   resumes the rotation after the president who called the
                   special election.

        Executed players are skipped when the rotation advances.
        """
        if self.on_se:
            if se_pres is None:
                # Special election is over: continue after the president who called it.
                self.pres, self.last_pres = self._next_living(self.last_pres), self.pres
                self.on_se = False
            else:
                self.pres, self.last_pres = se_pres, self.pres
        else:
            self.pres, self.last_pres = self._next_living(self.pres), self.pres
        self.last_chance = self.chance

        if len(self.deck) < 3:
            # Rebuild the draw pile from every card that has not been enacted.
            self.deck = permutation([0 for i in range(6 - self.lib_count)] + [1 for i in range(11 - self.fas_count)]).tolist()

    def play_turn(self):
        """Play one full round: nomination, vote, legislation, executive power.

        Returns None while the game is still running, 1 when the liberals
        have won (fifth liberal policy, or Hitler executed) and -1 when the
        fascists have won (sixth fascist policy).
        """
        def show(cards):
            # Render a hand as R (fascist) / B (liberal) letters for the log.
            return ''.join(map(lambda x: 'R' if x else 'B', cards))

        chance_picks = self.players[self.pres].pick_chance()
        # Find legal chancellor pick
        i = 0
        chance = chance_picks[i]
        while chance in [self.last_pres, self.last_chance, self.pres] or self.dead[chance]:
            i += 1
            chance = chance_picks[i]

        self.chance = chance

        if self.verbose >= 1:
            print(f"{self.pres+1} has chosen {self.chance+1} as chancellor.")

        # Feed cabinet to players
        self._broadcast(input_maker.cabinet(self.pres, self.chance))

        # Tally votes & feed them to players (dead seats count as nein)
        votes = self.filter_([player.vote() for player in self.players])

        if self.verbose >= 2:
            print("Votes:", *list(map(lambda x: "Ja" if x else "Nein", votes)))

        self._broadcast(input_maker.votes(votes))

        # A cabinet needs a strict majority of the players still alive; with
        # all seven alive this is the same as requiring four ja votes.
        alive = self.dead.count(False)
        if 2 * sum(votes) <= alive:
            # Cabinet didn't pass
            if self.verbose >= 2:
                print("Failed to pass.")
            self.next_pres()
            return

        # Cabinet passed
        if self.verbose >= 2:
            print("Cabinet passes.")

        # Grab top three cards off of deck
        cards = sorted(self.deck[:3])
        self.deck = self.deck[3:]

        # President discards one (players get copies so they cannot alter the hand)
        discard = self.players[self.pres].pick_card_pres(cards[:])
        pres_claim = self.players[self.pres].claim_cards_pres(cards[:])

        if self.verbose >= 1:
            print(f"President ({self.pres+1}) claims {show(pres_claim)} (sees {show(cards)})")
        cards.remove(discard)

        # Chancellor enacts one
        enacted = self.players[self.chance].pick_card_chance(cards[:])
        chance_claim = self.players[self.chance].claim_cards_chance(cards[:])
        if self.verbose >= 1:
            print(f"Chancellor ({self.chance+1}) claims {show(chance_claim)} (sees {show(cards)})")

        if enacted == 0:
            self.lib_count += 1
        else:
            self.fas_count += 1

        if self.verbose >= 1:
            print(f"{'Fascist' if enacted else 'Liberal'} policy enacted.")
            print(f"Current tally: L{self.lib_count}, F{self.fas_count}")

        # Share claims with the players
        self._broadcast(input_maker.claim_cards(pres_claim, chance_claim, enacted))

        if enacted == 0:
            if self.lib_count == 5:
                # Libs win
                return 1
        else:
            if self.fas_count == 2:
                # inv power
                inv = self.players[self.pres].pick_inv()
                role = self.roles[inv]
                inv_claim = self.players[self.pres].claim_inv(role)
                if self.verbose >= 1:
                    print(f"{self.pres+1} investigates {inv + 1} as {'fascist' if inv_claim else 'liberal'}.")
                # Everyone hears what the president *says*, not the true role.
                # The claim is collapsed to lib/fas so it fits the two
                # investigation slots of the input vector.
                self._broadcast(input_maker.inv(inv, 1 if inv_claim else 0))

            elif self.fas_count == 3:
                # se power
                se = self.players[self.pres].pick_se()
                if self.verbose >= 1:
                    print(f"{self.pres+1} special elects {se+1}.")

                self._broadcast(input_maker.se(se))

                self.on_se = True
                self.next_pres(se_pres=se)
                # The presidency has already been handed over.
                return

            elif self.fas_count == 4 or self.fas_count == 5:
                # kill power: walk the president's priority list until a legal
                # target shows up. Not legal: the president, anyone already
                # dead, and (for a fascist-team president) Hitler.
                kill = self.players[self.pres].pick_kill()
                i = 0
                k = kill[i]
                while (k == self.pres or self.dead[k]
                       or (self.roles[self.pres] > 0 and self.roles[k] == 2)):
                    i += 1
                    k = kill[i]
                self.dead[k] = True
                if self.verbose >= 1:
                    print(f"{self.pres+1} kills {k+1}.")

                # Let the players know who was executed, like the other powers.
                self._broadcast(input_maker.kill(k))

                if self.roles[k] == 2:
                    # Executing Hitler ends the game in favour of the liberals.
                    return 1

            elif self.fas_count == 6:
                # Fas win
                return -1

        self.next_pres()
        return

In [126]:
class RandomPlayer():
    """Baseline bot: liberals act honestly, everyone else acts at random.

    Roles are 0 = lib, 1 = fas, 2 = hitty; cards are 0 = lib, 1 = fas.
    """

    def __init__(self):
        """Nothing to set up; role and seat arrive through start_game."""
        pass

    def start_game(self, role, seat):
        """Remember this player's role and seat for the coming game."""
        self.seat = seat
        self.role = role

    def feed_inputs(self, inputs):
        """Game events are ignored by this bot."""
        pass

    def _is_liberal(self):
        # Role 0 is the only one that plays truthfully.
        return self.role == 0

    def _random_other_seat(self):
        # Keep drawing seats until one that is not our own comes up.
        while True:
            seat = random.choice(range(7))
            if seat != self.seat:
                return seat

    def _claim(self, cards, count):
        # Liberals report the hand as given; others invent `count` sorted cards.
        if self._is_liberal():
            return cards
        made_up = [random.choice([0, 1]) for _ in range(count)]
        made_up.sort()
        return made_up

    def vote(self):
        """Cast a coin-flip vote: 1 = ja, 0 = nein."""
        return random.choice([0, 1])

    def pick_kill(self):
        """Return all seven seats in random order, most-wanted target first."""
        return permutation(range(7))

    def pick_chance(self):
        """Return all seven seats in random order, preferred chancellor first."""
        return permutation(range(7))

    def pick_se(self):
        """Pick a random seat other than our own for the special election."""
        return self._random_other_seat()

    def pick_inv(self):
        """Pick a random seat other than our own to investigate."""
        return self._random_other_seat()

    def claim_inv(self, role):
        """State an investigation result: the truth if liberal, else random."""
        return role if self._is_liberal() else random.choice([0, 1, 2])

    def pick_card_pres(self, cards):
        """Choose the card to discard as president.

        A liberal throws away a fascist card whenever the hand holds one;
        in every other case the discard is random.
        """
        if self._is_liberal() and 1 in cards:
            return 1
        return random.choice(cards)

    def pick_card_chance(self, cards):
        """Choose the card to enact as chancellor.

        A liberal enacts a liberal card whenever the hand holds one;
        in every other case the choice is random.
        """
        if self._is_liberal() and 0 in cards:
            return 0
        return random.choice(cards)

    def claim_cards_pres(self, cards):
        """Report the three-card hand drawn as president."""
        return self._claim(cards, 3)

    def claim_cards_chance(self, cards):
        """Report the two-card hand received as chancellor."""
        return self._claim(cards, 2)

In [127]:
# Seat seven random bots and play a single verbose game through to the end.
players = [RandomPlayer() for _ in range(7)]
game = Game(players, verbose=1)
while True:
    # play_turn() keeps returning None until one side has won.
    if game.play_turn() is not None:
        break

Fas Lib Fas Lib Lib Hit Lib
1 has chosen 4 as chancellor.
2 has chosen 7 as chancellor.
President (2) claims BRR (sees BRR)
Chancellor (7) claims BR (sees BR)
Liberal policy enacted.
Current tally: L1, F0
3 has chosen 5 as chancellor.
President (3) claims BBR (sees RRR)
Chancellor (5) claims RR (sees RR)
Fascist policy enacted.
Current tally: L1, F1
4 has chosen 1 as chancellor.
5 has chosen 6 as chancellor.
6 has chosen 1 as chancellor.
President (6) claims BRR (sees BBR)
Chancellor (1) claims BB (sees BR)
Liberal policy enacted.
Current tally: L2, F1
7 has chosen 2 as chancellor.
1 has chosen 6 as chancellor.
2 has chosen 5 as chancellor.
3 has chosen 7 as chancellor.
President (3) claims BBR (sees RRR)
Chancellor (7) claims RR (sees RR)
Fascist policy enacted.
Current tally: L2, F2
3 investigates 4 as liberal.
4 has chosen 5 as chancellor.
5 has chosen 7 as chancellor.
6 has chosen 3 as chancellor.
7 has chosen 4 as chancellor.
1 has chosen 3 as chancellor.
President (1) claims RRR 