In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [6]:
from itertools import product, chain
from random import choices, shuffle

In [7]:
ALL_SUITS = list(range(5))
ALL_CARDS = [(x,y) for (x,y) in product(ALL_SUITS, range(9)) if x>0 or y<4]
ALL_TASKS = list(ALL_CARDS[4:])

In [8]:
CARD_INDICER = {x:i for i, x in enumerate(ALL_CARDS)}
TASK_INDICER = {x:i for i, x in enumerate(ALL_TASKS)}

In [9]:
N = 4
M = 4

In [13]:
class AssignerNet(nn.Module):
    def __init__(self, n):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(len(TASK_INDICER)*(n+1)+n+len(CARD_INDICER), 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, len(TASK_INDICER)),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        logits = self.layers(x)
        return logits

class Assigner:
    
    def __init__(self, n, m):
        self.n = n
        self.m = m
        self.nn = AssignerNet(n)
    
    def assign(self, hands, tasks):
        lead = next(i for i in range(self.n) if (0,3) in hands[i])
        chooser = lead
        task_mask = np.zeros(len(TASK_INDICER), dtype=bool)
        task_assignment_masks = np.zeros((self.n, len(TASK_INDICER)), dtype=bool)
        lead_mask = np.zeros(self.n, dtype=bool)
        hand_masks = np.zeros((self.n, len(CARD_INDICER)), dtype=bool)
        
        for t in tasks:
            task_mask[TASK_INDICER[t]] = 1
        
        for i in range(self.n):
            for card in hands[i]:
                hand_masks[i][CARD_INDICER[card]] = 1
        for i in range(self.m):
            lead_mask[(lead-chooser+self.n)%self.n] = 1
            inp = np.concatenate((
                task_mask,
                task_assignment_masks[chooser:].flatten(),
                task_assignment_masks[:chooser].flatten(),
                lead_mask,
                hand_masks[chooser]
            ))
            out = self.nn(torch.tensor(inp, dtype=torch.float)).detach().numpy().flatten()
            out[~task_mask] = 0.
            out[task_assignment_masks.sum(axis=0).astype(bool)] = 0.
            # print(out)
            # print(task_mask.astype(int))
            # print(task_assignment_masks.astype(int))
            # print(task_assignment_masks.sum(axis=0).astype(int))
            # print()
            k = np.argmax(out)
            task_assignment_masks[chooser][k] = 1
            lead_mask[(lead-chooser+self.n)%self.n] = 0
            chooser = (chooser+1)%self.n
        
        return task_assignment_masks

In [2]:
class PlayerNet(nn.Module):
    def __init__(self, n):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(len(TASK_INDICER)*(n+1)+n+len(CARD_INDICER), 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, len(CARD_INDICER)),
            nn.Sigmoid(),
        )

    def forward(self, x):
        logits = self.layers(x)
        return logits

class Player:
    
    def __init__(self, n, m):
        self.n = n
        self.m = m
        self.nn = AssignerNet(n)
    
    def assign(self, hands, tasks):
        lead = next(i for i in range(self.n) if (0,3) in hands[i])
        chooser = lead
        task_mask = np.zeros(len(TASK_INDICER), dtype=bool)
        task_assignment_masks = np.zeros((self.n, len(TASK_INDICER)), dtype=bool)
        lead_mask = np.zeros(self.n, dtype=bool)
        hand_masks = np.zeros((self.n, len(CARD_INDICER)), dtype=bool)
        
        for t in tasks:
            task_mask[TASK_INDICER[t]] = 1
        
        for i in range(self.n):
            for card in hands[i]:
                hand_masks[i][CARD_INDICER[card]] = 1
        for i in range(self.m):
            lead_mask[(lead-chooser+self.n)%self.n] = 1
            inp = np.concatenate((
                task_mask,
                task_assignment_masks[chooser:].flatten(),
                task_assignment_masks[:chooser].flatten(),
                lead_mask,
                hand_masks[chooser]
            ))
            out = self.nn(torch.tensor(inp, dtype=torch.float)).detach().numpy().flatten()
            out[~task_mask] = 0.
            out[task_assignment_masks.sum(axis=0).astype(bool)] = 0.
            # print(out)
            # print(task_mask.astype(int))
            # print(task_assignment_masks.astype(int))
            # print(task_assignment_masks.sum(axis=0).astype(int))
            # print()
            k = np.argmax(out)
            task_assignment_masks[chooser][k] = 1
            lead_mask[(lead-chooser+self.n)%self.n] = 0
            chooser = (chooser+1)%self.n
        
        return task_assignment_masks

NameError: name 'nn' is not defined

In [14]:
class Game:
    def __init__(self, n, m):
        self.n = n
        self.m = m
        self.tasks = choices(ALL_TASKS, k=m)
        self.initiate_hands()
        self.lead = next(i for i in range(self.n) if (0,3) in self.hands[i])
        self.assigner = Assigner(self.n, self.m)
    
    def assign_tasks(self):
        self.task_assignment_masks = self.assigner.assign(self.hands, self.tasks)
        # print(self.task_assignment_masks.astype(int))
        # print()
    
    def initiate_hands(self):
        deck = list(ALL_CARDS)
        shuffle(deck)
        self.hands = []
        for i in range(self.n):
            self.hands.append(deck[i*(40//self.n):(i+1)*(40//self.n)])
    
    def play_randomly(self):
        self.won_cards = [[] for _ in range(self.n)]
        self.task_completion_masks = np.zeros((self.n, len(TASK_INDICER)), dtype=bool)
        lead = self.lead
        for i in range(40//self.n):
            trick = choices(self.hands[lead])
#             print(trick, self.hands[lead])
            self.hands[lead].remove(trick[0])
            w = 0
            for j in range(1, self.n):
                options = [c for c in self.hands[(lead+j)%self.n] if c[0]==trick[0][0]]
                if not options:
                    options = self.hands[(lead+j)%self.n]
                trick.extend(choices(options))
                self.hands[(lead+j)%self.n].remove(trick[-1])
                if (trick[j][0] == trick[w][0] and trick[j][1] > trick[w][1]) or (trick[j][0] == 0 < trick[w][0]):
                    w = j
            w = (w + lead) % self.n
            self.won_cards[w].extend(trick)
            for c in trick:
                if c[0] > 0:
                    t = TASK_INDICER[c]
                    self.task_completion_masks[w][t] = self.task_assignment_masks[:, t].sum()
            lead = w
        
        self.score = (self.task_assignment_masks & self.task_completion_masks).sum() / self.m
        # print(self.task_assignment_masks.astype(int))
        # print(self.task_completion_masks.astype(int))
        # print()

#         assert sorted(list(chain(*self.hands)) + list(chain(*self.won_cards))) == ALL_CARDS, (sorted(list(chain(*self.hands))), ALL_CARDS, self.hands)


In [15]:
games = []
for i in range(100):
    game = Game(N, M)
    game.assign_tasks()
    game.play_randomly()
    games.append(game)

scores = [game.score for game in games]

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [16]:
np.mean(scores)

0.265

True