In [None]:
from games.GameNodeckRandom import GameNodeckRandom
from players.PlayerProbabilistic import PlayerProbabilistic
from players.PlayerRandom import PlayerRandom
from Turn import Turn
from Card import Card

from keras.models import Sequential
from keras.layers import InputLayer
from keras.layers import Dense
from keras.losses import BinaryCrossentropy
from keras.saving import load_model

import numpy as np
from random import randint, sample
import copy

# Seat index of the learning agent: used to index `players` and compared with
# `game.current_player_id` further down.
# NOTE(review): this rebinds the builtin `id` for the whole notebook; consider renaming.
id = 0

"""
state for a turn:

3 targets
+
3 opponents
*
(
20 card weights
+
5 ranks * 3 amounts = 15 card amounts
)
+
20 cards of mine

OR

1 target
20 cards of mine
x last moves
ranks (1-5)
amount (1-3)
4 suits
1 response (0-3)


action:
5 ranks
*
4 suits
+
5 ranks
*
4 suits (omitted)
+
5 ranks
*
6 suit combinations

"""

# Card vocabulary. Rank strings carry a leading space, so lookups must use
# exactly these values.
ranks = [' 2', ' 3', ' 4', ' 5', ' 6']
suits = ['♠', '♥', '♣', '♦']

# State vector length: 3 target flags + 3 opponents * (20 card weights +
# 15 amount bits) + 20 own-card flags.
observation_space_n = 3 + 3 * (20 + 15) + 20
# Action vector length: 20 one-card asks + 20 three-card asks (indexed by the
# omitted suit) + 30 two-card asks (5 ranks * 6 suit pairs).
action_space_n = 5 * 4 + 5 * 4 + 5 * 6

def weights_to_list(weights):
    """Flatten a ``{(rank, suit): weight}`` mapping into a flat list.

    Cards are visited rank by rank and, within each rank, suit by suit,
    following the module-level ``ranks`` and ``suits``. A card that is not a
    key of ``weights`` is encoded as ``-1``.
    """
    return [
        weights[(rank, suit)] if (rank, suit) in weights.keys() else -1
        for rank in ranks
        for suit in suits
    ]

def amount_to_bit(amount):
    """Encode a collection of card counts as a 3-slot indicator list.

    Args:
        amount: iterable of integer counts.

    Returns:
        A list ``[has_1, has_2, has_3]`` of 0/1 flags. Counts outside 1..3
        have no slot and are skipped.
    """
    mask = [0, 0, 0]

    for count in amount:
        # Only 1..3 have a slot. A count of 0 must not reach mask[count - 1]:
        # that would be mask[-1], i.e. the flag for 3. Out-of-range values are
        # skipped rather than ending the loop, so valid counts that follow
        # them are still recorded.
        if 1 <= count <= 3:
            mask[count - 1] = 1

    return mask

def amounts_to_list(amounts):
    """Flatten a ``{rank: counts}`` mapping into three values per rank.

    Ranks are visited in the order of the module-level ``ranks``. A rank that
    is a key of ``amounts`` contributes ``amount_to_bit(amounts[rank])``; any
    other rank contributes ``[-1, -1, -1]``.
    """
    encoded = []

    for rank in ranks:
        if rank in amounts.keys():
            encoded.extend(amount_to_bit(amounts[rank]))
        else:
            encoded.extend([-1, -1, -1])

    return encoded

def cards_to_bit(cards):
    """Encode a hand as one flag per card of the deck.

    The deck is walked rank by rank, suit by suit. Each position holds ``1``
    when ``Card(suit, rank)`` is found in ``cards`` and ``-1`` otherwise.
    """
    return [
        1 if Card(suit, rank) in cards else -1
        for rank in ranks
        for suit in suits
    ]

def card_by_bit_number(x):
    """Return the card at position ``x`` of the rank-major, suit-minor deck.

    The first position that is greater than or equal to ``x`` wins, so any
    ``x`` at or below zero yields the first card. When ``x`` is larger than
    the last position, ``None`` is returned.
    """
    deck_order = ((rank, suit) for rank in ranks for suit in suits)

    for position, (rank, suit) in enumerate(deck_order):
        if position >= x:
            return Card(suit, rank)

    return None


def get_state(game, player):
    """Build the model input for ``player`` in the current ``game`` position.

    The row is the concatenation of:
      * three target flags (``1`` at ``game.rel_target_id``, ``-1`` elsewhere),
      * for each of the three opponent slots, the flattened card weights
        followed by the flattened amounts taken from ``player``,
      * the player's own hand encoded by ``cards_to_bit``.

    Returns:
        A 2-D ``numpy`` array holding that single row.
    """
    target = [-1] * 3
    target[game.rel_target_id] = 1

    # The returned turn is discarded; per the original author's note the call
    # is made so that the player object refreshes its weights.
    player.get_turn(game, game.rel_target_id)

    weights, amounts = player.weights, player.amounts

    target_info = []
    for slot in range(3):
        target_info += weights_to_list(weights[slot])
        target_info += amounts_to_list(amounts[slot])

    my_cards_info = cards_to_bit(player.cards)

    return np.array([target + target_info + my_cards_info])


def action_to_turn(in_action, player):
    """Decode the highest-scoring action of ``in_action[0]`` into a ``Turn``.

    Index layout:
      * 0..19  - one card:    ``rank * 4 + suit``
      * 20..39 - three cards: ``20 + rank * 4 + omitted_suit``
      * 40..   - two cards:   ``40 + rank * 6 + pair``, where ``pair`` indexes
        the suit pairs (0,1), (0,2), (0,3), (1,2), (1,3), (2,3)

    ``player`` is accepted but not used.
    """
    move = np.argmax(in_action[0])

    if move < 20:
        rank_idx, suit_idx = divmod(move, 4)
        amount = 1
        suit_ids = [suit_idx]
    elif move < 40:
        rank_idx, omitted = divmod(move - 20, 4)
        amount = 3
        # A three-card ask is stored as the single suit that is NOT requested.
        suit_ids = [k for k in range(4) if k != omitted]
    else:
        pairs = [[a, b] for a in range(4) for b in range(a + 1, 4)]
        rank_idx, pair_idx = divmod(move - 40, 6)
        amount = 2
        suit_ids = pairs[pair_idx]

    return Turn(ranks[rank_idx], amount, [suits[k] for k in suit_ids])


def turn_to_action(turn):
    """Encode ``turn`` as a one-hot action row (inverse of ``action_to_turn``).

    Returns:
        ``[action]`` where ``action`` is a list of ``action_space_n`` zeros
        with a single ``1`` at the index of the turn. One-card turns start at
        offset 0, three-card turns at 20 (indexed by the omitted suit) and
        two-card turns at 40 (indexed by the suit pair).
    """
    count = turn.count
    rank_idx = ranks.index(turn.rank)

    if count == 2:
        base = 40
        rank_offset = rank_idx * 6

        pair = [suits.index(s) for s in turn.suits]
        if pair[0] > pair[1]:
            # Pairs are enumerated with the lower suit index first.
            pair = pair[::-1]

        pairs = [[a, b] for a in range(4) for b in range(a + 1, 4)]
        suit_offset = pairs.index(pair)

    elif count == 1:
        base = 0
        rank_offset = rank_idx * 4
        suit_offset = suits.index(turn.suits[0])

    else:
        base = 20 if count == 3 else 0
        rank_offset = rank_idx * 4
        # The slot is chosen by the first suit that was not asked for.
        asked = [suits.index(s) for s in turn.suits]
        suit_offset = [k for k in range(4) if k not in asked][0]

    one_hot = [0] * action_space_n
    one_hot[base + rank_offset + suit_offset] = 1

    return [one_hot]


def get_reward(responce, old_cards, new_cards, old_chests, new_chests):
    """Score a move by comparing the player's holdings before and after it.

    Returns:
        ``5`` if the number of chests grew, otherwise ``1`` if the number of
        cards grew, otherwise ``0`` (cards lost or nothing changed).

    ``responce`` is accepted but not used.
    """
    if len(new_chests) > len(old_chests):
        # gained a chest
        reward = 5
    elif len(new_cards) > len(old_cards):
        # gained a card
        reward = 1
    else:
        # lost a card, or nothing happened
        reward = 0

    return reward


def get_target_vector(target, reward, gamma):
    """Shift a copy of ``target`` towards its own best action.

    The arg-max entry gains ``reward * gamma``; each other entry among the
    first ``action_space_n`` loses ``reward / action_space_n * 10``.

    Returns:
        A 2-D ``numpy`` array containing the adjusted values as its only row.
    """
    values = list(target)

    best = np.argmax(values)
    penalty = reward / action_space_n * 10

    for idx in range(action_space_n):
        if idx != best:
            values[idx] -= penalty
        else:
            values[idx] += reward * gamma

    return np.array([values])

def get_random_turn(player, game):
    """Build a random turn for a rank that ``player`` currently holds.

    Args:
        player: object exposing ``cards``; each card has a ``rank``.
        game: object exposing ``suits``, the suits to sample from.

    Returns:
        A ``Turn`` for the rank of a uniformly chosen held card, with a
        random count in 1..3 and that many distinct suits.

    Raises:
        ValueError: if the player holds no cards (empty ``randint`` range).
    """
    # randint includes both bounds, so the upper bound has to be len - 1.
    # Drawing from 0..len and subtracting 1 would produce -1 as well, which
    # aliases the last card and makes it twice as likely as the others.
    rank = player.cards[randint(0, len(player.cards) - 1)].rank
    count = randint(1, 3)
    # Named differently from the module-level `suits` to avoid shadowing it.
    asked_suits = sample(game.suits, count)

    return Turn(rank, count, asked_suits)


In [None]:
def get_guessing_turns(guesser, target):
    """List the fully correct asks ``guesser`` could make against ``target``.

    For every rank present in both hands, one ``Turn`` is produced carrying
    the exact number of cards of that rank in ``target.cards`` and their
    suits, in hand order.
    """
    own_ranks = list(set([card.rank for card in guesser.cards]))
    target_ranks = set([card.rank for card in target.cards])

    turns = []
    for rank in own_ranks:
        if rank in target_ranks:
            matching = [card for card in target.cards if card.rank == rank]
            turns.append(
                Turn(rank, len(matching), [card.suit for card in matching])
            )

    return turns

def get_invalid_turns(guesser):
    """Enumerate every turn that asks for a rank ``guesser`` does not hold.

    For each missing rank (in the order of the module-level ``ranks``) the
    result contains, in this order: the 4 one-suit turns, the 6 two-suit
    turns and the 4 three-suit turns.

    Returns:
        A list of ``Turn`` objects; every turn carries its suits as a list.
    """
    held_ranks = {card.rank for card in guesser.cards}
    missing_ranks = [rank for rank in ranks if rank not in held_ranks]

    turns = []

    for rank in missing_ranks:
        # 1 suit
        for i in range(4):
            turns.append(Turn(rank, 1, [suits[i]]))

        # 2 suits: build a list of two suits. Adding the two suit strings
        # would concatenate them into a single string instead.
        for i in range(4):
            for j in range(i + 1, 4):
                turns.append(Turn(rank, 2, [suits[i], suits[j]]))

        # 3 suits: every suit except the i-th one
        for i in range(4):
            turns.append(Turn(rank, 3, suits[:i] + suits[i + 1:]))

    return turns


In [None]:
# Network: the state row goes straight into a single linear layer that emits
# one score per action. The input batch size is fixed to 1.
model = Sequential()
model.add(InputLayer(batch_input_shape=(1, observation_space_n)))
# Hidden layers are currently disabled.
# model.add(Dense(109, activation='sigmoid'))
# model.add(Dense(90, activation='sigmoid'))
model.add(Dense(action_space_n, activation='linear'))

# Mean absolute error serves as both the loss and the reported metric.
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Uncomment to continue from a previously saved model instead of a fresh one.
# model = load_model("models/model.keras")

In [None]:
# NOTE(review): discount_factor is not referenced by any cell shown here.
discount_factor = 0.95
# Probability of playing a random turn when `explore` is on; multiplied by
# eps_decay_factor once per game.
eps = 0.9
eps_decay_factor = 0.996

# Multiplied by gamma_decay_factor once per game.
gamma = 0.5
gamma_decay_factor = 0.994

In [None]:
# train: when True the game loop calls model.fit on the moves it plays.
train = False
# explore: when True a random turn is played with probability eps.
explore = False
# verb: forwarded to the game constructor as `verbose`.
verb = False

In [None]:
# Per-game accumulators filled by the game loop.
r_history = []      # value of c_rwrd at the end of each game
inv_history = []    # number of invalid model turns in each game
chests = [0 for _ in range(4)]  # running chest total for each of the 4 seats
chest_history = []  # chests collected by seat `id` in each game


In [None]:
# Plays 50 games with the model driving seat `id` and records the outcome of
# each one. Per game: set up the table, advance to the model's first move,
# then alternate between one model move and the other seats' moves until the
# game reports that it is over.
games = []

for episode in range(50):  # episodes

    moves = dict([[r, []] for r in ranks])

    players = [PlayerProbabilistic(0),
               PlayerRandom(1),
               PlayerRandom(2),
               PlayerRandom(3)]

    game = GameNodeckRandom(ranks,
            suits,
            players,
            verbose=verb)

    # NOTE(review): c_rwrd is never updated below, so r_history only collects zeros.
    c_rwrd = 0

    print(f"GAME {episode}")
    game.start()

    eps *= eps_decay_factor
    gamma *= gamma_decay_factor

    # Let the other seats play until it is the model's turn.
    while game.current_player_id != id:
        game.step()

    state = get_state(game, players[id])
    print("")
    print(state)
    print("")
    player_state = players[id].cards

    over = False

    invalids = 0

    while not over:

        # Snapshots of the holdings before the move.
        p_cards = list(players[id].cards)
        p_chests = list(players[id].chests)

        turns_g = get_guessing_turns(players[id], players[id + 1 + game.rel_target_id])
        turns_i = get_invalid_turns(players[id])

        actions_g = [np.array(turn_to_action(t)) for t in turns_g]
        actions_i = [np.array(turn_to_action(t)) for t in turns_i]

        # Training target: +1 for every fully correct ask, -1 for every ask of
        # a rank the model does not hold, 0 elsewhere.
        tgt = np.array([[0 for _ in range(action_space_n)]])

        for a in actions_g:
            tgt += a

        for a in actions_i:
            tgt += a * -1

        if np.random.random() < eps and explore: # get a random turn

            turn = get_random_turn(players[id], game)
            action = turn_to_action(turn)

        else: # get a non-random turn

            attempts = 1
            while True:
                action = model.predict(state, verbose=0)
                turn = action_to_turn(action, players[id])

                if turn.rank not in [c.rank for c in players[id].cards]:
                    invalids += 1

                    if train:
                        # Fit on the target and ask the model again.
                        model.fit(
                            state,
                            tgt,
                            epochs=1, verbose=0
                        )
                        attempts += 1
                    else:
                        # Not training: fall back to a random legal rank.
                        turn = get_random_turn(players[id], game)
                        action = turn_to_action(turn)
                        break
                else:
                    break

        over = game.step(turn)
        responce = game.responce


        if train:
            model.fit(
                state,
                tgt,
                epochs=1, verbose=0
            )

        # Let the other seats move until it is the model's turn again.
        while game.current_player_id != id and not over:
            over = game.step()
        if over: break

        new_state = get_state(game, players[id])
        new_player_state = players[id].cards

        state = new_state
        player_state = new_player_state

    r_history.append(c_rwrd)
    inv_history.append(invalids)

    # Stored under its own name: binding the result to `sum` would replace the
    # builtin for every cell that runs afterwards.
    summary = game.summary()

    games.append({'log': game.log, 'summary': summary})

    # Chest entries are divided by 4 to count chests.
    for seat in range(4):
        chests[seat] += len(summary['chests'][seat]) // 4

    chest_history.append(len(summary['chests'][id]) // 4)
    print(len(summary['chests'][id]) // 4)



In [None]:
from matplotlib import pyplot as plt

# Running chest totals per seat.
print(chests)

# Number of invalid model turns in each game, in play order.
plt.plot(inv_history)
plt.show()

In [None]:
# Aggregate the recorded games: log length per game plus, per player id,
# number of wins, total chests, and the summed difference between chest
# cards and starting cards.
lengths = []
win_stats = {p.id: 0 for p in players}
chests_stats = {p.id: 0 for p in players}
delta_cards_stats = {p.id: 0 for p in players}

for game in games:
    lengths.append(len(game['log']))

    # The first player with the strictly highest chest count wins the game.
    max_chests, best_id = -1, 0

    for i in [p.id for p in players]:
        chest_cards = len(game['summary']['chests'][i])
        c = chest_cards // 4
        chests_stats[i] += c

        start_cards = len([p.rank for p in game['summary']['start'][i]])
        delta_cards_stats[i] += chest_cards - start_cards

        if c > max_chests:
            max_chests, best_id = c, i

    win_stats[best_id] += 1


In [None]:
from matplotlib import pyplot as plt

# NOTE(review): the game loop in this notebook seats PlayerRandom opponents;
# confirm that these labels still describe the players that were evaluated.
player_names = ['Model', 'Probabilistic', 'Probabilistic', 'Probabilistic']

print(f"AVEARGE GAME LENGTH: {np.mean(lengths)} MOVES")
print(f"FINAL CARDS AMOUNT: {chests_stats}")

# Share of all collected chests per player. The total is computed once, with
# NumPy rather than the builtin `sum`: a notebook variable named `sum` would
# make a call to the builtin fail with "object is not callable".
chest_counts = np.array(list(chests_stats.values()), dtype=float)
total_chests = chest_counts.sum()
# With no chests at all every share is 0 instead of a division by zero.
ratios = chest_counts / total_chests if total_chests else np.zeros_like(chest_counts)

plt.bar([f'{player_names[i]} {i}' for i in range(4)], ratios)

plt.ylim(0, 1)

plt.ylabel("Total cards ratios")

plt.show()

In [None]:
# Write the model to the current working directory.
# NOTE(review): the commented-out load_model call reads "models/model.keras";
# confirm which of the two paths is intended.
model.save("model.keras")