In [1]:
%load_ext autoreload
%autoreload 1
%aimport arena

  return f(*args, **kwds)
  return f(*args, **kwds)


## Create Random Deck

In [2]:
import random
import spellsource.utils
from collections import Counter, OrderedDict, namedtuple
import numpy as np
import pandas as pd
from spellsource.context import Context
from typing import List, Callable

In [3]:
def random_draft(_, selection):
    """Pick one card uniformly at random from each row of offered choices.

    Args:
        _: The decks being drafted; ignored by this strategy.
        selection: One row of offered cards per deck. All rows must have the
            same length; the row length is taken from the data rather than
            being fixed at 3.

    Returns:
        A 1-D numpy array holding the chosen card for each row, in row order.
        An empty array is returned when no rows are offered.
    """
    if len(selection) == 0:
        # Nothing to pick from (e.g. zero decks are being drafted).
        return np.array([])
    options = np.array(selection)
    rows = np.arange(len(selection))
    # One random column index per row.
    columns = np.random.choice(options.shape[1], len(selection))
    return options[rows, columns]


# Draft strategies are referenced through this constant-style name.
RANDOM_DRAFT = random_draft

def draft_decks(draft_class: str, number_of_decks: int,
                strategy: Callable[[List, List], List]=RANDOM_DRAFT,
                deck_size: int=30):
    """Draft several arena decks of the same class in lock-step.

    In every round one card choice per deck is drawn with
    ``ArenaCardPicker.GetCardChoice``, ``strategy`` picks one card per deck,
    and the pick is appended to that deck and logged as an
    ``arena.ArenaSelection``.

    Args:
        draft_class: Class to draft for, e.g. ``'Druid'``.
        number_of_decks: How many decks to draft in parallel.
        strategy: Called as ``strategy(decks, choices)`` with the in-progress
            decks and the current round's choices (one entry per deck). It
            must return one picked card per deck, in deck order; any
            sequence works (list or 1-D array).
        deck_size: Number of pick rounds, i.e. cards per deck. Defaults to
            30, the value that used to be hard-coded.

    Returns:
        A list with one ``arena.Draft`` per deck, each built from that deck's
        selection log and labelled with ``str(index)``.
    """
    card_picker = arena.ArenaCardPicker()
    decks = [arena.Deck(draft_class, [], str(i)) for i in range(number_of_decks)]
    selections_log = [[] for _ in range(number_of_decks)]
    for _ in range(deck_size):
        choices = [card_picker.GetCardChoice(draft_class) for _ in range(number_of_decks)]
        picks = strategy(decks, choices)
        # `picked` is kept distinct from `picks` so the strategy result is not shadowed.
        for deck, card_choices, picked, log in zip(decks, choices, picks, selections_log):
            deck.cards.append(picked)
            log.append(arena.ArenaSelection(card_choices, picked))
    return [
        arena.Draft(draft_class, selections, str(i))
        for i, selections in enumerate(selections_log)
    ]

# Smoke test: draft a single random Druid deck and check that every recorded
# pick is one of the cards that were offered for that pick.
# NOTE: despite its name, `deck` holds the arena.Draft returned by draft_decks.
deck = draft_decks('Druid', 1)[0]
for selection in deck.selections:
    assert selection.selected in selection.choices

In [4]:
RANDOM_BEHAVIOUR = 'PlayRandomBehaviour'
def simulate(ctx: Context, drafts: List[arena.Draft], behaviour: str=RANDOM_BEHAVIOUR, games_per_opponent: int=10):
    """Run the drafted decks against each other and return the match results.

    Each draft is converted to a deck string with ``GetDeck().ToHSString()``
    and handed to ``spellsource.utils.simulate``; both players use the same
    ``behaviour``.

    Per the original notes, every draft plays every other draft
    ``games_per_opponent`` times, so 100 drafts give
    4950 * games_per_opponent simulated games.

    Args:
        ctx: spellsource Context forwarded as ``context``.
        drafts: Drafts whose decks should be simulated.
        behaviour: Behaviour name used for both sides of every game.
        games_per_opponent: Forwarded as ``number``.

    Returns:
        Whatever ``spellsource.utils.simulate`` returns (described by the
        original author as an iterator of match results).
    """
    deck_strings = []
    for draft in drafts:
        deck_strings.append(draft.GetDeck().ToHSString())
    both_sides = (behaviour, behaviour)
    return spellsource.utils.simulate(
        context=ctx,
        decks=deck_strings,
        number=games_per_opponent,
        behaviours=both_sides,
    )

In [5]:
# Shared spellsource Context; it is passed to simulate() by later cells.
ctx = Context()

In [6]:
CACHE = {}
def get_class_cards(draft_class: str):
    """Return the list of all cards for ``draft_class``, memoised in CACHE.

    On the first request for a class the cards are read from
    ``arena.ArenaCardPicker().GetAllCards`` and stored; later requests return
    the very same cached list object.
    """
    if draft_class not in CACHE:
        CACHE[draft_class] = list(arena.ArenaCardPicker().GetAllCards(draft_class))
    return CACHE[draft_class]

class CardCounter(Counter):
    """A Counter that is pre-seeded with a fixed list of cards.

    Unlike a plain Counter, the first argument is ``card_list``: every card
    in it is guaranteed to be present as a key, with a count of 0 if the
    counted data never mentions it. The remaining arguments are forwarded to
    ``Counter`` unchanged.
    """
    def __init__(self, card_list, *args, **kwargs):
        super(CardCounter, self).__init__(*args, **kwargs)
        for card in card_list:
            # Add missing cards with a zero count; keep existing counts as-is.
            self.setdefault(card, 0)

    def copy(self):
        """Return a shallow copy that keeps all counts.

        Counter.copy() calls ``self.__class__(self)``, which this class would
        read as ``card_list`` and thereby reset every count to 0.
        """
        return self.__class__((), self)

    def __reduce__(self):
        """Support pickling/copying with the (card_list, data) signature."""
        return self.__class__, ((), dict(self))

# Construct training data
def construct_training_data(drafts: List[arena.Draft], match_result):
    """Turn drafts and their simulated match results into (features, targets).

    Args:
        drafts: The drafts that were simulated. The card universe is taken
            from ``drafts[0].draft_class``.
        match_result: Iterable of match records. Each record is read as
            ``record['decks'][i]`` and ``record['results'][i]['WIN_RATE']``
            for the two sides ``i`` in (0, 1).

    Returns:
        A tuple ``(X, y)``. ``X`` is the frame built by
        ``construct_prediction_data`` for every (deck, selection) step yielded
        by ``draft.Iter()``. ``y`` has one row per step and one column per
        class card: the reward in the column of the selected card and 0 in
        every other column.
    """
    # Sum up the win rate each deck achieved over all of its matches.
    total_win_rate = Counter()
    for match in match_result:
        for side in (0, 1):
            total_win_rate[match['decks'][side]] += match['results'][side]['WIN_RATE']

    # Every class card gets a column, even cards that never show up in the
    # drafts, so the output columns stay stable.
    class_cards = get_class_cards(drafts[0].draft_class)
    opponents = len(drafts) - 1
    decks, selections, target = [], [], []
    for draft in drafts:
        for deck, selection in draft.Iter():
            decks.append(deck)
            selections.append(selection)
            # Average win rate over all opponents, shifted down by 0.5.
            reward = total_win_rate[deck.name] / opponents - 0.5
            # Training uses cross-entropy loss, Loss = E(p_i * log(y_i)).
            # The reinforcement-learning objective is log(P(a|s_i)) * R(s_i),
            # where P(a|s_i) is the probability of taking action a in state
            # s_i under the current policy. Putting the reward R(s_i) where
            # cross-entropy expects p_i lets an ordinary classifier be
            # trained on the RL task.
            target.append(CardCounter(class_cards, {selection.selected: reward}))
    return construct_prediction_data(decks, selections), pd.DataFrame(target)

def construct_prediction_data(decks: List[arena.Deck], selections: List[arena.ArenaSelection]):
    """Build the model input frame for a batch of (deck, selection) pairs.

    For each pair, two card-count rows are produced over all cards of
    ``decks[0].draft_class``: one counting the cards already in the deck and
    one counting the cards currently offered. The two frames are joined
    side by side under the top-level column keys ``'deck'`` and ``'choices'``.

    Args:
        decks: Decks to encode; must be as long as ``selections``.
        selections: The selection offered to each deck (only ``choices`` is read).

    Returns:
        A DataFrame with one row per pair and two column groups.
    """
    assert len(decks) == len(selections)
    cards = get_class_cards(decks[0].draft_class)
    pairs = list(zip(decks, selections))
    deck_frame = pd.DataFrame([CardCounter(cards, deck.cards) for deck, _ in pairs])
    choices_frame = pd.DataFrame([CardCounter(cards, selection.choices) for _, selection in pairs])
    return pd.concat([deck_frame, choices_frame], axis=1, keys=['deck', 'choices'])

# Ad hoc tests.
# CardCounter must zero-fill cards that are missing from the counted data.
assert CardCounter(['a', 'b'], {'a': 1}) == {'a': 1, 'b': 0}
# End-to-end check: draft 10 random Druid decks, simulate them and build the
# training frames from the outcome.
drafts = draft_decks('Druid', number_of_decks=10)
result = list(simulate(ctx, drafts))
X, y = construct_training_data(drafts, result)
# 45 = C(10, 2), i.e. presumably one result record per pairing of decks.
assert len(result) == 45
# For the first 30 rows the deck is expected to hold 0, 1, ..., 29 cards.
assert X['deck'].sum(axis=1).tolist()[:30] == list(range(30))
# Every one of those rows is expected to offer exactly 3 cards.
assert X['choices'].sum(axis=1).tolist()[:30] == list(np.full((30,), 3))
# One target row per pick: 10 drafts times 30 picks.
assert len(y) == 10 * 30
# Target columns are expected to come out in sorted order.
assert list(y.columns) == sorted(list(y.columns))

45


In [7]:
draft_class = 'Druid'
# Sorted so that each card gets a stable integer id.
draft_cards = sorted(get_class_cards(draft_class))
# Lookup tables between integer ids and cards. `id_to_cards` used to be a
# one-element set holding an enumerate object; it is now a real mapping.
id_to_cards = dict(enumerate(draft_cards))
cards_to_id = {card: index for index, card in enumerate(draft_cards)}
# Create Model
from tensorflow.python import keras

# NOTE(review): despite its name this holds an ArenaCardPicker, not a count,
# and nothing visible in this notebook reads it - confirm before removing.
num_of_cards = arena.ArenaCardPicker()
# One dense layer followed by a softmax. The input is twice the number of
# cards wide (deck counts plus choice counts, as produced by
# construct_prediction_data); the output has one score per card.
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(len(draft_cards)*2,)),
    keras.layers.Dense(len(draft_cards)),
    keras.layers.Softmax()
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [8]:
def model_draft_policy(model, cards_to_id):
    """Build a draft strategy that picks the offered card the model scores highest.

    Args:
        model: Object with a ``predict`` method that maps the frame from
            ``construct_prediction_data`` to one score per card id.
        cards_to_id: Mapping from card to its column index in the model output.

    Returns:
        A function ``draft(decks, draft_choices)`` returning, for every deck,
        the offered card with the highest predicted score.
    """
    def draft(decks, draft_choices):
        offered = pd.DataFrame(draft_choices)
        # Wrap each row of offers in a selection with no card picked yet.
        pending = offered.apply(
            lambda row: arena.ArenaSelection(row.tolist(), None), axis=1
        ).tolist()
        features = construct_prediction_data(decks, pending)
        scores = model.predict(features)
        # Card id of every offered card, one row per deck.
        offered_ids = offered.applymap(cards_to_id.get).values
        deck_rows = np.arange(scores.shape[0])[:, np.newaxis]
        # Score of each offered card, one row per deck.
        offered_scores = scores[deck_rows, offered_ids]
        best = offered_scores.argmax(axis=1)
        return offered.values[np.arange(len(decks)), best]
    return draft
    
# Smoke run: draft 10 Druid decks with the model-driven policy; the result is
# assigned to `_` and not used further.
_ = draft_decks(draft_class='Druid', number_of_decks=10, strategy=model_draft_policy(model, cards_to_id))