In [None]:
#%load_ext tensorboard
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorboard.plugins import projector
import json
from collections import defaultdict

In [None]:
# Locations of the input data and of the TensorBoard / CSV training logs.
data_dir = './data'
log_dir = './logs/embedding/'
train_path = os.path.join(data_dir, 'train.csv')
hero_path = os.path.join(data_dir, 'hero_names.json')

# Model and optimisation hyper-parameters read by the cells below.
embedding_size = 64
dropout_rate = 0
activation = 'tanh'
n_hidden_predictor = 128
learning_rate = 1e-1
# `batch_size` is passed to `model.fit` further down but was never defined in
# the notebook, so the training cell raised NameError on a fresh kernel.
# 32 is the Keras default mini-batch size; NOTE(review): confirm the value
# that was actually used for the original runs.
batch_size = 32

In [None]:
# Hero metadata, parsed from the JSON file at `hero_path`.
with open(hero_path, 'r') as hero_file:
    hero_names = json.load(hero_file)

# Match table; rows whose `radiant_win` value is missing are discarded
# because they cannot be used as labelled examples.
all_matches = pd.read_csv(train_path)
no_winner = all_matches['radiant_win'].isna()
labeled_data = all_matches.loc[~no_winner]

In [None]:
# Re-key the hero metadata on each entry's 'id' field instead of its name.
hero_id_info = {hero_dict['id']: hero_dict for hero_dict in hero_names.values()}

In [None]:
# Split the labelled matches into training and validation subsets.
# Row positions are shuffled with a fixed seed so the split is repeatable;
# rows beyond the first n_train + n_val shuffled positions are not used here.
train_frac = 0.8
validate_frac = 0.1

n_data = len(labeled_data)
n_train = int(train_frac * n_data)
n_val = int(validate_frac * n_data)

random = np.random.RandomState(seed=116)
shuffled_rows = np.arange(n_data)
random.shuffle(shuffled_rows)

train_rows, val_rows = shuffled_rows[:n_train], shuffled_rows[n_train:n_train + n_val]

train_data = labeled_data.iloc[train_rows]
val_data = labeled_data.iloc[val_rows]

In [None]:
class EmbeddingModel(tf.keras.Model):
    """Keras model mapping two hero line-ups plus two win-rate features to a sigmoid output.

    One embedding table is shared by both teams. Each team's hero embeddings
    are summed, the two sums are concatenated with the two scalar win-rate
    features, and the result is fed through a small dense network.
    """

    def __init__(self, pool_size=123, embedding_size=32, team_size=5,
                 n_hidden_predictor=128, dropout_rate=0.1, activation='tanh'):
        """Build the shared embedding table and the dense predictor.

        Args:
            pool_size: number of rows in the embedding table (hero indices
                passed to `call` must be smaller than this).
            embedding_size: width of each hero embedding vector.
            team_size: number of heroes per team, forwarded to the embedding
                layer as `input_length`.
            n_hidden_predictor: units in each of the two hidden dense layers.
            dropout_rate: dropout applied to the predictor's input.
            activation: activation of the two hidden dense layers.
        """
        super(EmbeddingModel, self).__init__()

        self.embedding = tf.keras.layers.Embedding(pool_size, embedding_size,
                                                   input_length=team_size)

        # Predictor input = radiant embedding sum + dire embedding sum
        # (embedding_size each) + one win-rate scalar per team.
        self.predictor = tf.keras.Sequential(
            [
             tf.keras.layers.InputLayer(input_shape=(embedding_size*2 + 2,)),
             tf.keras.layers.Dropout(dropout_rate),
             tf.keras.layers.Dense(units=n_hidden_predictor, activation=activation),
             tf.keras.layers.Dense(units=n_hidden_predictor, activation=activation),
             tf.keras.layers.Dense(units=1, activation=tf.nn.sigmoid)
            ]
            )

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: a 4-sequence `(radiant, dire, radiant_wr, dire_wr)`.
                `radiant` and `dire` hold integer hero indices per match;
                `radiant_wr` and `dire_wr` hold one win-rate value per match,
                shaped either `(batch,)` or `(batch, 1)`.

        Returns:
            The predictor's sigmoid output, one value per match.
        """
        radiant, dire, radiant_wr, dire_wr = inputs

        radiant_embedding = self.embedding(radiant)
        dire_embedding = self.embedding(dire)

        # Collapse the per-hero axis so each team is a single vector.
        radiant_embedding_sum = tf.reduce_sum(radiant_embedding, axis=1)
        dire_embedding_sum = tf.reduce_sum(dire_embedding, axis=1)

        # tf.concat needs every operand to have the same rank. Forcing the
        # win-rate features to (batch, 1) lets the model accept 1-D feature
        # vectors when it is called directly, not only through code paths
        # that already add the trailing axis.
        radiant_wr = tf.reshape(radiant_wr, (-1, 1))
        dire_wr = tf.reshape(dire_wr, (-1, 1))

        pred_inputs = tf.concat((radiant_embedding_sum, dire_embedding_sum, radiant_wr, dire_wr), axis=-1)
        prediction = self.predictor(pred_inputs)

        return prediction



def get_win_rates(train_radiant, train_dire, train_y, n_train=None):
    """Compute each hero's historical win rate over the training matches.

    Args:
        train_radiant: table with one row per match holding the radiant
            team's hero identifiers.
        train_dire: same layout for the dire team.
        train_y: per-match outcome; a truthy value means radiant won.
        n_train: number of leading rows to use. Defaults to all rows.

    Returns:
        dict mapping every hero that appears in the used rows to
        ``wins / games``. Heroes that played but never won map to ``0.0``
        (previously they were left out, so later lookups raised KeyError).
    """
    if n_train is None:
        n_train = len(train_y)

    # Convert once to plain Python lists; this avoids a slow positional
    # `.iloc` lookup per row and keeps the dict keys as plain Python scalars.
    radiant_teams = np.asarray(train_radiant)[:n_train].tolist()
    dire_teams = np.asarray(train_dire)[:n_train].tolist()
    outcomes = np.asarray(train_y)[:n_train].tolist()

    win_counts = defaultdict(int)
    game_counts = defaultdict(int)
    for radiant, dire, radiant_win in zip(radiant_teams, dire_teams, outcomes):
        for hero in radiant:
            game_counts[hero] += 1
        for hero in dire:
            game_counts[hero] += 1

        winning_team = radiant if radiant_win else dire
        for hero in winning_team:
            win_counts[hero] += 1

    # Iterate over game_counts (not win_counts) so that heroes without a
    # single win still receive an entry.
    return {hero: win_counts[hero] / games for hero, games in game_counts.items()}


def get_heroes_and_winner(df):
    """Split a match table into radiant picks, dire picks and the outcome column.

    The hero identifiers in columns ``r1_hero``..``r5_hero`` and
    ``d1_hero``..``d5_hero`` are shifted down by one (so that ids which
    start at 1 become 0-based).

    Returns:
        Tuple ``(radiant_heroes, dire_heroes, winners)`` where the first two
        are five-column frames and ``winners`` is ``df['radiant_win']``.
    """
    slots = range(1, 6)
    radiant_heroes = df.loc[:, [f'r{idx}_hero' for idx in slots]].sub(1)
    dire_heroes = df.loc[:, [f'd{idx}_hero' for idx in slots]].sub(1)
    return radiant_heroes, dire_heroes, df['radiant_win']

In [None]:
# Hero picks (shifted by get_heroes_and_winner) and outcomes for each split.
(train_radiant, train_dire, train_y), (val_radiant, val_dire, val_y) = (
    get_heroes_and_winner(split) for split in (train_data, val_data)
)

In [None]:
win_rates = get_win_rates(train_radiant, train_dire, train_y, n_train)


def team_avg_win_rate(team_heroes, hero_win_rates, default=0.5):
    """Return the mean historical win rate of the heroes in each row.

    Args:
        team_heroes: table with one row per match and one column per hero slot.
        hero_win_rates: dict mapping hero identifier -> win rate.
        default: value used for a hero that has no entry in
            `hero_win_rates` (for example one that only appears in the
            validation split). 0.5 is a neutral prior; a plain dict lookup
            would raise KeyError instead.

    Returns:
        1-D float array with one average per row of `team_heroes`.
    """
    return np.array(
        [np.mean([hero_win_rates.get(hero, default) for hero in team])
         for team in team_heroes.to_numpy().tolist()],
        dtype=float,
    )


# Feature of average team winrate. Both splits use the win rates measured on
# the training matches only.
radiant_avg_wr = team_avg_win_rate(train_radiant, win_rates)
dire_avg_wr = team_avg_win_rate(train_dire, win_rates)
val_radiant_wr = team_avg_win_rate(val_radiant, win_rates)
val_dire_wr = team_avg_win_rate(val_dire, win_rates)

In [None]:
def hero_display_name(shifted_id):
    """Return the localized name for a hero key taken from `win_rates`.

    `win_rates` is keyed on the shifted ids produced by
    get_heroes_and_winner (raw id - 1), whereas `hero_id_info` is keyed on
    the raw ids from the JSON file, so the shift has to be undone before the
    lookup. Ids without metadata are reported as 'Unknown'.
    """
    info = hero_id_info.get(shifted_id + 1)
    return info['localized_name'] if info else 'Unknown'


worst_hero = min(win_rates, key=win_rates.get)
best_hero = max(win_rates, key=win_rates.get)
print(f'Best hero is {hero_display_name(best_hero)} with a winrate of {100*win_rates[best_hero]:.2f}%')
print(f'Worst hero is {hero_display_name(worst_hero)} with a winrate of {100*win_rates[worst_hero]:.2f}%')

In [None]:
# Binary cross-entropy between the model's sigmoid output and the 0/1 label.
loss = tf.keras.losses.BinaryCrossentropy()

# Embedding model sized from the hyper-parameters in the configuration cell;
# the embedding table has 112 rows.
model = EmbeddingModel(
    pool_size=112,
    embedding_size=embedding_size,
    dropout_rate=dropout_rate,
    n_hidden_predictor=n_hidden_predictor,
    activation=activation,
)

In [None]:
# exist_ok=True creates the directory when needed and is a no-op when it is
# already there, without the separate exists() check (which could race with
# another process creating the directory in between).
os.makedirs(log_dir, exist_ok=True)

# Training metrics go both to TensorBoard event files and to a CSV file in
# the same log directory.
csv_logs = os.path.join(log_dir, 'metrics.csv')
callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir=log_dir),
    tf.keras.callbacks.CSVLogger(csv_logs),
]

In [None]:
# Adam with the configured learning rate; binary accuracy is tracked
# alongside the loss during training and validation.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=[tf.metrics.BinaryAccuracy()],
)

In [None]:
def as_model_inputs(radiant, dire, radiant_wr, dire_wr):
    """Cast hero tables to int32 and win-rate features to float32, in model input order."""
    return [tf.cast(radiant, dtype=tf.int32), tf.cast(dire, dtype=tf.int32),
            tf.cast(radiant_wr, dtype=tf.float32), tf.cast(dire_wr, dtype=tf.float32)]


def as_labels(outcomes):
    """Convert the outcome column to a float32 tensor of 0/1 labels."""
    return tf.cast(outcomes.astype(int), dtype=tf.float32)


# Train on the training split and evaluate on the validation split after
# every epoch; metrics are logged through `callbacks`.
model.fit(
    x=as_model_inputs(train_radiant, train_dire, radiant_avg_wr, dire_avg_wr),
    y=as_labels(train_y),
    callbacks=callbacks,
    shuffle=True,
    batch_size=batch_size,
    epochs=10000,
    validation_data=(
        as_model_inputs(val_radiant, val_dire, val_radiant_wr, val_dire_wr),
        as_labels(val_y),
    ),
)

In [None]:
# Embedding matrix learned by the model, wrapped in a variable so it can be
# checkpointed. Row i belongs to raw hero id i + 1 (ids are shifted down by
# one before training), so there is no leading "unknown" row to strip.
weights = tf.Variable(model.embedding.get_weights()[0])
n_embedding_rows = int(weights.shape[0])

# Save the labels one per line, in the same order as the embedding rows.
# The row count comes from the embedding itself, so the metadata cannot
# drift out of sync with the model's pool size. UTF-8 is set explicitly so
# the output does not depend on the platform's default encoding.
with open(os.path.join(log_dir, 'metadata.tsv'), "w", encoding='utf-8') as f:
    for hero_id in range(1, n_embedding_rows + 1):
        if hero_id in hero_id_info:
            hero_name = hero_id_info[hero_id]['localized_name']
            f.write(f'{hero_name}\n')
        else:
            f.write('Unknown\n')

# Create a checkpoint from the embedding; the keyword name ("embedding") is
# the key under which the tensor is stored.
checkpoint = tf.train.Checkpoint(embedding=weights)
checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

# Tell the TensorBoard projector which checkpointed tensor to show and which
# metadata file labels its rows. Without this step the labels written above
# are never attached to the embedding.
projector_config = projector.ProjectorConfig()
embedding_config = projector_config.embeddings.add()
embedding_config.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
embedding_config.metadata_path = 'metadata.tsv'
projector.visualize_embeddings(log_dir, projector_config)

In [None]:
# NOTE(review): this cell contained only the incomplete expression `id_he`,
# which is not defined anywhere in the notebook and raised NameError, halting
# "Restart Kernel -> Run All". The stray expression has been removed.

In [None]:
# Display the number of training rows computed in the train/validation split cell.
n_train