### Setup

In [1]:
from importlib import reload
import pandas as pd
import numpy as np
import dataset
reload(dataset)
import pickle
import os

from spektral.data import Dataset, Graph, BatchLoader
from spektral.layers import ECCConv, EdgeConv, GlobalAvgPool
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import random

In [2]:
# Combined match data: keep the raw frame untouched and work on a copy
df_raw = pd.read_csv('../data/combined.csv')
df = df_raw.copy()

# Hero feature table, keyed by hero_id
df_features = pd.read_csv('../data/features.csv').set_index('hero_id')

# Precomputed match filters (the 'filt_std' column is read further down)
df_filters = pd.read_csv('../models/filters.csv')

In [None]:
# Load the pickled graph dataset in chunks of 50000 matches and concatenate
# the chunks into a single `graphs` object.
# NOTE: the chunk files are unpickled as-is; only load files produced by this
# project, since unpickling untrusted data can execute arbitrary code.
dir = '../data/graphs_v2_scaled/'  # NOTE(review): shadows the builtin `dir`; name kept in case other cells use it
count = 0  # not referenced in the visible cells; kept for compatibility
total = len(df)
step = 50000

for i in range(0, int(np.ceil(total/step))):
    start = i*step
    # File names carry an inclusive match range; the last chunk stops at total-1
    end = min(start+step, total) - 1
    path = dir+f'graphs_v2_scaled_{start}-{end}.pkl'
    print(path)
    # The context manager closes the file even when unpickling raises
    with open(path, 'rb') as file:
        if i == 0:
            graphs = pickle.load(file)
        else:
            graphs = graphs + pickle.load(file)

In [None]:
# Drop the attack_backswing node feature (column 14) from every graph,
# keeping columns 0-13 and 15-19 in their original order
kept_columns = list(range(0, 14)) + list(range(15, 20))
for i in range(len(graphs)):
    current = graphs[i]
    current.x = current.x[:, kept_columns]
print('Attack backswing feature removed')

In [5]:
def get_filt_idx(filt):
    '''Return the positions of the truthy entries of a boolean filter.

    Example: a filter of True, False, True gives [0, 2].
    '''
    # DotaV2 data handling: the original note says there is one graph per
    # match (0-49999, 50000-99999, etc.), so a position in the filter is
    # used directly as the index of the matching graph.
    selected = []
    for position, keep in enumerate(filt):
        if keep:
            selected.append(position)
    return selected

### Model 2.0 - Baseline

In [None]:
# OPTIONAL:
# Redefine edge features from a single value (1 or 2) to a one-hot encoding of depth 2, so 1 = [1,0] and 2 = [0,1]
# for i, graph in enumerate(graphs):
#     if i%100000==0:
#         print(i)
#     e_cur = graph.e
#     e_new = np.zeros(shape=(10,10,2)) # zero-initialised edge feature matrix, two one-hot features (teammate=[1,0], enemy=[0,1])
#     for row in range(0,10):
#         for col in range(0,10):
#             if e_cur[row,col,0]==1:
#                 e_new[row,col,0], e_new[row,col,1] = 1,0
#             else:
#                 e_new[row,col,0], e_new[row,col,1] = 0,1
#     graph.e = e_new

In [None]:
# OPTIONAL:
# Redefine edge features from single value 1 or 2 to 1 or 0 (single binary feature: teammate)
# for i, graph in enumerate(graphs):
#     if i%100000==0:
#         print(i)
#     e_cur = graph.e
#     e_new = np.ones(shape=(10,10,1)) # edge feature matrix initialised to 1 (teammate); enemy edges (original value 2) are set to 0 below
#     for row in range(0,10):
#         for col in range(0,10):
#             if e_cur[row,col,0]==2:
#                 e_new[row,col,0] = 0
#     graph.e = e_new

In [None]:
# Hyperparameters
learning_rate = 0.001  # step size passed to the optimizer
epochs = 30  # number of training epochs
es_patience = 10  # early-stopping patience (not referenced in the visible code)
batch_size = 256  # graphs per batch

# Keep only the graphs selected by the standard filter
filt = df_filters['filt_std'].values
filt_idx = get_filt_idx(filt)
graphs_filt = graphs[filt_idx]
print('Filtered (standard only)')

# Shuffle the indices, then cut them into 64% train / 16% validation / 20% test
idxs = np.random.permutation(len(graphs_filt))
split_va = int(0.64 * len(graphs_filt))
split_te = int(0.8 * len(graphs_filt))
idx_tr = idxs[:split_va]
idx_va = idxs[split_va:split_te]
idx_te = idxs[split_te:]
data_tr, data_va, data_te = graphs_filt[idx_tr], graphs_filt[idx_va], graphs_filt[idx_te]

# Batch loaders; only the training loader is given an epoch limit
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)

# Build model
class Net_2_0(Model):
    """Baseline model: one ECCConv layer, global average pooling, sigmoid output."""

    def __init__(self):
        super().__init__()
        # Edge-conditioned convolution with 19 output channels and ReLU
        self.conv1 = ECCConv(19, activation='relu')
        # Pools the node outputs of the convolution by averaging
        self.pool1 = GlobalAvgPool()
        # Output width is read from the notebook-level `graphs_filt` dataset,
        # so that variable must exist before the model is instantiated
        self.dense = Dense(graphs_filt.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run the forward pass.

        `inputs` is unpacked into three items that are passed, in order, to
        the convolution (presumably node features, adjacency and edge
        features as yielded by the BatchLoader - confirm against the loader).
        Returns the sigmoid output of the final dense layer.
        """
        nodes, adjacency, edges = inputs
        conv_out = self.conv1([nodes, adjacency, edges])
        pooled = self.pool1(conv_out)
        return self.dense(pooled)

# Compile and train model 2.0 with Adam and binary cross-entropy,
# tracking binary accuracy on the training and validation loaders
model_2_0 = Net_2_0()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_2_0.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_accuracy'])
fit_log_2_0 = model_2_0.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)

# Save per-epoch training and validation accuracy as a CSV
training_accuracy = fit_log_2_0.history['binary_accuracy']
validation_accuracy = fit_log_2_0.history['val_binary_accuracy']
# NOTE(review): this rebinds `epochs` from the configured int to a list of
# 1-based epoch numbers; kept as-is because later cells may rely on it.
epochs = list(range(1, len(training_accuracy)+1))
pd.DataFrame({'epoch': epochs, 'training_accuracy':training_accuracy, 'validation_accuracy':validation_accuracy}).to_csv('../models/fit_records/model_2_0_accuracy.csv', index=False)

# Pickle the training+validation log and the model. Context managers make sure
# each file is flushed and closed (the handles were previously never closed).
# NOTE(review): pickling Keras objects may not be supported on every
# TensorFlow version - confirm, or consider model.save_weights instead.
with open('../models/fit_records/fit_log_2_0.pkl', 'wb') as filehandler:
    pickle.dump(fit_log_2_0, filehandler)
with open('../models/fit_records/model_2_0.pkl', 'wb') as filehandler:
    pickle.dump(model_2_0, filehandler)