# Loading the Data

To begin, let's load the data!

In [107]:
import json

with open('s17.json') as user_file:
  file_contents = user_file.read()
data = json.loads(file_contents)

# Data Parsing

To make sense of the data, we need to convert our json into tensorflow tensors

In [108]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor

## Data Example

Here's what a sample point in the data looks like:

In [69]:
split = random_split(data, [0.8,0.2])

In [53]:
data[10000]

{'_id': '9bbf158fc296fbb8194beb4a5879e67ec6061ca29e10d8522202aa8758d3',
 'season': 'lrq7y3q3',
 'winner': 'https://data.ninjakiwi.com/battles2/users/9ced168cdd92afa311118719577ab022cd0749b9cc19dd6d',
 'loser': 'https://data.ninjakiwi.com/battles2/users/9cba17838ec0f8ad1d468e180e26e17ecd5149ee97108d3f',
 'body': {'id': '9bbf158fc296fbb8194beb4a5879e67ec6061ca29e10d8522202aa8758d3',
  'gametype': 'Ranked',
  'map': 'castle_ruins',
  'duration': 65,
  'endRound': 2,
  'mapURL': 'https://static-api.nkstatic.com/appdocs/4/assets/opendata/7f62de8ad4252bfe7a9ea774b3c96da6_castle_ruins.png',
  'playerLeft': {'displayName': 'G.O.A.T',
   'hero': 'Obyn_Ocean',
   'heroPortrait': 'https://static-api.nkstatic.com/appdocs/4/assets/opendata/d94072f8fda89e4a1db11ba14c653ace_ocean_xp_large.png',
   'towerone': 'SniperMonkey',
   'towertwo': 'NinjaMonkey',
   'towerthree': 'Alchemist',
   'currentUser': False,
   'result': 'lose',
   'profileURL': 'https://data.ninjakiwi.com/battles2/users/9cba17838ec0

## Encoding

To allow our ML algo to make sense of the data, we will embed the tower, map, and hero labels into *integers*. Our ML algo will use embedding layers to try and infer the relationship between the different labels in each class.

### Encoding Function

In [109]:
def encode(arr):
    i = 0
    return_dict = {}
    for item in arr:
        return_dict[item] = i
        i += 1
    return return_dict

### Tower Encoding

In [110]:
tower_names = [
    # Primary
    'DartMonkey',
    'TackShooter',
    'BombShooter',
    'BoomerangMonkey',
    'GlueGunner',
    'IceMonkey',

    # Military
    'DartlingGunner',
    'MonkeySub',
    'SniperMonkey',
    'MonkeyAce',
    'MortarMonkey',
    'HeliPilot',
    'MonkeyBuccaneer',

    # Magic
    'WizardMonkey',
    'Alchemist',
    'Druid',
    'SuperMonkey',
    'NinjaMonkey',

    # Support
    'SpikeFactory',
    'MonkeyVillage',
    'BananaFarm',
    'EngineerMonkey'
]

tower_encoding = encode(tower_names)

### Hero Encoding

In [111]:
hero_names = [
    'Quincy',
    'Gwendolin',
    'Obyn_Ocean',
    'Quincy_Cyber',
    'Obyn',
    'StrikerJones',
    'Gwendolin_Science',
    'StrikerJones_Biker',
    'Churchill',
    'Churchill_Sentai',
    'Benjamin',
    'Benjamin_DJ',
    'Ezili',
    'Ezili_SmudgeCat',
    'PatFusty',
    'PatFusty_Snowman',
    'Jericho',
    'Jericho_Highwayman',
    'Jericho_StarCaptain',
    'Adora'
]

hero_encoding = encode(hero_names)

### Map Encoding

In [112]:
map_names = [
    'banana_depot_scene',
    'basalt_columns',
    'building_site_scene',
    'bloon_bot_factory',
    'basalt_colums',
    'castle_ruins',
    'cobra_command',
    'dino_graveyard',
    'garden',
    'glade',
    'inflection',
    'koru',
    'oasis',
    'ports',
    'sands_of_time',
    'star',
    'pirate_cove',
    'precious_space',
    'sun_palace',
    'off_tide',
    'salmon_pool'
]

map_encoding = encode(map_names)

## Data Cleaning

supertiger reported that, because of an oversight, some ZOMG arena matches leaked into the database. For now, we won't worry too much about this except to filter out the matches that did not take place on maps in the HOM pool.

In [113]:
i = 0
while i < len(data):
    if data[i]['body']['map'] not in map_names:
        data.pop(i)
    else:
        i += 1

# Creating a Dataset For Matches

Let's create a dataset with the columns organized as follows:
- Map
- Left Hero
- Left Tower 1
- Left Tower 2
- Left Tower 3
- Right Hero
- Right Tower 1
- Right Tower 2
- Right Tower 3
- Winner (0 if left player wins, 1 if right player wins)


In [114]:
with open('s17.json') as user_file:
        file_contents = user_file.read()
data = json.loads(file_contents)

# Clean the data of any matches that didn't take place in HOM
i = 0
while i < len(data):
    if data[i]['body']['map'] not in map_names:
        data.pop(i)
    else:
        i += 1

split = random_split(data, [0.8,0.2])
data_train = split[0]
data_test = split[1]

class S17(Dataset):
    def __init__(self, data):
        self.data = data
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        body = self.data[idx]['body']
    
        # Map
        match_map = map_encoding[body['map']]

        # Left player's picks
        left_hero = hero_encoding[body['playerLeft']['hero']]
        left_t1 = tower_encoding[body['playerLeft']['towerone']]
        left_t2 = tower_encoding[body['playerLeft']['towertwo']]
        left_t3 = tower_encoding[body['playerLeft']['towerthree']]

        # Right player's picks
        right_hero = hero_encoding[body['playerRight']['hero']]
        right_t1 = tower_encoding[body['playerRight']['towerone']]
        right_t2 = tower_encoding[body['playerRight']['towertwo']]
        right_t3 = tower_encoding[body['playerRight']['towerthree']]

        # Who won?
        if body['playerRight']['result'] == 'win':
            winner = 1
        elif body['playerRight']['result'] == 'lose':
            winner = 0
        else:
            winner = 0.5

        return torch.tensor([match_map, left_hero, left_t1, left_t2, left_t3, right_hero, right_t1, right_t2, right_t3]), winner

## Demonstration

Here, I print a few data points to show what the data looks like:

In [115]:
s17_train = S17(data_train)
s17_test = S17(data_test)

for i in range(10):
    print(s17_train[i])

(tensor([ 6,  7,  2, 18, 10,  7,  2,  8, 19]), 1)
(tensor([19,  0,  3, 18,  7, 16,  3, 20, 10]), 0)
(tensor([17,  6,  3,  7, 13,  0, 14, 12, 18]), 0)
(tensor([14,  0, 12, 14, 18,  1,  3, 17, 18]), 0)
(tensor([16, 17,  1, 10, 20, 18,  1, 10, 20]), 0)
(tensor([ 1,  3,  6, 20, 14,  4, 14, 13,  6]), 1)
(tensor([12,  1, 20, 17, 14, 16,  3, 10, 20]), 0)
(tensor([18, 19,  6, 16, 19, 17,  9, 20, 21]), 1)
(tensor([ 8, 18,  0, 10, 12,  0, 12, 18, 14]), 1)
(tensor([15,  6, 17, 11, 14,  4, 15,  7, 20]), 1)


In [116]:
batch_size = 64

# Create data loaders.
s17_train_loader = DataLoader(s17_train, batch_size=batch_size)
s17_test_loader = DataLoader(s17_test, batch_size=batch_size)

for X, y in s17_test_loader:
    print(f"Shape of X [N, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, W]: torch.Size([64, 9])
Shape of y: torch.Size([64]) torch.float32


# Model Construction

To begin, let's use CUDA to speed up the training a bit

In [119]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")
device= "cpu"

Using cuda device


Now we define the model by subclassing with nn.Module

In [125]:
hero_encoding

{'Quincy': 0,
 'Gwendolin': 1,
 'Obyn_Ocean': 2,
 'Quincy_Cyber': 3,
 'Obyn': 4,
 'StrikerJones': 5,
 'Gwendolin_Science': 6,
 'StrikerJones_Biker': 7,
 'Churchill': 8,
 'Churchill_Sentai': 9,
 'Benjamin': 10,
 'Benjamin_DJ': 11,
 'Ezili': 12,
 'Ezili_SmudgeCat': 13,
 'PatFusty': 14,
 'PatFusty_Snowman': 15,
 'Jericho': 16,
 'Jericho_Highwayman': 17,
 'Jericho_StarCaptain': 18,
 'Adora': 19}

In [160]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.tower_embedding = nn.Sequential(nn.Embedding(22, 3))
        self.hero_embedding = nn.Sequential(nn.Embedding(20, 3))
        self.map_embedding = nn.Sequential(nn.Embedding(21, 3))

        self.linear_sigm = nn.Sequential(nn.Linear(15, 1), nn.Sigmoid())

    def forward(self, x):
        # Apply the embedding to the match location 
        # print(x[0])
        map_vec = self.map_embedding(x[:,0])

        left_hero_vec = self.hero_embedding(x[:,1])
        left_comp_vec = self.tower_embedding(x[:,2]) + self.tower_embedding(x[:,3]) + self.tower_embedding(x[:,4])

        right_hero_vec = self.hero_embedding(x[:,5])
        right_comp_vec = self.tower_embedding(x[:,6]) + self.tower_embedding(x[:,7]) + self.tower_embedding(x[:,8])

        # Concatenate the vectors all together nice and neatly!
        embedding_vec = torch.cat((map_vec, left_hero_vec, left_comp_vec, right_hero_vec, right_comp_vec),1)

        # Now, pass the embedding vector through a linear layer
        y = self.linear_sigm(embedding_vec)
        # print(y[0])
        return torch.flatten(y)

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (tower_embedding): Sequential(
    (0): Embedding(22, 3)
  )
  (hero_embedding): Sequential(
    (0): Embedding(20, 3)
  )
  (map_embedding): Sequential(
    (0): Embedding(21, 3)
  )
  (linear_sigm): Sequential(
    (0): Linear(in_features=15, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


Setting the loss function...

In [205]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)

In [209]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y.float())

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [210]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            # print(torch.abs(torch.round(y - pred)))
            # print(torch.tensor([1 for i in range(batch_size)]))
            # print(torch.tensor([1 for i in range(y.size()[0])]) - torch.abs(torch.round(y - pred)))
            correct += torch.sum(torch.tensor([1 for i in range(y.size()[0])]) - torch.abs(torch.round(y - pred)))
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [211]:
epochs = 500
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(s17_train_loader, model, loss_fn, optimizer)
    test(s17_test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.348931  [   64/37138]
loss: 0.315661  [ 6464/37138]
loss: 0.380255  [12864/37138]
loss: 0.391279  [19264/37138]
loss: 0.332899  [25664/37138]
loss: 0.410111  [32064/37138]
Test Error: 
 Accuracy: 55.9%, Avg loss: 0.355352 

Epoch 2
-------------------------------
loss: 0.348669  [   64/37138]
loss: 0.315439  [ 6464/37138]
loss: 0.379752  [12864/37138]
loss: 0.391018  [19264/37138]
loss: 0.332596  [25664/37138]
loss: 0.409847  [32064/37138]
Test Error: 
 Accuracy: 55.9%, Avg loss: 0.355015 

Epoch 3
-------------------------------
loss: 0.348401  [   64/37138]
loss: 0.315214  [ 6464/37138]
loss: 0.379248  [12864/37138]
loss: 0.390755  [19264/37138]
loss: 0.332292  [25664/37138]
loss: 0.409579  [32064/37138]
Test Error: 
 Accuracy: 55.9%, Avg loss: 0.354675 

Epoch 4
-------------------------------
loss: 0.348129  [   64/37138]
loss: 0.314986  [ 6464/37138]
loss: 0.378742  [12864/37138]
loss: 0.390491  [19264/37138]
loss: 0.331988  [25664/3

In [189]:
model.eval()

NeuralNetwork(
  (tower_embedding): Sequential(
    (0): Embedding(22, 3)
  )
  (hero_embedding): Sequential(
    (0): Embedding(20, 3)
  )
  (map_embedding): Sequential(
    (0): Embedding(21, 3)
  )
  (linear_sigm): Sequential(
    (0): Linear(in_features=15, out_features=1, bias=True)
    (1): Sigmoid()
  )
)

In [236]:
import random
ind = random.randint(0,len(s17_train)-1)
pred = model(s17_train[ind][0].reshape(1,9))
print(s17_train[ind][0])
print(pred)
print(s17_train[ind][1])

tensor([16,  1,  4,  7,  1,  1,  1, 13, 20])
tensor([0.4962], grad_fn=<ViewBackward0>)
0


In [193]:
s17_train[5000][0].reshape(1,9)

tensor([[ 7,  1,  7, 20, 16,  1,  6, 19, 15]])

In [84]:
s17_test[0][0][0:2]

tensor([18,  8])