# Preliminaries

In [1]:
import torch
from torch import nn
from torch.utils.data import random_split, Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

import load_data as ld
import pandas as pd

# Computing Stats

We build a Pandas Dataframe to track key stats for each player, with each row containing the following:
- User ID
- Username
- Matches Played
- Win %
- Win % for *Each Tower*

To add later:
- Win % for each hero
- Win % for each map

The strategy here is to first build a dictionary with the keys being user ID's and the values being lists containing the other *47* parameters.

## Building the DataFrame With Dictionaries

When computing winrates, we will penalize winrates if the amount of games played is low. This ideally will help the AI make better decisions.

In [43]:
def adj_wr(x, n, stds = 1, iter = 25):
    def foo(p, n):
        return p + stds*(p*(1-p)/n)**0.5 - x
    
    a = 0
    b = 1
    for i in range(iter):
        c = 0.5*(a + b)
        if foo(c, n) > 0:
            b = c
        elif foo(c, n) < 0:
            a = c
        else:
            return c
    return c

First, let's build a dictionary of all the data.

In [44]:
s17_player_stats = {}
for i in range(len(ld.data)):
    body = ld.data[i]['body']

    players = ['playerLeft', 'playerRight']
    towers = ['towerone', 'towertwo', 'towerthree']

    for player in players:
        player_side = body[player]
        user_id = player_side['profileURL'][42:]
        username = player_side['displayName']

        # Create a new default entry if this is the first time we've come across this player
        if user_id not in s17_player_stats.keys():
            s17_player_stats[user_id] = [username]
            s17_player_stats[user_id].extend([0 for i in range(46)])

        # Increment the matches played by one
        s17_player_stats[user_id][1] += 1

        # If the player won, increment matches won by one
        if player_side['result'] == 'win':
            s17_player_stats[user_id][2] += 1

        # Which towers did the player use?
        for tower in towers:
            # print(s17_player_stats[user_id][3])
            s17_player_stats[user_id][2*ld.tower_encoding[player_side[tower]] + 3] += 1 # Matches played with a tower
            if player_side['result'] == 'win': 
                s17_player_stats[user_id][2*ld.tower_encoding[player_side[tower]] + 4] += 1 # Matches won with a tower
    
for key in s17_player_stats.keys():
    s17_player_stats[key][2] = s17_player_stats[key][2]/s17_player_stats[key][1] # Get overall winrate
    i = 4
    while i < len(s17_player_stats[key]):
        try:
            s17_player_stats[key][i] = adj_wr(s17_player_stats[key][i]/s17_player_stats[key][i-1], s17_player_stats[key][i-1])
        except: 
            s17_player_stats[key][i] = 0.0 # Get winrate for each tower
        s17_player_stats[key].pop(i-1)
        i += 1


Let's try and build a dataset object based on what we have so far

In [79]:
test_id = '9fbf1e8e8dc3ada34d138d1f5625e375cf521aed9f448d39'

In [107]:
class S17(Dataset):
    def __init__(self, data):
        self.data = data
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        body = self.data[idx]['body']

        players = ['playerLeft', 'playerRight']
        towers = ['towerone', 'towertwo', 'towerthree']

        x_tensor = []
        for player in players:
            player_id = body[player]['profileURL'][42:]
            player_stats = s17_player_stats[player_id]

            tower_wrs = []
            for tower in towers:
                # Retrieve the WR each player has with each of the towers they are playing.
                tower_wrs.append(float(s17_player_stats[player_id][ld.tower_encoding[body[player][tower]] + 3]))

            x_tensor.append(float(s17_player_stats[player_id][1])) #Matches Played
            x_tensor.append(float(s17_player_stats[player_id][2])) #Overall WR
            x_tensor.append(sum(tower_wrs)) # WRs with each tower

        # Who won?
        if body['playerRight']['result'] == 'win':
            winner = 1
        elif body['playerRight']['result'] == 'lose':
            winner = 0
        else:
            winner = 0.5
        
        return torch.tensor(x_tensor), winner

In [161]:
s17_dataset = S17(ld.data)
split = random_split(s17_dataset, [0.2,0.8])
data_train = split[0]
data_test = split[1]

In [171]:
batch_size = 64
s17_train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
s17_test_loader = DataLoader(data_test, batch_size=batch_size, shuffle=True)

for X, y in s17_test_loader:
    print(f"Shape of X [N, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, W]: torch.Size([64, 6])
Shape of y: torch.Size([64]) torch.int64


In [172]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")
device = "cpu"

Using cuda device


In [203]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.logistic = nn.Sequential(nn.Linear(6,6), nn.Linear(6,1), nn.Sigmoid())

    def forward(self, x):
        y = self.logistic(x)
        return torch.flatten(y)

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (logistic): Sequential(
    (0): Linear(in_features=6, out_features=6, bias=True)
    (1): Linear(in_features=6, out_features=1, bias=True)
    (2): Sigmoid()
  )
)


In [204]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-4)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        # print(X,y)
        pred = model(X)
        # print(pred)
        # print(y)
        loss = loss_fn(pred, y.float())

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            # print(list(model.parameters())[0])

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y.float()).item()
            # print(torch.abs(torch.round(y - pred)))
            # print(torch.tensor([1 for i in range(batch_size)]))
            # print(torch.tensor([1 for i in range(y.size()[0])]) - torch.abs(torch.round(y - pred)))
            a = torch.tensor([1 for i in range(y.size()[0])]).to(device)
            b = torch.abs(torch.round(y - pred)).to(device)
            correct += torch.sum(a - b)
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [207]:
epochs = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(s17_train_loader, model, loss_fn, optimizer)
    test(s17_test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 141.400024  [   64/ 9285]
Parameter containing:
tensor([[-0.2577,  0.1410, -0.0886, -0.1564, -0.1577, -0.1503],
        [-0.2552, -0.2928,  0.1083,  0.1787, -0.2208,  0.0540],
        [ 0.3549,  0.1834, -0.3723,  0.2425, -0.2120, -0.3731],
        [-0.0099,  0.2792,  0.0778,  0.0826,  0.0932,  0.0333],
        [ 0.0701,  0.2088,  0.3697,  0.1826,  0.3547, -0.0445],
        [-0.3590,  0.0244, -0.1649, -0.2490, -0.2890,  0.2453]],
       requires_grad=True)
loss: 131.011566  [ 6464/ 9285]
Parameter containing:
tensor([[-0.2577,  0.1410, -0.0886, -0.1565, -0.1577, -0.1503],
        [-0.2552, -0.2928,  0.1083,  0.1787, -0.2207,  0.0540],
        [ 0.3549,  0.1834, -0.3723,  0.2426, -0.2120, -0.3731],
        [-0.0099,  0.2792,  0.0778,  0.0826,  0.0932,  0.0332],
        [ 0.0701,  0.2088,  0.3697,  0.1825,  0.3547, -0.0445],
        [-0.3590,  0.0244, -0.1649, -0.2490, -0.2890,  0.2453]],
       requires_grad=True)


Test Error: 
 Accuracy: 50.2%, Avg loss: 133.191035 

Epoch 2
-------------------------------
loss: 133.084671  [   64/ 9285]
Parameter containing:
tensor([[-0.2577,  0.1410, -0.0886, -0.1565, -0.1577, -0.1503],
        [-0.2552, -0.2928,  0.1083,  0.1788, -0.2207,  0.0540],
        [ 0.3549,  0.1834, -0.3723,  0.2427, -0.2120, -0.3731],
        [-0.0099,  0.2792,  0.0778,  0.0826,  0.0932,  0.0332],
        [ 0.0701,  0.2088,  0.3697,  0.1824,  0.3547, -0.0445],
        [-0.3590,  0.0244, -0.1649, -0.2491, -0.2890,  0.2452]],
       requires_grad=True)
loss: 108.131134  [ 6464/ 9285]
Parameter containing:
tensor([[-0.2577,  0.1410, -0.0886, -0.1566, -0.1577, -0.1503],
        [-0.2552, -0.2928,  0.1083,  0.1788, -0.2207,  0.0540],
        [ 0.3549,  0.1834, -0.3723,  0.2428, -0.2120, -0.3730],
        [-0.0099,  0.2792,  0.0778,  0.0826,  0.0932,  0.0332],
        [ 0.0701,  0.2088,  0.3697,  0.1823,  0.3547, -0.0445],
        [-0.3590,  0.0244, -0.1649, -0.2492, -0.2890,  0.2452]],
 

KeyboardInterrupt: 

In [101]:
s17_dataset[100]

(tensor([1.0790e+03, 7.5533e-01, 2.0790e+00, 3.1700e+02, 5.2050e-01, 1.2384e+00]),
 1)

In [58]:
user_id = 'https://data.ninjakiwi.com/battles2/users/9ce942df8dc4fff74c13d9490b25e77f98074ebe9841896c'
user_id[42:]

'9ce942df8dc4fff74c13d9490b25e77f98074ebe9841896c'

# Creating a Dataset For Matches

Let's create a dataset with the columns organized as follows:
- Map
- Left Hero
- Left Tower 1
- Left Tower 2
- Left Tower 3
- Right Hero
- Right Tower 1
- Right Tower 2
- Right Tower 3
- Winner (0 if left player wins, 1 if right player wins)


In [42]:
split = random_split(ld.data, [0.8,0.2])
data_train = split[0]
data_test = split[1]

class S17(Dataset):
    def __init__(self, data):
        self.data = data
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        body = self.data[idx]['body']
    
        # Map
        match_map = ld.map_encoding[body['map']]

        # Left player's picks
        left_hero = ld.hero_encoding[body['playerLeft']['hero']]
        left_t1 = ld.tower_encoding[body['playerLeft']['towerone']]
        left_t2 = ld.tower_encoding[body['playerLeft']['towertwo']]
        left_t3 = ld.tower_encoding[body['playerLeft']['towerthree']]

        # Right player's picks
        right_hero = ld.hero_encoding[body['playerRight']['hero']]
        right_t1 = ld.tower_encoding[body['playerRight']['towerone']]
        right_t2 = ld.tower_encoding[body['playerRight']['towertwo']]
        right_t3 = ld.tower_encoding[body['playerRight']['towerthree']]

        # Who won?
        if body['playerRight']['result'] == 'win':
            winner = 1
        elif body['playerRight']['result'] == 'lose':
            winner = 0
        else:
            winner = 0.5

        return torch.tensor([match_map, left_hero, left_t1, left_t2, left_t3, right_hero, right_t1, right_t2, right_t3]), winner

## Demonstration

Here, I print a few data points to show what the data looks like:

In [43]:
s17_train = S17(data_train)
s17_test = S17(data_test)

for i in range(10):
    print(s17_train[i])

(tensor([10, 10,  0, 18, 20,  6,  0, 20,  7]), 1)
(tensor([ 7,  6,  0,  7, 20, 17,  9, 20, 21]), 0)
(tensor([ 5, 18,  7,  5,  3,  2, 17,  6, 20]), 1)
(tensor([ 1,  7, 21,  9, 18, 10,  6, 14,  4]), 0)
(tensor([ 0,  0,  1, 13, 20, 16,  2,  1,  7]), 0)
(tensor([15, 10, 18,  1,  7,  1,  7, 16, 20]), 1)
(tensor([15,  8, 13, 20, 12, 15,  6,  5, 17]), 0)
(tensor([11,  0,  3, 10, 18,  0, 20, 10,  1]), 1)
(tensor([ 6,  5, 11, 10,  2,  5,  2, 10, 11]), 1)
(tensor([19,  8, 15,  8, 19,  2, 17, 14,  4]), 0)


In [44]:
batch_size = 64

# Create data loaders.
s17_train_loader = DataLoader(s17_train, batch_size=batch_size)
s17_test_loader = DataLoader(s17_test, batch_size=batch_size)

for X, y in s17_test_loader:
    print(f"Shape of X [N, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, W]: torch.Size([64, 9])
Shape of y: torch.Size([64]) torch.int64


# Model Construction

To begin, let's use CUDA to speed up the training a bit

In [45]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")
device= "cpu"

Using cuda device


Now we define the model by subclassing with nn.Module

In [16]:
class NeuralNetwork(nn.Module):
    def __init__(self, dim = 3):
        super().__init__()
        self.tower_embedding = nn.Sequential(nn.Embedding(22, dim))
        self.hero_embedding = nn.Sequential(nn.Embedding(20, dim))
        self.map_embedding = nn.Sequential(nn.Embedding(21, dim))

        self.linear_sigm = nn.Sequential(nn.Linear(5*dim, 1), nn.Sigmoid())

    def forward(self, x):
        # Apply the embedding to the match location 
        # print(x[0])
        map_vec = self.map_embedding(x[:,0])

        left_hero_vec = self.hero_embedding(x[:,1])
        left_comp_vec = self.tower_embedding(x[:,2]) + self.tower_embedding(x[:,3]) + self.tower_embedding(x[:,4])

        right_hero_vec = self.hero_embedding(x[:,5])
        right_comp_vec = self.tower_embedding(x[:,6]) + self.tower_embedding(x[:,7]) + self.tower_embedding(x[:,8])

        # Concatenate the vectors all together nice and neatly!
        embedding_vec = torch.cat((map_vec, left_hero_vec, left_comp_vec, right_hero_vec, right_comp_vec),1)

        # Now, pass the embedding vector through a linear layer
        y = self.linear_sigm(embedding_vec)
        # print(y[0])
        return torch.flatten(y)

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (tower_embedding): Sequential(
    (0): Embedding(22, 3)
  )
  (hero_embedding): Sequential(
    (0): Embedding(20, 3)
  )
  (map_embedding): Sequential(
    (0): Embedding(21, 3)
  )
  (linear_sigm): Sequential(
    (0): Linear(in_features=15, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


Setting the loss function...

In [113]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)

In [114]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y.long())

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [115]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y.long()).item()
            # print(torch.abs(torch.round(y - pred)))
            # print(torch.tensor([1 for i in range(batch_size)]))
            # print(torch.tensor([1 for i in range(y.size()[0])]) - torch.abs(torch.round(y - pred)))
            correct += torch.sum(torch.tensor([1 for i in range(y.size()[0])]) - torch.abs(torch.round(y - pred)))
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [39]:
epochs = 500
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(s17_train_loader, model, loss_fn, optimizer)
    test(s17_test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: -0.999989  [   64/37138]
loss: -0.999985  [ 6464/37138]
loss: -0.999966  [12864/37138]
loss: -0.999999  [19264/37138]
loss: -0.999997  [25664/37138]
loss: -0.999997  [32064/37138]
Test Error: 
 Accuracy: 50.1%, Avg loss: -0.999350 

Epoch 2
-------------------------------
loss: -0.999991  [   64/37138]
loss: -0.999987  [ 6464/37138]
loss: -0.999971  [12864/37138]
loss: -0.999999  [19264/37138]
loss: -0.999998  [25664/37138]
loss: -0.999997  [32064/37138]
Test Error: 
 Accuracy: 50.1%, Avg loss: -0.999429 

Epoch 3
-------------------------------
loss: -0.999993  [   64/37138]
loss: -0.999989  [ 6464/37138]
loss: -0.999974  [12864/37138]
loss: -0.999999  [19264/37138]
loss: -0.999998  [25664/37138]
loss: -0.999998  [32064/37138]
Test Error: 
 Accuracy: 50.1%, Avg loss: -0.999491 

Epoch 4
-------------------------------
loss: -0.999994  [   64/37138]
loss: -0.999991  [ 6464/37138]
loss: -0.999977  [12864/37138]
loss: -1.000000  [19264/37138]

KeyboardInterrupt: 

In [189]:
model.eval()

NeuralNetwork(
  (tower_embedding): Sequential(
    (0): Embedding(22, 3)
  )
  (hero_embedding): Sequential(
    (0): Embedding(20, 3)
  )
  (map_embedding): Sequential(
    (0): Embedding(21, 3)
  )
  (linear_sigm): Sequential(
    (0): Linear(in_features=15, out_features=1, bias=True)
    (1): Sigmoid()
  )
)

In [236]:
import random
ind = random.randint(0,len(s17_train)-1)
pred = model(s17_train[ind][0].reshape(1,9))
print(s17_train[ind][0])
print(pred)
print(s17_train[ind][1])

tensor([16,  1,  4,  7,  1,  1,  1, 13, 20])
tensor([0.4962], grad_fn=<ViewBackward0>)
0


In [193]:
s17_train[5000][0].reshape(1,9)

tensor([[ 7,  1,  7, 20, 16,  1,  6, 19, 15]])

In [84]:
s17_test[0][0][0:2]

tensor([18,  8])