In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import functional as F


device = "cuda" if torch.cuda.is_available() else "cpu"
if device == 'cuda':
    print(f'Using cuda device {torch.cuda.get_device_name()}')
else:
    print(f"Using {device} device")
    
data_path = '../../data/processed/processed_match_data_euw1_1667433600_1668520576.csv'
data = pd.read_csv(data_path)

Using cuda device NVIDIA GeForce RTX 3060 Laptop GPU


In [17]:
# Create dataset

# Drop unnecessary rows/cols
col_list = ['match_id']
col_list += ['p{j}_key'.format(j=i) for i in range(1,11)]
col_list += ['p{j}_champId'.format(j=i) for i in range(1,11)]
data = data.drop(columns = col_list)

# Create player_name dictionary
pres_players = ['<U>']
for i in range(1,11):
    pres_players += list(data[f'p{i}_name'].unique())
pres_players = list(set(pres_players))

num_players = len(pres_players)
key_to_player = {k:v for k, v in enumerate(pres_players)}
player_to_key = {k:v for v, k in enumerate(pres_players)}

# Create champ_name dictionary
pres_champs = []
for i in range(1, 11):
    pres_champs += list(data[f'p{i}_champName'].unique())
pres_champs = list(set(pres_champs))

num_champs = len(pres_champs)
key_to_champ = {k:v for k, v in enumerate(pres_champs)}
champ_to_key = {k:v for v, k in enumerate(pres_champs)}

# Format data according to champ and player dictionary
for i in range(1, 11):
    data[f'p{i}_champName'] = data[f'p{i}_champName'].map(champ_to_key)
    data = data.rename(columns = {f'p{i}_champName': f'p{i}_champKey'})
    data[f'p{i}_name'] = data[f'p{i}_name'].map(player_to_key)
    data = data.rename(columns = {f'p{i}_name': f'p{i}_key'})

In [18]:
# Giving players identified by '<U>' unique keys
for i in range(1, 11):
    cond = data[f'p{i}_key'] == player_to_key['<U>']
    data.loc[cond, [f'p{i}_key']] = (
        data.loc[cond, f'p{i}_champKey'] + num_players)

# Explanation - players having non-unique keys causes problems for the algorithm, there for unknown players are
# encoded according to the champion they pick (formula is given by num_players + champ_key).

In [19]:
# Turning DataFrame into tensor objects.
inputs = torch.tensor(data.drop(columns = ['winning_team']).values, dtype = torch.long)
labels = torch.tensor(data.loc[:, ['winning_team']].values - 1, dtype = torch.long)

# Test train split
inputs = inputs.reshape(-1, 2, 20)
labels = labels.reshape(-1, 2)

torch.manual_seed(912)
randi = torch.randperm(inputs.shape[0])
inputs_1, labels_1 = inputs[randi], labels[randi]
tti = round(0.8 * inputs_1.shape[0])
Xtr, Xte = inputs_1[:tti].reshape(-1,20).to(device), inputs_1[tti:].reshape(-1, 20).to(device)
Ytr, Yte = labels_1[:tti].reshape(-1, 1).to(device).to(torch.float), labels_1[tti:].reshape(-1, 1).to(device).to(torch.float)


In [20]:
# NN Modules

class RP(nn.Module):
    def __init__(self, in_out_size: int, layer_size: int, num_layers: int ):
        super(RP, self).__init__()
        self.in_out_size = in_out_size
        self.layer_size = layer_size
        self.num_layers = num_layers
        
        self.ReLU = nn.ReLU()
        
        # Hidden layers
        hidden_layer_list = nn.ModuleList()
        hidden_layer_list.append(nn.Linear(in_out_size, layer_size))
        hidden_layer_list.append(nn.ReLU())
        for _ in range(num_layers-1):
            hidden_layer_list.append(nn.Linear(layer_size, layer_size))
            hidden_layer_list.append(nn.ReLU())
        
        self.hidden = nn.Sequential(*hidden_layer_list)
            
        # Output layer
        self.out = nn.Sequential(
            nn.Linear(layer_size, in_out_size),
            nn.Sigmoid(),
        )
    
    def forward(self, x1, x2):
        """
        Args:
            x1: tensor [w_i^(l)]_{i \in T1}
            x2: tensor [w_i^(l)]_{i \in T2}

        Returns:
            tuple (r1, r2) where:
                r1:tensor [R_{AB, i}^(l)]_{i \in T1} (replace R with P respectively)
                r2: tensor [R_{AB, i}^(l)]_{i \in T2} (replace R with P respectively)
        """
        x = torch.cat((x1, x2), dim=1) # Concatenate into input 
        x = self.ReLU(x) # Apply first ReLU
        x = self.hidden(x) # Hidden layers
        x = self.out(x) # Out
        
        return (x[:, 0:(self.in_out_size//2)], x[:, (self.in_out_size//2):(self.in_out_size)])
        
        
class G(nn.Module):
    def __init__(self, in_size: int, layer_size: int, num_layers: int ):
        super(G, self).__init__()
        self.in_size = in_size
        self.layer_size = layer_size
        self.num_layers = num_layers
        
        self.ReLU = nn.ReLU()
        
        # Hidden layers
        hidden_layer_list = nn.ModuleList()
        hidden_layer_list.append(nn.Linear(in_size, layer_size))
        hidden_layer_list.append(nn.ReLU())
        for _ in range(num_layers-1):
            hidden_layer_list.append(nn.Linear(layer_size, layer_size))
            hidden_layer_list.append(nn.ReLU())
        
        self.hidden = nn.Sequential(*hidden_layer_list)
            
        # Output layer
        self.out = nn.Sequential(
            nn.Linear(layer_size, 1),
            nn.Sigmoid(),
        )
    
    def forward(self, x1, x2):
        """
        Args:
            x1: tensor [w_i^(l)]_{i \in T1}
            x2: tensor [w_i^(l)]_{i \in T2}

        Returns:
            tensor \hat{y}_{T1,T2}
        """
        x = torch.cat((x1, x2), dim=1) # Concatenate into input 
        x = self.ReLU(x) # Apply first ReLU
        x = self.hidden(x) # Hidden layers
        x = self.out(x) 
        
        return x

In [21]:
# Initialising modules and hyperparameters

# Hyperparameters

input_space_size =  num_champs + num_players # Number of different possible inputs
input_space_length = 20 # Number of inputs for T1+T2
team_size = input_space_length//2

L = 20 # Number of times to iterate R and P module
rp_layer_size = 7*input_space_length # Hidden layer size for R and P modules
rp_num_layers = 4 # Number of hidden layers for R and P modules
g_layer_size = 9*input_space_length # Hidden layer size for G module
g_num_layers = 2 # Number of hidden layers for G module
c_const = 1 # Constant seen in part (b) of the algorithm

batch_size = Xtr.shape[0] # Batch size (no batches)
reg_const = 1e-2 # Regularisation constant

# Modules

reward = RP(input_space_length, rp_layer_size, rp_num_layers).to(device)
penality = RP(input_space_length, rp_layer_size, rp_num_layers).to(device)
g_mod = G(input_space_length, g_layer_size, g_num_layers).to(device)
W = torch.randn((input_space_size)).to(device)
W.requires_grad = False

In [22]:
# Training

epochs = 400
learning_rate = 0.01

# Count parameters
print(sum(p.numel() for p in reward.parameters() if p.requires_grad) +
      sum(p.numel() for p in penality.parameters() if p.requires_grad) +
      sum(p.numel() for p in g_mod.parameters() if p.requires_grad), 'parameters in model.')

tr_lossg = []
tr_b_lossg = []
te_lossg = []

# Initialise
loss_f = torch.nn.BCELoss()
optimiser_R = torch.optim.Adam(reward.parameters(), lr=learning_rate, weight_decay=reg_const)
optimiser_P = torch.optim.Adam(penality.parameters(), lr=learning_rate, weight_decay=reg_const)
optimiser_G = torch.optim.Adam(g_mod.parameters(), lr=learning_rate, weight_decay=reg_const)

# Calculating alpha_const
occurrence_count = torch.scatter(torch.zeros(Xtr.shape[0], input_space_size).to(device), 1, Xtr, 1)
alpha_const =  c_const / ((occurrence_count.sum(dim=0)).max())
    
# No batches
X_b, Y_b = Xtr, Ytr
X1_b = X_b[:, 0:team_size]
X2_b = X_b[:, team_size:2*team_size]

# RENAME
@torch.no_grad()
def te_loss(X_1, X_2, Y):
    pred = g_mod.forward(W[X_1], W[X_2])
    return loss_f(pred, Y).item()
    
# Train loop
for i in range(epochs+1):
        
    W = W.detach()

    # Forward pass
    for k in range(L):

        R, P = reward.forward(W[X1_b], W[X2_b]), penality.forward(W[X1_b], W[X2_b])
        temp1 = (Y_b * R[0]) - ((1 - Y_b) * P[0])
        temp2 = ((1 - Y_b) * R[1]) - (Y_b * P[1])
        S = torch.concat((temp1, temp2), dim=1) # (i,j) = y_i R_ij - (1-y_i) P_ij

        adjustments = torch.scatter(torch.zeros(batch_size, input_space_size).to(device), 1, X_b, S)
        W += alpha_const * torch.sum(adjustments, dim=0)

        # Normalise 
        W = (W - W.mean())
        W = W / W.std()
    
    pred = g_mod.forward(W[X1_b], W[X2_b])

    # Backward pass
    
    optimiser_R.zero_grad()
    optimiser_P.zero_grad()
    optimiser_G.zero_grad()

    loss = loss_f(pred, Y_b)    
    loss.backward()
    
    optimiser_R.step()
    optimiser_P.step()
    optimiser_G.step()

    # Tracking data
    tr_b_lossg.append(loss.item())
    te_lossg.append(te_loss(Xte[:, 0:team_size], Xte[:, team_size:2*team_size], Yte)) 
    tr_lossg.append(te_loss(Xtr[:, 0:team_size], Xtr[:, team_size:2*team_size], Ytr)) 
                
    # Reporting status per epoch
    print('\r', end='')
    print(f'{i}/{epochs} complete ({round(((i)/epochs)*100)}%).',
          f'Train loss: {round(tr_lossg[-1], 4)}.', 
          f'Test loss: {round(te_lossg[-1], 4)}.', end='\r')

166501 parameters in model.
400/400 complete (100%). Train loss: 0.6864. Test loss: 0.6898.

In [23]:
# Utility and Analytics

@torch.no_grad()
def combined_champ_info(X,Y):
    """Calculates wins-losses for champions in (X,Y) and returns a dictionary of the form {'champ_name' : (wins-losses, weight)}"""

    temp = torch.concat(((2*Y - 1).expand(-1, 5), (1 - 2*Y).expand(-1, 5)), dim=1)
    wins = torch.scatter(torch.zeros(X.shape[0], num_champs).to(device), 1, X, temp)
    total_wins = wins.sum(dim=0)
    
    champ_info = {}
    for i in range(num_champs):
        champ_info[key_to_champ[i]] = (total_wins[i].item(), round(W[i].item(), 4))
        
    return dict(sorted(champ_info.items(), key=lambda item: item[1]))


@torch.no_grad()
def combined_player_info(X,Y):
    """Calculates wins-losses for players in (X,Y) and returns a dictionary of the form {'player_name' : (wins-losses, weight)}"""
    
    # Decoding '<U>' player identifiers 
    X[X > num_players -1 ] = player_to_key['<U>']

    temp = torch.concat(((2*Y - 1).expand(-1, 5), (1 - 2*Y).expand(-1, 5)), dim=1)
    wins = torch.scatter(torch.zeros(X.shape[0], num_players).to(device), 1, X[:num_players], temp)
    total_wins = wins.sum(dim=0)
    
    player_info = {}
    for i in range(num_players):
        player_info[key_to_player[i]] = (total_wins[i].item(), round(W[i].item(), 4))
        
    return dict(sorted(player_info.items(), key=lambda item: item[1]))


@torch.no_grad()
def accuracy(X, Y):
    X1, X2 = X[:, 0:team_size], X[:, team_size:2*team_size]
    pred = g_mod.forward(W[X1], W[X2])
    return (pred.round() == Y).sum().item() / Y.shape[0] 


# Print accuracy
print('Test accuracy:', accuracy(Xte, Yte))
print('Train accuracy:', accuracy(Xtr, Ytr))

# Print weights 
weights = {key_to_champ[i]: round(W[i].item(), 3) for i in range(num_champs)}
champ_info = combined_champ_info(Xtr[::2, 1::2,], Ytr[::2])
player_info = combined_player_info(Xtr[:, 0::2], Ytr)
print(champ_info)

Test accuracy: 0.5354868061874432
Train accuracy: 0.5516417684082808
{'Syndra': (-157.0, -7.2596), 'Fiora': (-147.0, -6.7579), 'Nami': (-144.0, -6.659), 'Tristana': (-93.0, -4.386), 'Janna': (-82.0, -3.7788), 'Olaf': (-81.0, -3.9548), "Bel'Veth": (-81.0, -3.8432), 'Kindred': (-78.0, -3.6592), 'Lucian': (-70.0, -3.2041), 'Karthus': (-59.0, -3.118), 'Blitzcrank': (-51.0, -2.5677), 'Kassadin': (-49.0, -2.2694), 'Ashe': (-49.0, -2.2322), 'Heimerdinger': (-47.0, -2.1519), 'Renata Glasc': (-44.0, -1.9967), 'Jarvan IV': (-43.0, -1.9484), 'Ekko': (-42.0, -2.0343), 'Vex': (-41.0, -2.0413), 'Zed': (-38.0, -1.9134), 'Twitch': (-37.0, -1.9236), 'Riven': (-36.0, -2.0807), 'Pantheon': (-34.0, -1.6939), 'Maokai': (-34.0, -1.4528), 'Amumu': (-33.0, -1.7536), 'Shen': (-33.0, -1.6345), 'Sona': (-33.0, -1.3177), "Rek'Sai": (-32.0, -1.2771), 'Xayah': (-31.0, -1.429), 'Fiddlesticks': (-30.0, -1.4623), 'Jhin': (-30.0, -1.3624), 'Nunu & Willump': (-29.0, -1.6919), 'Zoe': (-28.0, -1.4192), 'Xerath': (-28.0, -

In [24]:
print(player_info)

{'xayah rakan': (-6.0, 2.1748), 'FGG': (-5.0, -0.4178), 'Thebigestnoob1': (-4.0, -0.3529), 'Kakan': (-4.0, -0.3245), 'Krau': (-4.0, -0.0682), '200 SAKUYA': (-4.0, 0.3221), 'Κuzu': (-4.0, 3.3801), 'TwTv Scripter1v9': (-3.0, -2.0807), 'NHWK': (-3.0, -0.4869), 'Foxfpx1': (-3.0, -0.3961), 'rayndex33': (-3.0, -0.3743), '1 H4V3 C4NC3R': (-3.0, -0.3251), 'Boring cycle0': (-3.0, -0.3061), 'Triple H': (-3.0, -0.2353), '4tukano4': (-3.0, -0.1906), 'tempwygciybpjfhp': (-3.0, -0.1658), 'Iκιωι': (-3.0, -0.1641), 'DBE Devn': (-3.0, -0.1442), 'keia': (-3.0, -0.1428), 'Mikkel': (-3.0, -0.1216), 'MAD Card0netti': (-3.0, -0.1182), 'Hanterα': (-3.0, -0.0963), 'TTV suki545': (-3.0, -0.0733), 'Sebeks': (-3.0, -0.0725), 'twitch SAMMU': (-3.0, -0.0546), 'Taxer': (-3.0, -0.0508), 'TwTv iljimaeLoL': (-3.0, -0.0033), 'Orion070422': (-3.0, 0.0641), 'Bäby Rekkles': (-3.0, 0.0873), 'Wrongo': (-3.0, 0.0886), 'TwTv Nomilol': (-3.0, 0.1101), 'KARELIASABUSER69': (-3.0, 0.1104), 'INTOXICATED48': (-3.0, 0.1119), 'TIKY F