# Trueskill model with Neural Network adjusted performance

## Notes

This model assumes the following data structure:

**params**: Dict with 2 keys:
- "mu": NumPy array of mean skill values (num_players)
- "sigma": NumPy array of standard deviation skill values (num_players)

**game_info**: Dict with 4 keys:
- "player1": NumPy array with the player1 ids (num_games)
- "race1": NumPy array with one-hot-encoded race1 ids (num_games x num_races)
- "player2": NumPy array with the player2 ids (num_games)
- "race2": NumPy array with one-hot-encoded race2 ids (num_games x num_races)

**result**: NumPy array with the outcome of the games (num_games)
- -1: player1 wins
- 0: draw
- 1: player2 wins

In [1]:
import torch
from torch import nn
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO, Predictive
from pyro.optim import Adam
from pyro.infer.autoguide import AutoDiagonalNormal

In [2]:
class RaceNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=3, output_dim=2):
        super(RaceNet, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):

        race1 = x[:, 0]
        race2 = x[:, 1]
        x = torch.cat((race1, race2), dim=1)

        return self.net(x)

In [3]:
def model(params, game_info, result=None, device='cpu'):

    # Hyperparameters
    performance_sigma = torch.tensor(5.0).to(device)
    result_sigma = torch.tensor(1.0).to(device)
    mu = torch.tensor(params['mu']).to(device)
    sigma = torch.tensor(params['sigma']).to(device)

    # Data
    player1 = torch.tensor(game_info['player1'], dtype=int).to(device)
    race1 = torch.tensor(game_info['race1'], dtype=torch.float32).to(device)
    player2 = torch.tensor(game_info['player2'], dtype=int).to(device)
    race2 = torch.tensor(game_info['race2'], dtype=torch.float32).to(device)

    # Dimensions
    N = len(mu) # Number of players
    M = len(player1) # Number of games
    R = len(race1[0]) # Number of races

    # Neural network for racial performance bias
    NN = RaceNet(2*R).to(device)

    # Draw skill for each player
    with pyro.plate('players', N):
        skill = pyro.sample('skill', dist.Normal(mu, sigma))

    # Loop over games
    with pyro.plate('games', M):
        # Get skill for each player
        skill1 = skill[player1]
        skill2 = skill[player2]
        
        # Get racial bias
        bias = NN(torch.stack((race1, race2), dim=1))
        bias1 = bias[:, 0]
        bias2 = bias[:, 1]

        # Draw performance
        performance1 = pyro.sample('performance1', dist.Normal(skill1-bias1, performance_sigma))
        performance2 = pyro.sample('performance2', dist.Normal(skill2-bias2, performance_sigma))

        # Draw result
        if result is not None:
            result = torch.tensor(result, dtype=torch.float32).to(device)
        outcome = pyro.sample('result', dist.Normal(performance2 - performance1, result_sigma), obs=result)
        outcome = torch.sign(torch.round(outcome))

    return outcome

In [4]:
def train_loop(model, params, games, result, iters=1000, device='cpu'):
    print(f"Using device: {device}")
    svi = SVI(model, AutoDiagonalNormal(model), Adam({'lr': 0.03}), loss=Trace_ELBO())
    for step in range(iters):
        loss = svi.step(params, games, result=result, device=device)
        if step % 100 == 0:
            print('step', step, 'loss', loss)

In [5]:
# Run the model
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
params = {'mu': [100., 100.], 'sigma': [20., 20.]}
games = {'player1': [1, 0], 'race1': [[1, 0, 0],[0, 1, 0]], 'player2': [1, 0], 'race2': [[0, 0, 1],[1, 0, 0]]}
result = [0, 1]
train_loop(model, params, games, result, iters=1000, device=device)

Using device: cpu
step 0 loss 26.19870662689209
step 100 loss 15.711596250534058
step 200 loss 10.984839677810669
step 300 loss 10.48905897140503
step 400 loss 11.659853458404541
step 500 loss 12.114955425262451
step 600 loss 11.892566919326782
step 700 loss 12.781346321105957
step 800 loss 11.42769742012024
step 900 loss 11.348999500274658
