# TrueSkill models for multiple players

In [1]:
# Torch and Pyro
import torch
from torch import nn
import torch._numpy as np
import pyro
import pyro.distributions as dist
from pyro.infer.autoguide import AutoNormal
from pyro.infer import Predictive
from pyro.nn import PyroModule

# Plotting
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

# Project specific utilities
from utils import *

## Data Exploration

### Load Data

In [2]:
# Load the cleaned match records plus the lookup arrays derived from them.
data = load_data('data/df_matches_clean.csv')

all_data, races = data['all_data'], data['races']
id1, id2 = data['coach1_id'], data['coach2_id']
race1, race2 = data['race1'], data['race2']
num_coaches = data['num_coaches']
obs = data['obs']
N = len(id1)
split = int(N)

# NOTE: a seeded random-permutation split (70% train) used to be drafted
# here but is disabled. Its validation slice started at the 30% mark, so it
# would have overlapped the training slice. Until a proper split is
# restored, training, validation and test all cover every game.
idx_train = np.arange(split)
idx_val = idx_train
idx_test = np.arange(split)


def _take_games(idx):
    """Select the games at positions `idx` and bundle them for the models.

    Returns the coach ids, outcomes and race encodings of the selected games,
    followed by the `game_info` dictionary the model functions expect.
    """
    ids_a, ids_b = id1[idx], id2[idx]
    outcomes = obs[idx]
    races_a, races_b = race1[idx], race2[idx]
    info = {
        'coach1_ids': ids_a,
        'coach2_ids': ids_b,
        'num_coaches': num_coaches,
        'races1': races_a,
        'races2': races_b,
    }
    return ids_a, ids_b, outcomes, races_a, races_b, info


# Training set
(id1_train, id2_train, obs_train,
 race1_train, race2_train, game_info_train) = _take_games(idx_train)

# Validation set
(id1_val, id2_val, obs_val,
 race1_val, race2_val, game_info_val) = _take_games(idx_val)

# Test set
(id1_test, id2_test, obs_test,
 race1_test, race2_test, game_info_test) = _take_games(idx_test)

### Compute winrate for each pairing of races

In [3]:
# Empirical win rate for every (player race, opponent race) pairing.
# winrates[j, i] is the rate for race i (column) playing against race j (row).
winrates = np.zeros((len(races), len(races)))

# These comparisons do not depend on the pairing, so build them once.
team1_won = all_data['team1_win'] == 1
team2_won = all_data['team2_win'] == 1
team1_zero = all_data['team1_win'] == 0

# The loop variables are deliberately NOT called race1/race2: those names
# hold the per-game race features loaded earlier and must not be overwritten.
for i, player_race in enumerate(races):
    player_is_team1 = all_data['team1_race_name'] == player_race
    player_is_team2 = all_data['team2_race_name'] == player_race

    for j, opponent_race in enumerate(races):
        # Games of this pairing, split by which side the player's race was on.
        as_team1 = player_is_team1 & (all_data['team2_race_name'] == opponent_race)
        as_team2 = player_is_team2 & (all_data['team1_race_name'] == opponent_race)

        win_as_p1 = int((team1_won & as_team1).sum())
        win_as_p2 = int((team2_won & as_team2).sum())
        # NOTE(review): games with team1_win == 0 are treated as draws, are
        # only counted when the player's race is team 1, and are weighted like
        # a full win. Confirm the encoding of 'team1_win' and whether draws
        # should be counted from both sides with weight 0.5.
        draws = int((team1_zero & as_team1).sum())
        total = int(as_team1.sum()) + int(as_team2.sum())

        # A pairing that never occurred has no defined win rate; store NaN
        # instead of dividing by zero.
        if total:
            winrates[j, i] = (win_as_p1 + win_as_p2 + draws) / total
        else:
            winrates[j, i] = float('nan')

In [4]:
# Plot the heatmap
fig = go.Figure(data=go.Heatmap(
    z=winrates,
    x=races,
    y=races,
    colorscale='RdBu_r',
))

# Set the labels
fig.update_xaxes(title_text='Player', tickangle=90)
fig.update_yaxes(title_text='Opponent', tickangle=0)

# Set the title
fig.update_layout(title_text='Racial Winrate', autosize=False, width=800, height=800)
# Show the plot
fig.show()

# Calculate the mean and reshape it into a 1D array
mean_values = np.reshape(winrates.mean(axis=0), (-1,)) - 0.5
color_scale = np.interp(mean_values, (mean_values.min(), mean_values.max()), [0, 1])

# Create the bar plot
fig = go.Figure(data=go.Bar(x=races, y=mean_values, base=0.5, marker=dict(color=color_scale, colorscale='RdBu_r',)))
fig.update_layout(title_text='Average Racial Winrate', width=800)
fig.update_xaxes(tickangle=45, )
fig.show()

## Models

In [5]:
# Mean and standard deviation of the Normal prior over each coach's skill.
# The models fall back to these when `game_info` carries no
# 'coach_mu_skill' / 'coach_sigma_skill' entries.
default_mu_skill = 0
default_sigma_skill = 1

# Standard deviation shared by the per-match performance distributions and
# the outcome likelihood in the models.
hyper_sigma = 1/8

### Standard multiple player TrueSkill

In [6]:
def MultiPlayer(game_info, obs=None):
    """
    TrueSkill-style Pyro model for many coaches playing head-to-head games.

    Every coach has a latent skill. In each game both coaches draw a noisy
    performance around their skill, and the outcome is modelled as a Normal
    centred on the difference of the two performances.

    Inputs:
        game_info: A dictionary containing the following keys:
            coach1_ids: A 1D tensor of length "num_games". The i-th element represents the id of coach 1 in the i-th game.
            coach2_ids: A 1D tensor of length "num_games". The i-th element represents the id of coach 2 in the i-th game.
            num_coaches: An integer representing the number of coaches in the data.
            coach_mu_skill, coach_sigma_skill (optional): Mean and standard deviation of the skill prior.
                Both keys must be present to take effect; if either is missing, the module-level
                defaults are used for both.
        obs: A 1D tensor of observed data. The length should be equal to the number of games played.
             Represents the outcome of a game in the eyes of coach 1, i.e. coach 1 wins: obs = 1, coach 2 wins: obs = -1.

    Output: The value of the "y_coach1_win" site: `obs` when it is given, otherwise a sample
            centred on the performance difference (coach 1 minus coach 2), one entry per game.
    """

    # Extract the data
    ids1 = game_info['coach1_ids']
    ids2 = game_info['coach2_ids']
    num_coaches = game_info['num_coaches']
    num_games = len(ids1)

    # Use the caller-supplied skill prior when available. Only a missing key
    # triggers the fallback; any other error is a real problem and propagates.
    try:
        coach_mu_skill = game_info['coach_mu_skill']
        coach_sigma_skill = game_info['coach_sigma_skill']
    except KeyError:
        coach_mu_skill = default_mu_skill
        coach_sigma_skill = default_sigma_skill

    # Sample skills for each coach using a plate for coaches
    with pyro.plate('coaches', num_coaches):
        coach_skills = pyro.sample("coach_skills", dist.Normal(coach_mu_skill, coach_sigma_skill))

    # Sample the performance difference for each match
    with pyro.plate('matches', num_games):
        # Gather the skills for the competing coaches in each match
        coach1_skills = coach_skills[ids1]
        coach2_skills = coach_skills[ids2]

        # Sample performances for the coaches in each match
        coach1_perf = pyro.sample('coach1_perf', dist.Normal(coach1_skills, hyper_sigma))
        coach2_perf = pyro.sample('coach2_perf', dist.Normal(coach2_skills, hyper_sigma))

        # Compute the performance difference
        perf_diff = coach1_perf - coach2_perf

        # Sample the observed outcomes
        y = pyro.sample("y_coach1_win", dist.Normal(perf_diff, hyper_sigma), obs=obs)

    return y

### Multiple player TrueSkill with Neural Network Estimated Racial Performance Bias

In [7]:
class RaceNet(PyroModule):
    """Small feed-forward network that maps a race matchup to a bias value.

    The input is the difference of the two race encodings. All linear layers
    are bias-free and the activations are tanh, so swapping the two races
    flips the sign of the output and identical races give exactly zero.
    """

    def __init__(self, input_dim, hidden_dim=16, output_dim=1):
        super().__init__()

        linear = PyroModule[nn.Linear]
        layers = [
            linear(input_dim, hidden_dim, bias=False),
            nn.Tanh(),
            linear(hidden_dim, hidden_dim, bias=False),
            nn.Tanh(),
            linear(hidden_dim, output_dim, bias=False),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, races1, races2):
        """Return the network output for the encoded matchup `races1 - races2`."""
        matchup = races1 - races2
        return self.net(matchup)


# Width of a race encoding vector; the network input has this many features.
num_races = 28
# Single shared network instance used by RaceNetMultiPlayer and the plots.
NN = RaceNet(num_races)

def RaceNetMultiPlayer(game_info, obs=None):
    """
    Multi-coach TrueSkill-style Pyro model with a race-matchup bias from the network `NN`.

    Identical to the plain multi-coach model except that the performance
    difference of each game is shifted by a bias computed from the two races.

    Inputs:
        game_info: A dictionary containing the following keys:
            coach1_ids: A 1D tensor of length "num_games". Contains the index of the first coach in each game.
            coach2_ids: A 1D tensor of length "num_games". Contains the index of the second coach in each game.
            races1: Race encoding of coach 1 in each game (one row per game), passed to `NN`.
            races2: Race encoding of coach 2 in each game (one row per game), passed to `NN`.
            num_coaches: The number of coaches in the dataset.
            coach_mu_skill, coach_sigma_skill (optional): Mean and standard deviation of the skill prior.
                Both keys must be present to take effect; if either is missing, the module-level
                defaults are used for both.
        obs: A 1D tensor of observed data. The length should be equal to the number of games played.
             Represents the outcome of a game in the eyes of coach 1, i.e. coach 1 wins: obs = 1, coach 2 wins: obs = -1.

    Output: The value of the "y_coach1_win" site: `obs` when it is given, otherwise a sample
            centred on the bias-adjusted performance difference, one entry per game.
    """

    # Extract the game information
    ids1 = game_info['coach1_ids']
    ids2 = game_info['coach2_ids']
    races1 = game_info['races1']
    races2 = game_info['races2']
    num_coaches = game_info['num_coaches']
    num_games = len(ids1)

    # Use the caller-supplied skill prior when available. Only a missing key
    # triggers the fallback; any other error is a real problem and propagates.
    try:
        coach_mu_skill = game_info['coach_mu_skill']
        coach_sigma_skill = game_info['coach_sigma_skill']
    except KeyError:
        coach_mu_skill = default_mu_skill
        coach_sigma_skill = default_sigma_skill

    # Sample skills for each coach using a plate for coaches
    with pyro.plate('coaches', num_coaches):
        coach_skills = pyro.sample("coach_skills", dist.Normal(coach_mu_skill, coach_sigma_skill))

    with pyro.plate('matches', num_games):
        # Gather the skills for the competing coaches in each match
        coach1_skills = coach_skills[ids1]
        coach2_skills = coach_skills[ids2]

        # Racial bias of each matchup; the network returns one column per output
        bias = NN(races1, races2)[:, 0]

        # Sample performances for the coaches in each match
        coach1_perf = pyro.sample('coach1_perf', dist.Normal(coach1_skills, hyper_sigma))
        coach2_perf = pyro.sample('coach2_perf', dist.Normal(coach2_skills, hyper_sigma))

        # The bias is added to coach 1 and subtracted from coach 2, so the
        # performance difference is shifted by twice the bias.
        perf_diff = (coach1_perf + bias) - (coach2_perf - bias)

        # Sample the observed outcomes
        y = pyro.sample("y_coach1_win", dist.Normal(perf_diff, hyper_sigma), obs=obs)

    return y

### Sanity check of multiple player TrueSkill model on synthetic data

First, we want to confirm that the multiple-player TrueSkill model behaves like the two-player model. To test this, we generate dummy data for six players in a round-robin tournament.
In a round-robin tournament, every player plays against every other player exactly once. To make the results easy to check, the outcome of each game is determined by the players' indices: the player with the higher index always wins. This means that the player with index 0 loses all games, the player with index 1 loses all games except the one against player 0, and so on. In the generated data, coach 1 is always the lower-indexed player, so every observation is -1 (a win for coach 2).

In [8]:
# Generate round-robin data: every pair (i, j) with i < j plays once and the
# higher index (coach 2) always wins, hence every observation is -1.
# The "_rr" names keep the synthetic tensors separate from the real-data
# `id1`, `id2` and `obs` loaded earlier, which later cells still rely on.
num_coaches_rr = 6
pairs = torch.tensor([[i, j] for i in range(num_coaches_rr) for j in range(i+1, num_coaches_rr)])
id1_rr = pairs[:, 0]
id2_rr = pairs[:, 1]
obs_rr = -torch.ones(len(pairs))

game_info = {'coach1_ids': id1_rr, 'coach2_ids': id2_rr, 'num_coaches': num_coaches_rr}

# Train the model
guide = AutoNormal(MultiPlayer)
losses = run_inference(MultiPlayer, guide, game_info, obs_rr)

# Run predictive posterior
predictive = Predictive(MultiPlayer, guide=guide, num_samples=2000)
samples = predictive(game_info, obs_rr)

# Plot the posterior skill samples of every coach as overlaid histograms
skill_samples = samples["coach_skills"].detach().squeeze()
fig = go.Figure()
for i in range(num_coaches_rr):
    hist_data = skill_samples[:, i]
    fig.add_trace(go.Histogram(x=hist_data, histnorm='probability density'))
    # Label each histogram at its mean so the ordering of the coaches is visible
    fig.add_annotation(x=hist_data.mean(), y=1, text=f"<b>Coach {i+1}</b>", showarrow=False, font=dict(size=12, color="black"),
        borderpad=4, bgcolor="white", opacity=0.8)
fig.update_layout(barmode='overlay', showlegend=False, xaxis_title="Skill", yaxis_title="Density", title="Coach skill", width=1200)
fig.show()

Loss = 41.558471: 100%|██████████| 2000/2000 [00:14<00:00, 136.19it/s]


## Infer models on real data

In [9]:
# Fit the plain multi-coach model on the training games
guide_MultiPlayer = AutoNormal(MultiPlayer)
losses_MultiPlayer = run_inference(
    MultiPlayer,
    guide_MultiPlayer,
    game_info_train,
    obs_train,
    num_steps=1000,
)

# Draw posterior samples using the fitted guide
predictive_MultiPlayer = Predictive(MultiPlayer, guide=guide_MultiPlayer, num_samples=1000)
samples_MultiPlayer = predictive_MultiPlayer(game_info_train, obs_train)

# Predict: summarise the sampled skills per coach, feed the summary back in as
# the skill prior, and run the model once without observations.
posterior_skills_MultiPlayer = samples_MultiPlayer["coach_skills"]
game_info_val['coach_mu_skill'] = posterior_skills_MultiPlayer.mean(dim=0)
game_info_val['coach_sigma_skill'] = posterior_skills_MultiPlayer.std(dim=0)
prediction_MultiPlayer = MultiPlayer(game_info_val).detach().numpy()

Loss = 569889.092285: 100%|██████████| 1000/1000 [00:25<00:00, 38.59it/s]


In [10]:
# Infer RaceNetModel on the training games
guide_RaceNetMultiPlayer = AutoNormal(RaceNetMultiPlayer)
losses_RaceNetMultiPlayer = run_inference(RaceNetMultiPlayer, guide_RaceNetMultiPlayer, game_info_train, obs_train, num_steps=1000)

# Predictive distribution
predictive_RaceNetMultiPlayer = Predictive(RaceNetMultiPlayer, guide=guide_RaceNetMultiPlayer, num_samples=1000)
# The observations must be the ones belonging to game_info_train. The bare
# name `obs` is not safe here: the round-robin sanity check rebinds it to a
# 15-element synthetic tensor.
samples_RaceNetMultiPlayer = predictive_RaceNetMultiPlayer(game_info_train, obs_train)

# Predict: summarise the sampled skills per coach, feed the summary back in as
# the skill prior, and run the model once without observations.
game_info_val['coach_mu_skill'] = samples_RaceNetMultiPlayer["coach_skills"].mean(dim=0)
game_info_val['coach_sigma_skill'] = samples_RaceNetMultiPlayer["coach_skills"].std(dim=0)
prediction_RaceNetMultiPlayer = RaceNetMultiPlayer(game_info_val).detach().numpy()

Loss = 543347.225586: 100%|██████████| 1000/1000 [00:45<00:00, 22.17it/s]


### Investigate DrawMargin

In [11]:
def classify_outcome(pred, margin):
    """Turn continuous predictions into outcomes -1 / 0 / 1 using a draw margin.

    Predictions below -margin become -1, predictions above margin become 1,
    and everything within [-margin, margin] becomes 0 (a draw).
    Named function rather than `map = lambda ...` so the builtin `map` is not
    shadowed for the rest of the session.
    """
    return np.select([pred < -margin, abs(pred) <= margin, pred > margin], [-1, 0, 1])


# Ground-truth outcomes of the validation games
gt = obs_val.detach().numpy()

# Sweep the draw margin and record the accuracy of both models
# (row 0: MultiPlayer, row 1: RaceNetMultiPlayer)
margins = np.linspace(0, 2, 200)
acc = np.zeros((2, len(margins)))
for i, margin in enumerate(margins):
    results_MultiPlayer = classify_outcome(prediction_MultiPlayer, margin)
    results_RaceNetMultiPlayer = classify_outcome(prediction_RaceNetMultiPlayer, margin)
    acc[0, i] = sum(results_MultiPlayer == gt) / len(gt)
    acc[1, i] = sum(results_RaceNetMultiPlayer == gt) / len(gt)

# Baseline: always predict the most frequent outcome
baseline_acc = max([sum(gt == i) / len(gt) for i in [-1, 0, 1]])

fig = go.Figure()
fig.add_trace( go.Scatter(x=margins, y=acc[0], mode='lines', name='MultiPlayer') )
fig.add_trace( go.Scatter(x=margins, y=acc[1], mode='lines', name='RaceNetMultiPlayer') )
fig.add_trace( go.Scatter(x=margins, y=[baseline_acc] * len(margins), mode='lines', name='Baseline') )
fig.update_layout(title='Model Accuracy on training data', xaxis_title='Draw Margin', yaxis_title='Accuracy', width=800, height=600)
fig.update_yaxes(range=[0, 1])
fig.show()

### Check racial performance bias learned by the neural network

In [12]:
# Construct the heatmap
heatmap = torch.zeros(num_races, num_races)
for i in range(num_races):
    for j in range(num_races):
        r1 = torch.zeros(1, num_races)
        r1[0, i] = 1
        r2 = torch.zeros(1, num_races)
        r2[0, j] = 1
        heatmap[j, i] = NN(r1, r2)[0, 0]

heatmap = heatmap.detach().numpy()

# Plot the heatmap
fig = go.Figure(data=go.Heatmap(z=heatmap, x=races, y=races, colorscale='RdBu_r',))

# Set the labels
fig.update_xaxes(title_text='Player', tickangle=90)
fig.update_yaxes(title_text='Opponent', tickangle=0)

# Set the title
fig.update_layout(title_text='Racial performance bias', autosize=False, width=800, height=800)
# Show the plot
fig.show()

# Calculate the mean and reshape it into a 1D array
mean_values = np.reshape(heatmap.mean(axis=0), (-1,))
color_scale = np.interp(mean_values, (mean_values.min(), mean_values.max()), [-1, 1])

# Create the bar plot
fig = go.Figure(data=go.Bar(x=races, y=mean_values, marker=dict(color=color_scale, colorscale='RdBu_r',)))
fig.update_layout(title_text='Average Racial Performance Bias', width=800)
fig.update_xaxes(tickangle=45, )
fig.show()

In [13]:
# Overlay the sampled skill distributions of ten randomly drawn coaches
fig = go.Figure()
coach_skill_draws = samples_MultiPlayer["coach_skills"].detach().squeeze()
for i in np.random.randint(0, num_coaches, 10):
    samples = coach_skill_draws[:, i]
    # Legend entry shows the coach's stored win rate and number of matches
    trace_name = f"Coach {i+1} winrate: {data['sorted_winrates'][i]:.2f}, num_matches: {int(data['sorted_num_matches'][i])}"
    fig.add_trace(go.Histogram(x=samples, name=trace_name, histnorm='probability density'))
fig.update_layout(barmode='overlay', xaxis_title="Skill", yaxis_title="Density", title="Coach skill", width=1200)
fig.show()