In [1]:
import leaderbot
import leaderbot as lb
import numpy as np
import matplotlib.pyplot as plt
import texplot
import time
import scipy

In [2]:
data = lb.load_data()

agents = np.array(data['models'])
x = np.array(data['X'])
y = np.array(data['Y'])
n_agents = len(agents)

# Clean empty data
y_sum = np.sum(y, axis=1)
ind_valid = y_sum != 0
x = x[ind_valid]
y = y[ind_valid]

In [3]:
# wins, losses, ties = y[:, 0], y[:, 1], y[:, 2]

# n_wins = np.zeros((n_agents,), dtype=int)
# n_losses = np.zeros((n_agents,), dtype=int)
# n_ties = np.zeros((n_agents,), dtype=int)
# n_games = np.zeros((n_agents,), dtype=int)

# for i in range(n_agents):
#     ind_0 = (x[:, 0] == i)
#     ind_1 = (x[:, 1] == i)
#     n_wins[i] = np.sum(wins[ind_0]) + np.sum(losses[ind_1])
#     n_losses[i] = np.sum(losses[ind_0]) + np.sum(wins[ind_1])
#     n_ties[i] = np.sum(ties[ind_0]) + np.sum(ties[ind_1])

# n_matches = n_wins + n_losses + n_ties
# p_wins = n_wins / n_matches
# p_losses = n_losses / n_matches
# p_ties = n_ties / n_matches

In [8]:
def kl_divergence(p, q):
    h = p * np.log(p / q)
    h[p == 0] = 0
    return h

In [9]:
#algs = [davidson, davidson_scaled, davidson_scaled_r, davidson_scaled_rij]
model = lb.DavidsonScaled(data)
model.train()

In [10]:
p_pred = model.infer()

In [11]:
# Jensen-Shannon Divergence
y_sum = y.sum(axis=1, keepdims=True)
y_sum[y_sum == 0] = 1.0
p_obs = y / y_sum
p_mean = (p_pred + p_obs) / 2.0
jsd_ = 0.5 * (kl_divergence(p_obs, p_mean) + kl_divergence(p_pred, p_mean))
jsd_ = jsd_.sum(axis=1)
jsd_mean = np.mean(jsd_)
jsd_std = np.std(jsd_)

print(f'jsd: {jsd_mean:>0.4f} +-({jsd_std:>0.4f})')

jsd: 0.0092 +-(0.0307)


In [16]:
np.any(np.isnan(p_mean))

False

In [22]:
np.any(p_obs == 0)

True

In [9]:
# KL Divergence
y_sum = y.sum(axis=1, keepdims=True)
y_sum[y_sum == 0] = 1.0
p_obs = y / y_sum
kld_ = kl_divergence(p_obs, p_pred)
kld_ = kld_.sum(axis=1)
kld_mean = np.mean(kld_)
kld_std = np.std(kld_)

print(f'jsd: {kld_mean:>0.4f} +-({kld_std:>0.4f})')

jsd: 0.0332 +-(0.1068)


In [11]:
# AIC, BIC
n_data = y.shape[0]
loss_ = model.loss(return_jac=False, constraint=False)
aic_ = 2.0 * model.n_param - 2.0 * loss_
bic_ = model.n_param * np.log(n_data) - 2.0 * loss_

print(aic_)
print(bic_)

515.9804811226344
2108.203114798893


In [5]:
# Create a list of models
model1 = leaderbot.BradleyTerryScaled(data)
model2 = leaderbot.RaoKupperScaled(data)
model3 = leaderbot.DavidsonScaled(data)
models = [model1, model2, model3]

In [6]:
# Evaluate models
metrics = leaderbot.evaluate(models, train=True, print=True)

In [10]:
from pprint import pprint
pprint(metrics)

n_param = metrics['n_param']
name = metrics['name']
loss = metrics['loss']
jsd = metrics['jsd']
kld = metrics['kld']
aic = metrics['aic']
bic = metrics['bic']

{'aic': [514.6895864968636, 515.981545356483, 515.9804811226344],
 'bic': [2100.7646424369204, 2108.204179032741, 2108.203114798893],
 'jsd': [0.07217424036142622, 0.008951195124648122, 0.00922312213548934],
 'kld': [inf, 0.03230331757774358, 0.033222671912828225],
 'loss': [0.6552067515681887, 1.0092273217584713, 1.0097594386828288],
 'n_param': [258, 259, 259],
 'name': ['bradleyterry_scaled', 'raokupper_scaled', 'davidson_scaled']}


In [33]:
print('+---------------------+---------+--------+------+------+------+---------+')
print('| name                | # param | loss   | kld | jsd  | aic  | bic     |')
for i in range(len(name)):
    name_ = name[i]
    print(f'| {name[i]:<15s} '
          f'| {n_param[i]:>7} '
          f'| {loss[i]:>0.5} '
          f'| {kld[i]:>0.4}  '
          f'| {jsd[i]:>0.4} '
          f'| {aic[i]:>0.4} '
          f'| {bic[i]:>0.4} |')

+---------------------+---------+--------+------+------+------+---------+
| name                | # param | loss   | kld | jsd  | aic  | bic     |
| bradleyterry_scaled |     258 | 0.65521 | inf  | 0.07217 | 514.7 | 2.101e+03 |
| raokupper_scaled |     259 | 1.0092 | 0.0323  | 0.008951 | 516.0 | 2.108e+03 |
| davidson_scaled |     259 | 1.0098 | 0.03322  | 0.009223 | 516.0 | 2.108e+03 |
