In [1]:
import leaderbot as lb
import numpy
import matplotlib.pyplot as plt
from scipy.optimize import minimize, differential_evolution, shgo, basinhopping, dual_annealing
import texplot
import time
import scipy

In [2]:
# Load the pairwise-comparison dataset through the leaderbot package imported
# at the top of the notebook. The previous call, load_chatbot_arena_data(),
# is neither defined nor imported in this notebook and raises NameError on a
# fresh kernel.
# NOTE(review): lb.data.load() is leaderbot's documented loader and is
# expected to return a dict with the 'models', 'X' and 'Y' keys read below --
# confirm against the installed leaderbot version.
data = lb.data.load()

models = numpy.array(data['models'])
xs = numpy.array(data['X'])
ys = numpy.array(data['Y'])
n_models = len(models)

# Drop every row whose entries in ys sum to zero, keeping xs and ys aligned
y_sum = numpy.sum(ys, axis=-1)
ind_valid = y_sum != 0
xs = xs[ind_valid]
ys = ys[ind_valid]

In [3]:
# Per-row outcome counts, read from the first three columns of ys
wins = ys[:, 0]
losses = ys[:, 1]
ties = ys[:, 2]

# Per-model totals, one integer slot per model
n_wins, n_losses, n_ties = (
    numpy.zeros((n_models,), dtype=int) for _ in range(3)
)

for model_id in range(n_models):
    as_first = xs[:, 0] == model_id
    as_second = xs[:, 1] == model_id

    # When the model sits in the second column of xs, the roles of the
    # wins and losses columns are swapped; ties count the same either way.
    n_wins[model_id] = wins[as_first].sum() + losses[as_second].sum()
    n_losses[model_id] = losses[as_first].sum() + wins[as_second].sum()
    n_ties[model_id] = ties[as_first].sum() + ties[as_second].sum()

n_games = n_wins + n_losses + n_ties

# Outcome fractions per model; entries where n_games is zero come out as NaN
p_wins, p_losses, p_ties = (
    count / n_games for count in (n_wins, n_losses, n_ties)
)

In [4]:
# Objectives to fit. The optimisation cell further down iterates over this
# list and passes `alg.jac` of each entry to scipy's `minimize`.
# NOTE(review): `davidson` is not defined or imported in any cell shown above
# -- confirm where it comes from before running on a fresh kernel.
#algs = [davidson, davidson_scaled, davidson_scaled_r, davidson_scaled_rij]
algs = [davidson]

In [5]:
n_pairs = n_models * (n_models - 1) // 2

# Initial parameter vector: n_models entries filled from init_guess below,
# n_models entries filled with a constant, and one trailing entry left at 0.
# Alternative sizes kept for reference (presumably for the other algorithm
# variants -- TODO confirm):
#w0 = numpy.zeros(n_models+1)
#w0 = numpy.zeros(2*n_models + n_pairs + 1)
#w0 = numpy.zeros(2*n_models + 2)
w0 = numpy.zeros(2*n_models + 1)
#w0[:n_models] = numpy.full(n_models, numpy.log(1/n_models))

# First n_models entries: win fraction plus half the tie fraction, centered
# to zero mean.
init_guess = p_wins + p_ties/2.0
# p_wins / p_ties are NaN wherever n_games is zero; start those entries at 0
init_guess[numpy.isnan(init_guess)] = 0
init_guess = init_guess - numpy.mean(init_guess)
w0[:n_models] = init_guess

# Next n_models entries: the constant 1/sqrt(n_models).
# Uses `numpy.full`: the notebook imports `numpy` without the `np` alias, so
# the previous `np.full` raised NameError.
w0[n_models:n_models * 2] = numpy.full(n_models, (1.0/n_models) ** 0.5)

In [None]:

benchmark = 0 # choose which model to be the default for the rest of analysis

# Solver passed to scipy.optimize.minimize. `method` was previously used below
# without ever being defined, which raises NameError on a fresh kernel.
# NOTE(review): 'BFGS' is chosen because maxiter=1500 and tol=1e-8 match
# leaderbot's documented training defaults -- confirm this is the intended
# solver.
method = 'BFGS'

results = []

for alg in algs:
    t0 = time.time()
    # jac=True tells scipy that alg.jac returns the pair (objective, gradient)
    res = minimize(alg.jac, w0, args=(xs, ys, n_models),
                   jac=True, method=method, options={'maxiter': 1500},
                   tol=1e-8)
    t = time.time() - t0
    results.append(res)

    # Not every result object carries njev; report 0 when it is absent
    njev = getattr(res, 'njev', 0)

    print(f'{method:>9} | nit: {res.nit:>4}, nfev: {res.nfev:>4}, ', end='')
    print(f'njev: {njev:>5} | ', end='')
    print(f'time: {t:>6.2f} | '
          f'fun: {res.fun:>0.6f}, jac: {numpy.mean(numpy.abs(res.jac)):>0.1e}')