In [1]:
# Developing the new Elo algorithm
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import json

In [2]:
# Load every saved battle into a single DataFrame, one row per battle.
# Layout implied by the path handling below:
#   <folder_for_battles>/<user>/<YYYYmmdd_HHMMSS...>.json
# Each JSON file is expected to carry "target_phrase", "model" and "state".
folder_for_battles = "../backend/db/json/"
users = os.listdir(folder_for_battles)

battle_columns = ["Player", "Target", "Model", "Result", "Time"]
battle_rows = []

for user in users:
    user_folder = os.path.join(folder_for_battles, user)
    # Skip stray files in the root folder; listing a non-directory would raise.
    if not os.path.isdir(user_folder):
        continue
    for filename in os.listdir(user_folder):
        # Match on the extension so names like "x.json.bak" are not parsed.
        if not filename.endswith(".json"):
            continue
        # Context manager closes each handle promptly instead of leaking it.
        with open(os.path.join(user_folder, filename)) as fh:
            battle = json.load(fh)
        battle_rows.append({
            "Player": user,
            "Target": battle["target_phrase"],
            "Model": battle["model"],
            # A battle counts as a player win only when its state is exactly "win".
            "Result": battle["state"] == "win",
            # The first 15 characters of the file name encode the battle timestamp.
            "Time": pd.to_datetime(filename[:15], format='%Y%m%d_%H%M%S'),
        })

# Passing the columns explicitly keeps the schema when no battles were found,
# where concatenating an empty list of frames would raise.
df = pd.DataFrame(battle_rows, columns=battle_columns)

In [3]:
# Display the assembled battle log (pandas elides the middle rows of a long frame).
df

Unnamed: 0,Player,Target,Model,Result,Time
0,Chamination,Whinge,gpt-3.5-turbo,True,2024-08-30 18:59:32
1,Chamination,Douchebag,gpt-3.5-turbo,True,2024-08-30 18:58:39
2,Chamination,Dumbass,gpt-3.5-turbo,True,2024-08-30 18:59:47
3,Chamination,Tits,gpt-3.5-turbo,True,2024-08-30 18:59:10
4,Chamination,Nonce,gpt-3.5-turbo,True,2024-08-30 19:02:02
...,...,...,...,...,...
1506,Knowknow,Fuckboy,gpt-3.5-turbo,True,2024-08-27 15:48:47
1507,Knowknow,Douchebag,gpt-3.5-turbo,True,2024-08-27 16:49:54
1508,Knowknow,Fucker,gpt-3.5-turbo,True,2024-08-27 15:54:05
1509,Knowknow,Fuckface,gpt-3.5-turbo,True,2024-08-27 15:43:24


In [38]:
# --- Synthetic Bradley-Terry battle generator --------------------------------
# Simulation settings. Each label list must contain n_players / n_targets /
# n_models entries, because sampled indices are used to look labels up.
n_players = 5
players = ["Player 1", "Player 2", "Player 3", "Player 4", "Player 5"]
n_targets = 8
# NOTE(review): "PromTargetpt 3" looks like a typo for "Target 3". It is kept
# unchanged because it is a data label that the saved outputs refer to.
targets = ["Target 1", "Target 2", "PromTargetpt 3", "Target 4", "Target 5", "Target 6", "Target 7", "Target 8"]
n_models = 3
models = ["Model 1", "Model 2", "Model 3"]
n_battles = 1000
step_size = 0.01  # learning rate handed to online_logistic_regression in later cells

# Ground-truth coefficients, drawn uniformly from [0, 1), laid out as
# [players | targets | models].
coefficients = np.random.rand(n_players + n_targets + n_models)
player_coefficients = coefficients[:n_players]
target_coefficients = coefficients[n_players:n_players + n_targets]
model_coefficients = coefficients[n_players + n_targets:]

# Simulate the battles: each one pairs a random player with a random
# (target, model) combination.
records = []
for _ in range(n_battles):
    _player = np.random.randint(0, n_players)
    _target = np.random.randint(0, n_targets)
    _model = np.random.randint(0, n_models)
    _player_coeff = player_coefficients[_player]
    _target_coeff = target_coefficients[_target]
    _model_coeff = model_coefficients[_model]
    # Win probability is the logistic of (player strength - target - model).
    prob_player_wins = 1 / (1 + np.exp(-(_player_coeff - (_model_coeff + _target_coeff))))
    result = int(np.random.rand() < prob_player_wins)  # 1 = player win, 0 = loss
    records.append({
        "Player": players[_player],
        "Target": targets[_target],
        "Model": models[_model],
        "Result": result,
        "Time": pd.Timestamp.now(),
    })

# Build the frame once from the collected records instead of concatenating
# a thousand one-row frames.
df = pd.DataFrame(records)

# Snapshot this run to a timestamped CSV. Single quotes inside the f-string
# keep the line valid on Python versions older than 3.12.
df.to_csv(f"./{datetime.now().strftime('%Y%m%d%H%M%S')}.csv")

In [46]:
def online_logistic_regression(df, step_size):
    """Fit per-player, per-target and per-model coefficients in one SGD pass.

    Every row of ``df`` is one-hot encoded over its 'Player', 'Target' and
    'Model' values, and a logistic model of 'Result' is updated one row at a
    time, in the order the rows appear in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Player', 'Target', 'Model' and 'Result'.
    step_size : float
        Learning rate applied to each per-row gradient step.

    Returns
    -------
    dict
        ``{"players": {...}, "targets": {...}, "models": {...}}`` mapping each
        label to its coefficient. Target and model coefficients are returned
        with their sign flipped relative to the fitted weights.
    """
    def one_hot(column):
        # Indicator columns for one categorical field, plus their labels.
        dummies = pd.get_dummies(df[column])
        return dummies.columns, dummies.to_numpy()

    def labelled(labels, values):
        # Pair each label with the value at the same position.
        return {labels[k]: values[k] for k in range(len(labels))}

    players, X_player = one_hot('Player')
    targets, X_target = one_hot('Target')
    models, X_model = one_hot('Model')

    # Design matrix laid out as [players | targets | models]; outcomes alongside.
    X = np.concatenate((X_player, X_target, X_model), axis=1)
    Y = df['Result'].to_numpy()

    n_features = X.shape[1]
    beta = np.zeros(n_features)

    # Single pass of stochastic gradient descent on the logistic loss.
    for row in tqdm(range(len(X))):
        features, outcome = X[row], Y[row]
        predicted = 1 / (1 + np.exp(-np.dot(features, beta)))
        beta -= step_size * (features * (predicted - outcome))

    n_p = len(players)
    n_t = len(targets)
    # Targets and models are reported with the opposite sign to their weights.
    player_dict = labelled(players, beta[:n_p])
    target_dict = labelled(targets, -beta[n_p:n_p + n_t])
    model_dict = labelled(models, -beta[n_p + n_t:n_p + n_t + len(models)])

    return {"players" : player_dict, "targets" : target_dict, "models" : model_dict}

In [50]:
# Add 100 fake battles that Player 5 LOST (Result = 0), each against a random
# target and model, to see how a run of losses shifts the fitted coefficients.
# The fake rows are built first and appended with a single concat, so the
# growing frame is not re-copied on every iteration.
fake_losses = pd.DataFrame([
    {
        "Player": "Player 5",
        "Target": np.random.choice(targets),
        "Model": np.random.choice(models),
        "Result": 0,
        "Time": pd.Timestamp.now(),
    }
    for _ in range(100)
])
# concat returns a new frame, so `df` itself is left unmodified.
df_aug = pd.concat([df, fake_losses], ignore_index=True)

In [51]:
# Baseline fit on `df` (in source order, the synthetic battles generated earlier).
# NOTE(review): the saved output for this cell also shows a printed `Index([...])`
# line that the function as currently written never emits, so the stored output
# looks stale. Re-run before relying on it.
coeffs = online_logistic_regression(df, step_size)
coeffs

Index(['Player 1', 'Player 2', 'Player 3', 'Player 4', 'Player 5'], dtype='object')


100%|██████████| 1000/1000 [00:00<00:00, 116712.69it/s]


{'players': {'Player 1': 0.00944521083602812,
  'Player 2': -0.059367836282170613,
  'Player 3': -0.28691705252548305,
  'Player 4': -0.06759376554995981,
  'Player 5': -0.0957926873538984},
 'targets': {'PromTargetpt 3': 0.13522703556978416,
  'Target 1': 0.02080428275175322,
  'Target 2': 0.09942381999752103,
  'Target 4': 0.1039393383265726,
  'Target 5': 0.007252054857589886,
  'Target 6': -0.1252608088431948,
  'Target 7': 0.11117329972795668,
  'Target 8': 0.14766710848750098},
 'models': {'Model 1': -0.01650979046374434,
  'Model 2': 0.29846598451582323,
  'Model 3': 0.21826993682340498}}

In [52]:
# Refit on the augmented frame to see how the extra Player 5 battles move the estimates.
# NOTE: this rebinds `coeffs`, so the result of the earlier fit is no longer
# reachable under that name once this cell has run.
coeffs = online_logistic_regression(df_aug, step_size)
coeffs

Index(['Player 1', 'Player 2', 'Player 3', 'Player 4', 'Player 5'], dtype='object')


100%|██████████| 1100/1100 [00:00<00:00, 130096.28it/s]


{'players': {'Player 1': 0.00944521083602812,
  'Player 2': -0.059367836282170613,
  'Player 3': -0.28691705252548305,
  'Player 4': -0.06759376554995981,
  'Player 5': -0.451627111590742},
 'targets': {'PromTargetpt 3': 0.17454386156044452,
  'Target 1': 0.06188211579116652,
  'Target 2': 0.1377930140737959,
  'Target 4': 0.12880834509178318,
  'Target 5': 0.0423636342598402,
  'Target 6': -0.06401601222754535,
  'Target 7': 0.163483666764617,
  'Target 8': 0.21120192979822516},
 'models': {'Model 1': 0.1037638768651847,
  'Model 2': 0.39916782516157717,
  'Model 3': 0.3531288530855656}}