In [16]:
import matplotlib.pyplot as plt

import pyGMs as gm
import numpy as np
import torch
import random

import requests                                      # reading data
from io import StringIO
import time

import pyro
import pyro.infer
import pyro.optim
import pyro.distributions as dist
import torch.distributions.constraints as constraints
import pyro.poutine as poutine

import pandas as pd
import torch
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

from cmdstanpy import CmdStanModel
import cmdstanpy
import stan
import nest_asyncio
from sklearn.preprocessing import LabelEncoder
import pickle


from IPython.display import display, clear_output  # for iterative plotting

# NOTE(review): presumably patches asyncio so that event-loop-based libraries
# (e.g. `stan`) can run inside the notebook's already-running loop — confirm
# this is still required.
nest_asyncio.apply()

# Fix the random seeds up front so stochastic steps are repeatable.
seed = 123
random.seed(seed)
pyro.set_rng_seed(seed)

Most models use the Bradley–Terry model or some variant of it. For more information about the model: https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model

# Model 1

Baseline model with only latent ratings and binary win/loss results (drawn games are excluded).

In [30]:
# Load the cleaned game records for Model 1.
path = "../data/processed/games_clean.csv"
data = pd.read_csv(path)

# Map every player id (seen as white or black) to a contiguous integer index.
# The encoder is fitted before draws are removed, so K also counts players
# who only appear in drawn games.
player_encoder = LabelEncoder()
all_players = pd.concat([data['white_id'], data['black_id']]).unique()
player_encoder.fit(all_players)

data['white_id_encoded'] = player_encoder.transform(data['white_id'])
data['black_id_encoded'] = player_encoder.transform(data['black_id'])

# Model 1 is binary, so drop draws (result == 0.5). The explicit .copy() makes
# the filtered frame independent of the original, so the column assignment
# below is unambiguous and does not raise SettingWithCopyWarning.
data = data[data['result'] != 0.5].copy()
data['result'] = data['result'].astype(int)

stan_data = {
    'N': len(data),                               # number of decisive games
    'K': len(player_encoder.classes_),            # number of distinct players
    'white': data['white_id_encoded'].values + 1,  # shift to 1-based indices for Stan
    'black': data['black_id_encoded'].values + 1,
    'result': data['result'].values,
    'prior_mu': 0.0,  # Prior mean for player skills
    'prior_sigma': 3.0  # Prior std dev for player skills
}

In [31]:
# Compile the Model 1 Stan program and draw posterior samples.
# Despite its name, `model_dir` holds the path to the .stan file itself.
model_dir = "../models/stan_models/model1.stan"
posterior = CmdStanModel(stan_file=model_dir)
# Pass the notebook-wide seed so the MCMC draws are reproducible across runs;
# without it each run produces different samples.
fit = posterior.sample(data=stan_data, seed=seed)

14:28:18 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

14:31:16 - cmdstanpy - INFO - CmdStan done processing.





In [34]:
# Save model
save_dir = "../models/trained_models/stan/model1"
fit.save_csvfiles(dir=save_dir)

In [18]:
# Load model
load_dir = "../models/trained_models/stan/model1"
fit = cmdstanpy.from_csv(f"{save_dir}")

In [33]:
# Posterior draws of the `skill` parameter, summarised per player.
# Assumes axis 0 indexes draws and axis 1 indexes players — TODO confirm
# against the Stan model.
skills = fit.stan_variable("skill")
mean_skills = skills.mean(axis=0)

# Presumably skill column k corresponds to encoder class k, since the encoded
# ids (+1) were what was passed to Stan; verify against the model.
players = player_encoder.classes_
results_df = pd.DataFrame(
    {
        'player': players,
        'skill': mean_skills,
        'skill_std': skills.std(axis=0),
    }
)
results_df = results_df.sort_values('skill', ascending=False)

# Ten highest posterior-mean skills.
results_df.head(10)

Unnamed: 0,player,skill,skill_std
709,amir2002zzz,6.149062,1.690257
2348,chesscarl,6.0337,1.14012
10845,siindbad,6.014834,1.571361
8012,mmichael,5.996825,1.661663
10999,smilsydov,5.442687,1.229748
6919,liamschauerman,5.403297,1.798973
8792,oilen1401,5.354351,2.112978
9969,ridics,5.216526,1.180002
11892,tony995,5.14069,1.802736
4318,futuro_gm22,5.072594,1.725142


## Evaluation

In [75]:
# TODO: evaluation for Model 1 is not implemented yet; this cell is a placeholder.
...

# Model 2

Model with three-way results (win/draw/loss). Uses Rao and Kupper's extension of the Bradley–Terry model, which accounts for ties. For more information, see:
* source 1: https://www.jstor.org/stable/2283595?seq=9 
* source 2: https://encyclopediaofmath.org/wiki/Paired_comparison_model

In [76]:
# Load the cleaned game records for Model 2 (draws are kept).
path = "../data/processed/games_clean.csv"
data = pd.read_csv(path)

# Map every player id (seen as white or black) to a contiguous integer index.
player_encoder = LabelEncoder()
all_players = pd.concat([data['white_id'], data['black_id']]).unique()
player_encoder.fit(all_players)

data['white_id_encoded'] = player_encoder.transform(data['white_id'])
data['black_id_encoded'] = player_encoder.transform(data['black_id'])

# Recode the score into three ordered categories, vectorised instead of a
# row-wise lambda: above 0.5 -> 2, exactly 0.5 -> 1, anything else -> 0.
# Presumably the raw score is from white's perspective (2 = white win) — TODO confirm.
score = data['result']
data['result'] = np.select([score > 0.5, score == 0.5], [2, 1], default=0)

# Reuse the encoded columns computed above rather than running the encoder a
# second time; this matches how Model 1 builds its inputs.
stan_data = {
    'N': len(data),                               # number of games
    'K': len(player_encoder.classes_),            # number of distinct players
    'white': data['white_id_encoded'].values + 1,  # shift to 1-based indices for Stan
    'black': data['black_id_encoded'].values + 1,
    'result': data['result'].values,
    'theta_prior_mean': 2.0
}

In [77]:
# Compile the Model 2 Stan program and draw posterior samples.
# Despite its name, `model_dir` holds the path to the .stan file itself.
model_dir = "../models/stan_models/model2.stan"
posterior = CmdStanModel(stan_file=model_dir)
# Pass the notebook-wide seed so the MCMC draws are reproducible across runs;
# without it each run produces different samples.
fit = posterior.sample(data=stan_data, seed=seed)

16:03:55 - cmdstanpy - INFO - compiling stan file /var/folders/9c/jg04wx2578v74k112x9wc48c0000gn/T/tmplmkb41e1/tmpvnnns927.stan to exe file /Users/hanseo/Documents/UCI/Spring 25/CS 179/project/CS179-Final-Project/models/stan_models/model2
16:04:04 - cmdstanpy - INFO - compiled model executable: /Users/hanseo/Documents/UCI/Spring 25/CS 179/project/CS179-Final-Project/models/stan_models/model2
16:04:04 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

16:06:41 - cmdstanpy - INFO - CmdStan done processing.





In [78]:
# Save model
save_dir = "../models/trained_models/stan/model2"
fit.save_csvfiles(dir=save_dir)

In [79]:
# Load model
load_dir = "../models/trained_models/stan/model2"
fit = cmdstanpy.from_csv(f"{save_dir}")

In [80]:
# Posterior draws of the `skill` parameter, summarised per player.
# Assumes axis 0 indexes draws and axis 1 indexes players — TODO confirm
# against the Stan model.
skills = fit.stan_variable("skill")
mean_skills = skills.mean(axis=0)

# Presumably skill column k corresponds to encoder class k, since the encoded
# ids (+1) were what was passed to Stan; verify against the model.
players = player_encoder.classes_
results_df = pd.DataFrame(
    {
        'player': players,
        'skill': mean_skills,
        'skill_std': skills.std(axis=0),
    }
)
results_df = results_df.sort_values('skill', ascending=False)

# Ten highest posterior-mean skills.
results_df.head(10)

Unnamed: 0,player,skill,skill_std
2348,chesscarl,2.864627,0.529789
10999,smilsydov,2.551158,0.537524
10845,siindbad,2.518322,0.603038
8012,mmichael,2.276905,0.611299
2503,christina-a-11,2.162887,0.578739
9969,ridics,2.038823,0.595194
11940,traced,1.956823,0.510173
3688,elvis1997,1.881738,0.432609
6638,krishchennai93,1.841691,0.68753
11715,theshelton,1.821735,0.586291


## Evaluation

In [None]:
# TODO: evaluation for Model 2 is not implemented yet; this cell is a placeholder.
...

# Model 3

Baseline model with only latent ratings, excluding any player with fewer than 4 games played

In [86]:
# Load the cleaned game records for Model 3.
path = "../data/processed/games_clean.csv"
data = pd.read_csv(path)

# Games per player, counting appearances as either colour.
white_counts = data['white_id'].value_counts()
black_counts = data['black_id'].value_counts()
total_counts = white_counts.add(black_counts, fill_value=0)

# Keep only games in which both players have at least 4 games in the full
# dataset. The explicit .copy() makes the filtered frame independent, so the
# column assignments below do not raise SettingWithCopyWarning.
# NOTE(review): counts are taken before filtering, so a retained player can end
# up with fewer than 4 games in the filtered data — confirm this is intended.
qualified_players = total_counts[total_counts >= 4].index
data = data[
    data['white_id'].isin(qualified_players) &
    data['black_id'].isin(qualified_players)
].copy()

# Encode only the players that survive the filter, so indices stay contiguous.
player_encoder = LabelEncoder()
all_players = pd.concat([data['white_id'], data['black_id']]).unique()
player_encoder.fit(all_players)

data['white_id_encoded'] = player_encoder.transform(data['white_id'])
data['black_id_encoded'] = player_encoder.transform(data['black_id'])

# Recode the score into three ordered categories, vectorised instead of a
# row-wise lambda: above 0.5 -> 2, exactly 0.5 -> 1, anything else -> 0.
score = data['result']
data['result'] = np.select([score > 0.5, score == 0.5], [2, 1], default=0)

# Reuse the encoded columns computed above rather than running the encoder a
# second time.
stan_data = {
    'N': len(data),                               # number of retained games
    'K': len(player_encoder.classes_),            # number of retained players
    'white': data['white_id_encoded'].values + 1,  # shift to 1-based indices for Stan
    'black': data['black_id_encoded'].values + 1,
    'result': data['result'].values,
    'theta_prior_mean': 2.0
}

In [92]:
# Compile the Model 3 Stan program and draw posterior samples.
# Despite its name, `model_dir` holds the path to the .stan file itself.
model_dir = "../models/stan_models/model3.stan"
posterior = CmdStanModel(stan_file=model_dir)
# Pass the notebook-wide seed so the MCMC draws are reproducible across runs;
# without it each run produces different samples.
fit = posterior.sample(data=stan_data, seed=seed)

16:09:34 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

16:09:41 - cmdstanpy - INFO - CmdStan done processing.





In [93]:
# Save model
save_dir = "../models/trained_models/stan/model3"
fit.save_csvfiles(dir=save_dir)

In [94]:
# Load model
load_dir = "../models/trained_models/stan/model3"
fit = cmdstanpy.from_csv(f"{save_dir}")

In [95]:
# Posterior draws of the `skill` parameter, summarised per player.
# Assumes axis 0 indexes draws and axis 1 indexes players — TODO confirm
# against the Stan model.
skills = fit.stan_variable("skill")
mean_skills = skills.mean(axis=0)

# Presumably skill column k corresponds to encoder class k, since the encoded
# ids (+1) were what was passed to Stan; verify against the model.
players = player_encoder.classes_
results_df = pd.DataFrame(
    {
        'player': players,
        'skill': mean_skills,
        'skill_std': skills.std(axis=0),
    }
)
results_df = results_df.sort_values('skill', ascending=False)

# Ten highest posterior-mean skills.
results_df.head(10)

Unnamed: 0,player,skill,skill_std
128,cdvh,2.005195,0.735567
140,chesscarl,1.650286,0.747374
368,laode_syahril,1.630026,0.634443
515,projetoxadrez,1.620197,0.800732
534,romanar2006,1.550053,0.73886
149,chiggen,1.513841,0.800045
266,great-mate-super-gre,1.357168,0.800836
668,vitaminex,1.334118,0.73941
173,daniamurashov,1.295491,0.7493
679,wiggleitjiggleit,1.292181,0.661252


## Evaluation

In [56]:
# TODO: evaluation for Model 3 is not implemented yet; this cell is a placeholder.
...

Ellipsis

# Model 4 (Tentative)

Skill estimation with piece color adjustment factor