In [None]:
!pip install pymc
!pip install numpyro
!pip install aeppl
!pip install jax

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd drive
%cd MyDrive
%cd cs179

/content/drive
/content/drive/MyDrive
/content/drive/MyDrive/cs179


In [4]:
import pandas as pd

csv_file_path = '/drive/MyDrive/cs179/games.csv'

#getting info
df = pd.read_csv('games.csv')

#print(df['white_id'])
white_ids = df['white_id']
black_ids =  df["black_id"]
winners = df["winner"]

print(white_ids[:5])

0         bourgris
1             a-00
2           ischia
3    daniamurashov
4        nik221107
Name: white_id, dtype: object


In [5]:
import numpy as np

uniqueppl = white_ids.tolist()
uniqueppl.extend(black_ids.tolist())

uppl = np.array(list(set(uniqueppl)))#set of unique people

n_players = len(uppl)

# reformat gamesplayed -> winner first, loser second
gamesplayed = []
for game in range(len(df)):
    white_id = df.iloc[game].white_id
    black_id = df.iloc[game].black_id

    if df.iloc[game].winner == "white":
        gamesplayed.append([white_id, black_id])
    if df.iloc[game].winner == "black":
        gamesplayed.append([black_id, white_id])

print(gamesplayed[0:5])

[['bourgris', 'a-00'], ['skinnerua', 'a-00'], ['ischia', 'a-00'], ['daniamurashov', 'adivanov2009'], ['nik221107', 'adivanov2009']]


In [6]:
import numpy as np
import pymc as pm

# convert str to int
player_to_id = {player: i for i, player in enumerate(uppl)}
player_ids = np.array([[player_to_id[winner], player_to_id[loser]] for winner, loser in gamesplayed])

# check sizes -> something is wrong here. tracce has more players than uppl
print(len(uppl))
print(len(player_to_id))
print(len(player_ids))

with pm.Model() as model:
    player_sd = pm.HalfNormal("player_sd", sigma=1.0)
    player_skills_raw = pm.Normal(
        "player_skills_raw", 0.0, sigma=1.0, shape=(n_players,)
    )
    player_skills = pm.Deterministic("player_skills", player_skills_raw * player_sd)

    winner_ids = player_ids[:, 0]
    loser_ids = player_ids[:, 1]

    logit_skills = player_skills[winner_ids] - player_skills[loser_ids]
    lik = pm.Bernoulli(
        "win_lik", logit_p=logit_skills, observed=np.ones(winner_ids.shape[0])
    )

    # Sample using the numpyro backend for JAX
    trace = pm.sample(1000, tune=1000, chains=4, cores=1, nuts_sampler='numpyro', random_seed=42)

15635
15635
19108


  pmap_numpyro = MCMC(
sample: 100%|██████████| 2000/2000 [00:46<00:00, 42.66it/s, 31 steps of size 1.15e-01. acc. prob=0.86]
sample: 100%|██████████| 2000/2000 [00:41<00:00, 48.33it/s, 31 steps of size 1.21e-01. acc. prob=0.85]
sample: 100%|██████████| 2000/2000 [00:42<00:00, 47.39it/s, 31 steps of size 1.26e-01. acc. prob=0.84]
sample: 100%|██████████| 2000/2000 [00:43<00:00, 46.05it/s, 31 steps of size 1.22e-01. acc. prob=0.85]


In [7]:
import arviz as az

summary = az.summary(trace, kind="stats")

In [8]:
# filter out raw player skills (useless)
player_skills_summary = summary[~summary.index.str.startswith('player_skills_raw')]
player_skills_summary = player_skills_summary[1:]

player_means = player_skills_summary['mean']
player_sds = player_skills_summary['sd']

# Create df
player_skills_df = pd.DataFrame({
    'Player Name': uppl,
    'Skill Mean': player_means.values,
    'Skill SD': player_sds.values
})

sorted_player_skills_df = player_skills_df.sort_values(by='Skill Mean', ascending=False)
print(sorted_player_skills_df)


               Player Name  Skill Mean  Skill SD
7887             chesscarl       3.474     0.685
12430             siindbad       3.272     0.814
7035              mmichael       3.104     0.823
9655           amir2002zzz       2.961     0.835
11607           steelviper       2.861     0.839
...                    ...         ...       ...
13574           sveenemand      -2.771     0.666
3121              ghaffari      -2.786     0.716
4142   josephelbouhessaini      -2.878     0.875
8515              mccheese      -2.893     0.717
12091           stellanova      -3.150     0.814

[15635 rows x 3 columns]


In [9]:
# Concatenate white_id and black_id columns to get all player ids
all_player_ids = pd.concat([df['white_id'], df['black_id']]).unique()

player_max_ratings = {}

for player_id in all_player_ids:
    # Get maximum rating for the current player from both white and black ratings
    max_rating = max(df.loc[df['white_id'] == player_id, 'white_rating'].max(),
                     df.loc[df['black_id'] == player_id, 'black_rating'].max())
    # Store the maximum rating for the player
    player_max_ratings[player_id] = max_rating

sorted_player_max_ratings = sorted(player_max_ratings.items(), key=lambda x: x[1], reverse=True)

max_rating_df = pd.DataFrame(sorted_player_max_ratings, columns=['Player Name', 'Elo Rating'])

print(max_rating_df)

               Player Name  Elo Rating
0               justicebot      2723.0
1              blitzbullet      2622.0
2                lance5500      2621.0
3      shahoviy_komentator      2586.0
4               teatime007      2579.0
...                    ...         ...
15630             seabacon         NaN
15631      yamaguchipolgar         NaN
15632   farrukhasomiddinov         NaN
15633         schaaksmurf3         NaN
15634                ffbob         NaN

[15635 rows x 2 columns]


In [10]:
# see how top 5 estimates are actually ranked (0 being the best, 15634 being the worst)

print(max_rating_df.loc[max_rating_df['Player Name'] == 'chesscarl'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'siindbad'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'mmichael'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'amir2002zzz'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'steelviper'].index[0])
print()

# see how bottom 5 estimates are actually ranked (0 being the best, 15634 being the worst)

print(max_rating_df.loc[max_rating_df['Player Name'] == 'ghaffari'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'sveenemand'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'josephelbouhessaini'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'mccheese'].index[0])
print(max_rating_df.loc[max_rating_df['Player Name'] == 'stellanova'].index[0])



34
71
185
15
4867

7434
8501
5816
5742
7948
