# Tennis TrueSkill Ranking

In [1]:
import pandas as pd
import numpy as np

import trueskill

In [2]:
# Load the training results (used to fit ratings) and the test fixtures (to be scored).
TRAIN_CSV, TEST_CSV = 'utr_train_UPDATED.csv', 'utr_test_UPDATED.csv'
utr_train = pd.read_csv(TRAIN_CSV)
utr_test = pd.read_csv(TEST_CSV)

In [3]:
# Preview the first rows of the raw training matches.
utr_train.head()

Unnamed: 0,resultid,resultmonth,winnerid,loserid,winnerset1,winnerset2,winnerset3,winnerset4,winnerset5,loserset1,loserset2,loserset3,loserset4,loserset5
0,1,2019-05,57529,3765,7,7,0,0,0,6,5,0,0,0
1,2,2019-03,83218,3871,6,6,0,0,0,4,2,0,0,0
2,4,2019-11,4021,4487,7,5,7,0,0,5,7,5,0,0
3,5,2019-10,1984892,411593,6,6,0,0,0,3,3,0,0,0
4,7,2019-09,52294,224678,6,6,0,0,0,3,2,0,0,0


In [4]:
# Order the matches by month so that later cells process them in (month-level)
# chronological order, then discard the month column itself.
utr_train = (
    utr_train
    .sort_values(by='resultmonth')
    .drop(columns='resultmonth')
)
utr_train

Unnamed: 0,resultid,winnerid,loserid,winnerset1,winnerset2,winnerset3,winnerset4,winnerset5,loserset1,loserset2,loserset3,loserset4,loserset5
420,578,11063,45027,6,6,0,0,0,1,0,0,0,0
2818,4026,847617,10788,1,6,6,0,0,6,3,4,0,0
2816,4022,11076,2555950,6,6,0,0,0,3,2,0,0,0
1593,2249,58235,79636,6,6,0,0,0,2,3,0,0,0
1589,2244,3498,83218,4,6,6,0,0,6,4,4,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,382,86294,233912,6,6,0,0,0,3,3,0,0,0
3886,5591,238409,153449,4,6,6,0,0,6,3,1,0,0
3903,5613,50083,191924,2,6,6,0,0,6,2,2,0,0
2668,3816,11640,42381,1,6,6,0,0,6,1,2,0,0


In [5]:
# Collect every player id that appears in the training data, winners first and
# then losers, keeping the order of first appearance.
# pd.concat replaces Series.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
player_list = pd.concat(
    [utr_train['winnerid'], utr_train['loserid']],
    ignore_index=True,
).unique()

# One row per distinct player; ratings are attached in a later cell.
player_df = pd.DataFrame(player_list, columns=['id'])

# TrueSkill

In [6]:
from trueskill import rate_1vs1
from trueskill import Rating

# Start every player from the default prior rating.
# A list with one Rating per row is passed explicitly instead of a bare
# Rating(): Rating objects are iterable but have no length, so pandas versions
# that treat any iterable as list-like try to length-match the single Rating
# against the index rather than broadcasting it as a scalar.
player_df = player_df.assign(rating=[Rating() for _ in range(len(player_df))])

In [7]:
# Check that each player now carries a rating.
player_df

Unnamed: 0,id,rating
0,11063,"trueskill.Rating(mu=25.000, sigma=8.333)"
1,847617,"trueskill.Rating(mu=25.000, sigma=8.333)"
2,11076,"trueskill.Rating(mu=25.000, sigma=8.333)"
3,58235,"trueskill.Rating(mu=25.000, sigma=8.333)"
4,3498,"trueskill.Rating(mu=25.000, sigma=8.333)"
...,...,...
1141,34161,"trueskill.Rating(mu=25.000, sigma=8.333)"
1142,217710,"trueskill.Rating(mu=25.000, sigma=8.333)"
1143,25603,"trueskill.Rating(mu=25.000, sigma=8.333)"
1144,90999,"trueskill.Rating(mu=25.000, sigma=8.333)"


In [8]:
# Replay every training match in row order and update both players' ratings
# after each result.
#
# Ratings are kept in an id -> Rating dict while looping: this avoids scanning
# player_df with a boolean mask for every match and removes the dependence on
# positional column numbers and on player_df having a default 0..n-1 index.
ratings_by_id = dict(zip(player_df['id'], player_df['rating']))

for m_winner, m_loser in zip(utr_train['winnerid'], utr_train['loserid']):
    # rate_1vs1 takes the winner's rating first and returns the updated
    # (winner, loser) pair.
    new_winner_rank, new_loser_rank = rate_1vs1(
        ratings_by_id[m_winner], ratings_by_id[m_loser]
    )
    ratings_by_id[m_winner] = new_winner_rank
    ratings_by_id[m_loser] = new_loser_rank

# Write the final ratings back so later cells can keep reading player_df.
player_df['rating'] = [ratings_by_id[player_id] for player_id in player_df['id']]


    

In [9]:
# Ratings after all training matches have been replayed.
player_df

Unnamed: 0,id,rating
0,11063,"trueskill.Rating(mu=26.667, sigma=2.403)"
1,847617,"trueskill.Rating(mu=21.580, sigma=3.627)"
2,11076,"trueskill.Rating(mu=29.051, sigma=2.338)"
3,58235,"trueskill.Rating(mu=27.908, sigma=2.572)"
4,3498,"trueskill.Rating(mu=26.458, sigma=2.067)"
...,...,...
1141,34161,"trueskill.Rating(mu=17.996, sigma=6.576)"
1142,217710,"trueskill.Rating(mu=17.396, sigma=6.520)"
1143,25603,"trueskill.Rating(mu=17.834, sigma=6.242)"
1144,90999,"trueskill.Rating(mu=16.749, sigma=6.553)"


# Calculating Win Probability

In [10]:
import itertools
import math
from trueskill import BETA

def win_probability(team1, team2, env=None):
    """Return the modelled probability that ``team1`` beats ``team2`` (1v1).

    Variation on the win-probability formula from Juho Snellman, specialised
    to one player per side.

    Parameters
    ----------
    team1, team2
        Objects exposing ``mu`` and ``sigma`` (e.g. ``trueskill.Rating``).
    env : optional
        Object supplying ``beta`` and ``cdf``. Defaults to
        ``trueskill.global_env()``. ``beta`` is read from this environment
        rather than from the module-level ``BETA`` constant, so the result
        stays consistent if the environment has been reconfigured.

    Returns
    -------
    The value of ``env.cdf`` at the skill gap divided by the combined
    performance/uncertainty scale.
    """
    ts = trueskill.global_env() if env is None else env
    delta_mu = team1.mu - team2.mu
    sum_sigma = (team1.sigma ** 2) + (team2.sigma ** 2)
    size = 2  # total number of players in a 1v1 match
    denom = math.sqrt(size * (ts.beta * ts.beta) + sum_sigma)
    return ts.cdf(delta_mu / denom)

In [11]:
# Adding a prob column to player df to test
utr_train = utr_train.assign(prob = 0.0)
utr_train = utr_train.reset_index()
utr_train

Unnamed: 0,index,resultid,winnerid,loserid,winnerset1,winnerset2,winnerset3,winnerset4,winnerset5,loserset1,loserset2,loserset3,loserset4,loserset5,prob
0,420,578,11063,45027,6,6,0,0,0,1,0,0,0,0,0.0
1,2818,4026,847617,10788,1,6,6,0,0,6,3,4,0,0,0.0
2,2816,4022,11076,2555950,6,6,0,0,0,3,2,0,0,0,0.0
3,1593,2249,58235,79636,6,6,0,0,0,2,3,0,0,0,0.0
4,1589,2244,3498,83218,4,6,6,0,0,6,4,4,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4547,264,382,86294,233912,6,6,0,0,0,3,3,0,0,0,0.0
4548,3886,5591,238409,153449,4,6,6,0,0,6,3,1,0,0,0.0
4549,3903,5613,50083,191924,2,6,6,0,0,6,2,2,0,0,0.0
4550,2668,3816,11640,42381,1,6,6,0,0,6,1,2,0,0,0.0


In [12]:
# Score every training match with the probability that its actual winner wins.
# The ratings used here are the ones already stored in player_df, i.e. after
# the training matches were replayed, so this is an in-sample sanity check.
#
# Players are looked up through an id -> Rating dict and columns are addressed
# by name, so the cell no longer scans player_df per row or depends on column
# positions and row labels.
ratings_by_id = dict(zip(player_df['id'], player_df['rating']))

utr_train['prob'] = [
    win_probability(ratings_by_id[m_winner], ratings_by_id[m_loser])
    for m_winner, m_loser in zip(utr_train['winnerid'], utr_train['loserid'])
]


In [13]:
# First 30 training matches with the probability assigned to the actual winner.
utr_train[:30]

Unnamed: 0,index,resultid,winnerid,loserid,winnerset1,winnerset2,winnerset3,winnerset4,winnerset5,loserset1,loserset2,loserset3,loserset4,loserset5,prob
0,420,578,11063,45027,6,6,0,0,0,1,0,0,0,0,0.383749
1,2818,4026,847617,10788,1,6,6,0,0,6,3,4,0,0,0.436805
2,2816,4022,11076,2555950,6,6,0,0,0,3,2,0,0,0,0.797004
3,1593,2249,58235,79636,6,6,0,0,0,2,3,0,0,0,0.796244
4,1589,2244,3498,83218,4,6,6,0,0,6,4,4,0,0,0.144543
5,2082,2954,3691,3469,7,7,0,0,0,5,5,0,0,0,0.440983
6,3814,5486,3564,224142,6,7,6,0,0,4,5,1,0,0,0.487718
7,2083,2955,52337,51245,7,7,0,0,0,5,6,0,0,0,0.458536
8,3830,5505,3832,52294,6,6,7,0,0,1,1,6,0,0,0.48234
9,3832,5508,10788,1609206,6,5,7,0,0,2,7,5,0,0,0.674661


In [14]:
# Test fixtures before scoring.
utr_test

Unnamed: 0,resultid,resultmonth,player1,player2,player1winprobability
0,3,2019-02,4372,3464,
1,6,2019-02,3671,3507,
2,8,2019-08,4584,53374,
3,9,2019-08,10759,10751,
4,13,2019-10,405461,1194009,
...,...,...,...,...,...
1839,6524,2019-11,3654,87175,
1840,6537,2019-11,79558,54130,
1841,6543,2019-02,1517181,4031,
1842,6545,2019-08,10751,10274,


In [15]:
# Predict, for every test fixture, the probability that player1 beats player2.
#
# Players are looked up through an id -> Rating dict instead of a boolean-mask
# scan per row. A player who never appeared in the training data has no row in
# player_df; rather than failing on the lookup, such players get the default
# prior rating.
ratings_by_id = dict(zip(player_df['id'], player_df['rating']))
unseen_rating = Rating()

utr_test['player1winprobability'] = [
    win_probability(
        ratings_by_id.get(player1, unseen_rating),
        ratings_by_id.get(player2, unseen_rating),
    )
    for player1, player2 in zip(utr_test['player1'], utr_test['player2'])
]

In [16]:
# Test fixtures with the predicted player1 win probability filled in.
utr_test

Unnamed: 0,resultid,resultmonth,player1,player2,player1winprobability
0,3,2019-02,4372,3464,0.690481
1,6,2019-02,3671,3507,0.379724
2,8,2019-08,4584,53374,0.146433
3,9,2019-08,10759,10751,0.904549
4,13,2019-10,405461,1194009,0.670049
...,...,...,...,...,...
1839,6524,2019-11,3654,87175,0.400544
1840,6537,2019-11,79558,54130,0.812162
1841,6543,2019-02,1517181,4031,0.159458
1842,6545,2019-08,10751,10274,0.172955
