In [19]:
import pandas as pd
from math import log
import numpy as np

In [2]:
INITIAL_ELO_RATING = 1300
K = 20
SEASON = 2023

In [3]:
def calculate_win_prob(elo1, elo2) -> float:
    return 1 / (10 ** (-(elo1 - elo2) / 400) + 1)

def calculate_elo_change(
        winner_elo, loser_elo, location, 
        winner_score, loser_score, k
):
    diff_elo = abs(winner_elo - loser_elo)
    if location == 'H':
        diff_elo += 100
    # Probability of victory
    prob = 1 / (10 ** (-diff_elo / 400) + 1)

    # Victory Margin
    v_mar = 1 + log((winner_score - loser_score) + 1) * (2.2 / ((winner_elo - loser_elo) * .001 + 2.2))

    # Elo change
    d_elo = (winner_elo + k * (v_mar - prob)) - winner_elo

    return d_elo

In [11]:
teams_df = pd.concat([
    pd.read_csv("data_2023/MTeams.csv", index_col='TeamID'),
    pd.read_csv("data_2023/WTeams.csv", index_col='TeamID'),
])

elo_df = teams_df.copy()
elo_df["elo_rating"] = INITIAL_ELO_RATING
elo_df

Unnamed: 0_level_0,TeamName,FirstD1Season,LastD1Season,elo_rating
TeamID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1101,Abilene Chr,2014.0,2023.0,1300
1102,Air Force,1985.0,2023.0,1300
1103,Akron,1985.0,2023.0,1300
1104,Alabama,1985.0,2023.0,1300
1105,Alabama A&M,2000.0,2023.0,1300
...,...,...,...,...
3473,Lindenwood,,,1300
3474,Queens NC,,,1300
3475,Southern Indiana,,,1300
3476,Stonehill,,,1300


In [12]:

reg_season_df = pd.concat([
    pd.read_csv("data_2023/MRegularSeasonCompactResults.csv.zip", compression="zip"),
    pd.read_csv("data_2023/WRegularSeasonCompactResults.csv.zip", compression="zip"),
])
reg_season_df = reg_season_df[reg_season_df["Season"] == SEASON]
reg_season_df

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
176080,2023,7,1101,65,1238,56,H,0
176081,2023,7,1103,81,1355,80,H,1
176082,2023,7,1104,75,1255,54,H,0
176083,2023,7,1112,117,1311,75,H,0
176084,2023,7,1113,62,1470,59,H,0
...,...,...,...,...,...,...,...,...
126168,2023,132,3179,89,3125,71,N,0
126169,2023,132,3221,66,3131,61,A,0
126170,2023,132,3235,61,3400,51,N,0
126171,2023,132,3284,80,3406,55,A,0


https://fivethirtyeight.com/features/how-we-calculate-nba-elo-ratings/

In [13]:
for index, row in reg_season_df.iterrows():
    # w_ind = elo_df.index[elo_df['TeamID'] == row['WTeamID']]
    # print(row['WTeamID'])
    # print(type(row['WTeamID']))
    w_elo = elo_df.at[row['WTeamID'], "elo_rating"]
    # print(f'w_elo: {w_elo}')
    # l_ind = elo_df.index[elo_df['TeamID'] == row['LTeamID']]
    l_elo = elo_df.at[row['LTeamID'], "elo_rating"]
    # print(f'l_elo: {l_elo}')

    # ratings difference
    diff_elo = abs(w_elo - l_elo) + 100 if row['WLoc'] == 'H' else abs(w_elo - l_elo)
    # print(f'diff_elo: {diff_elo}')
    # expected score difference
    we = 1 / (10 ** (-diff_elo / 400) + 1)
    # print(f'we: {we}')
    # victory margin
    v_mar = 1 + log((row['WScore'] - row['LScore']) + 1) * (2.2/((w_elo - l_elo) * .001 + 2.2))
    # elo change
    d_elo = (w_elo + K * (v_mar - we)) - w_elo
    # print(f'd_elo: {d_elo}')

    elo_df.at[row['WTeamID'], "elo_rating"] = w_elo + d_elo
    elo_df.at[row['LTeamID'], "elo_rating"] = l_elo - d_elo

    reg_season_df.at[index, 'old_w_elo'] = w_elo
    reg_season_df.at[index, 'old_l_elo'] = l_elo
    reg_season_df.at[index, 'diff_elo'] = diff_elo
    reg_season_df.at[index, 'excpected_score_diff'] = we
    reg_season_df.at[index, 'victory_margin'] = v_mar
    reg_season_df.at[index, 'elo_change'] = d_elo
    reg_season_df.at[index, 'new_w_elo'] = w_elo + d_elo
    reg_season_df.at[index, 'new_l_elo'] = l_elo - d_elo

reg_season_df

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,old_w_elo,old_l_elo,diff_elo,excpected_score_diff,victory_margin,elo_change,new_w_elo,new_l_elo
176080,2023,7,1101,65,1238,56,H,0,1300.000000,1300.000000,100.000000,0.640065,3.302585,53.250402,1353.250402,1246.749598
176081,2023,7,1103,81,1355,80,H,1,1300.000000,1300.000000,100.000000,0.640065,1.693147,21.061644,1321.061644,1278.938356
176082,2023,7,1104,75,1255,54,H,0,1300.000000,1300.000000,100.000000,0.640065,4.091042,69.019549,1369.019549,1230.980451
176083,2023,7,1112,117,1311,75,H,0,1300.000000,1300.000000,100.000000,0.640065,4.761200,82.422702,1382.422702,1217.577298
176084,2023,7,1113,62,1470,59,H,0,1300.000000,1300.000000,100.000000,0.640065,2.386294,34.924587,1334.924587,1265.075413
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126168,2023,132,3179,89,3125,71,N,0,2048.127735,1981.850648,66.277086,0.594240,3.858329,65.281781,2113.409516,1916.568867
126169,2023,132,3221,66,3131,61,A,0,1837.402251,2091.039153,253.636902,0.811541,3.025249,44.274179,1881.676430,2046.764974
126170,2023,132,3235,61,3400,51,N,0,2075.421417,2370.236359,294.814942,0.845154,3.768954,58.475989,2133.897406,2311.760370
126171,2023,132,3284,80,3406,55,A,0,1322.766819,1802.595648,479.828829,0.940594,5.166918,84.526481,1407.293301,1718.069167


In [14]:
elo_df.to_csv('data_2023/eloRatings.csv')

In [15]:
elo_df

Unnamed: 0_level_0,TeamName,FirstD1Season,LastD1Season,elo_rating
TeamID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1101,Abilene Chr,2014.0,2023.0,1013.907566
1102,Air Force,1985.0,2023.0,1167.004326
1103,Akron,1985.0,2023.0,1699.692673
1104,Alabama,1985.0,2023.0,2418.112808
1105,Alabama A&M,2000.0,2023.0,938.503720
...,...,...,...,...
3473,Lindenwood,,,208.873467
3474,Queens NC,,,409.938074
3475,Southern Indiana,,,942.352119
3476,Stonehill,,,771.419983


In [20]:
teams = elo_df.index.tolist()
probabilites = []
for i in range(len(teams) - 1):
    for j in range(i + 1, len(teams)):
        elo_1 = elo_df.at[teams[i], "elo_rating"]
        elo_2 = elo_df.at[teams[j], "elo_rating"]
        prob = np.round(calculate_win_prob(elo_1, elo_2), 4)
        probabilites.append([f"{SEASON}_{teams[i]}_{teams[j]}", prob])
        
prob_df = pd.DataFrame(probabilites, columns=["ID", "Pred"])
prob_df

Unnamed: 0,ID,Pred
0,2023_1101_1102,0.2929
1,2023_1101_1103,0.0189
2,2023_1101_1104,0.0003
3,2023_1101_1105,0.6068
4,2023_1101_1106,0.9549
...,...,...
282371,2023_3474_3476,0.1110
282372,2023_3474_3477,0.0352
282373,2023_3475_3476,0.7279
282374,2023_3475_3477,0.4391


In [21]:
prob_df.to_csv("data_2023/submission2023.csv", index=False)