Rugby FL DS

In [9]:
import pandas as pd

results_df = pd.read_csv('results.csv')
results_df.head()

print(len(results_df['Game_ID'].unique()))

3893


In [10]:
def restructure_dataframe(df):
    new_cols = [
        'Game_ID', 'Championship_ID', 'Team_ID', 'Team_Name', 'Team_Score', 
        'Opponent_ID', 'Opponent_Name', 'Opponent_Score', 'Game_Date', 'Is_Home'
    ]
    home_df = df.rename(columns={
        'Home_Club_ID': 'Team_ID',
        'Home_Club_AKA': 'Team_Name',
        'Home_Score': 'Team_Score',
        'Away_Club_ID': 'Opponent_ID',
        'Away_Club_AKA': 'Opponent_Name',
        'Away_Score': 'Opponent_Score'
    })
    home_df['Is_Home'] = True

    away_df = df.rename(columns={
        'Away_Club_ID': 'Team_ID',
        'Away_Club_AKA': 'Team_Name',
        'Away_Score': 'Team_Score',
        'Home_Club_ID': 'Opponent_ID',
        'Home_Club_AKA': 'Opponent_Name',
        'Home_Score': 'Opponent_Score'
    })
    away_df['Is_Home'] = False
    
    restructured_df = pd.concat([home_df, away_df], ignore_index=True)

    # Reorder the columns
    restructured_df = restructured_df[new_cols]

    return restructured_df

results_df = restructure_dataframe(results_df)

In [12]:
# Create a season column, which is the year of the game
results_df['Game_Date'] = pd.to_datetime(results_df['Game_Date'])
results_df['Season'] = results_df['Game_Date'].dt.year

print(len(results_df['Game_ID'].unique()))
print(results_df.shape)

3893
(7786, 11)


In [80]:
# functions to calculate each part of the elo formula

def find_K(MOV, elo_diff):
    # positive MOV means home team won
    K_0 = 20
    if MOV > 0: # Margin of victory
        multiplier = (MOV + 3) ** 0.8 / (7.5 + 0.006 * elo_diff)
    else:
        multiplier = (-MOV + 3) ** 0.8 / (7.5 + 0.006 * -elo_diff)
    # returns two values, one for each team
    return K_0 * multiplier, K_0 * multiplier


def find_S(MOV):
    if MOV > 0:  # home team won
        return 1, 0
    elif MOV < 0:  # home team lost
        return 0, 1
    else:
        return 0.5, 0.5  # tie


def find_E(elo_home, elo_away):
    home_adv = 100
    elo_home += home_adv

    E_home = 1 / (1 + 10 ** ((elo_away - elo_home) / 400.0))
    E_away = 1 / (1 + 10 ** ((elo_home - elo_away) / 400.0))

    return E_home, E_away


def find_new_elo(elo_home, elo_away, MOV):
    elo_diff = elo_home - elo_away
    K_home, K_away = find_K(MOV, elo_diff)
    S_home, S_away = find_S(MOV)
    E_home, E_away = find_E(elo_home, elo_away)

    elo_home = elo_home + K_home * (S_home - E_home)
    elo_away = elo_away + K_away * (S_away - E_away)

    return elo_home, elo_away


def find_decay_elo(elo):
    # 1505 to aim for a 1500 mean
    # due to new teams joining the league
    return elo * 0.75 + 1505 * 0.25

In [81]:
import elo

elo_dict = {}
starting_elo = 1300

for team in results_df['Team_Name'].unique():
    elo_dict[team] = starting_elo
    
results_df['team_elo'] = starting_elo
results_df['opp_elo'] = starting_elo
results_df['elo_change'] = 0  # How the elo changed after the game

start_season = results_df['Season'].min()
current_season = start_season

# Loop through each season by game_id and update both rows with the same game_id
def find_elo_ratings(season):
    games_seen = set()
    season_df = results_df[results_df['Season'] == season]
    
    for index, row in season_df.iterrows():
        game_id = row['Game_ID']
        if game_id in games_seen:
            continue
        games_seen.add(game_id)
        
        team_name = row['Team_Name']
        opp_name = row['Opponent_Name']
        team_elo = elo_dict[team_name]
        opp_elo = elo_dict[opp_name]
        MOV = row['Team_Score'] - row['Opponent_Score']
        
        new_team_elo, new_opp_elo = find_new_elo(team_elo, opp_elo, MOV)
        elo_dict[team_name] = new_team_elo
        elo_dict[opp_name] = new_opp_elo
        
        results_df.loc[index, 'team_elo'] = team_elo
        results_df.loc[index, 'opp_elo'] = opp_elo
        results_df.loc[index, 'elo_change'] = new_team_elo - team_elo

seasons = results_df['Season'].unique()
for season in seasons:
    # Not decaying ratings
    # if season != start_season:
    #     for team in elo_dict:
    #         elo_dict[team] = find_decay_elo(elo_dict[team])
    find_elo_ratings(season)

  results_df.loc[index, 'elo_change'] = new_team_elo - team_elo
  results_df.loc[index, 'team_elo'] = team_elo
  results_df.loc[index, 'opp_elo'] = opp_elo


In [82]:
results_df.groupby('Team_Name')['team_elo'].max().sort_values(ascending=False)

Team_Name
Okapi Rugby             1897.128720
Miami Rugby FC          1886.786909
Miami Tridents          1836.556744
Pelicans                1761.880257
Palm Beach              1728.725860
                           ...     
Weston Youth            1351.250000
Estero Spartans RFC     1345.948744
Indian River Raptors    1326.885041
SouthTech               1300.000000
Florida Tech Rugby      1300.000000
Name: team_elo, Length: 84, dtype: float64

In [85]:
# Graph of team elo over time
import matplotlib.pyplot as plt

team = 'Miami Tridents'
team_df = results_df[results_df['Team_Name'] == team]

team_df


Unnamed: 0,Game_ID,Championship_ID,Team_ID,Team_Name,Team_Score,Opponent_ID,Opponent_Name,Opponent_Score,Game_Date,Is_Home,Season,team_elo,opp_elo,elo_change
2,51,9,6,Miami Tridents,12,12,Boca Raton,42,2012-11-10,True,2012,1289.036020,1300.000000,-27.110971
5,54,9,6,Miami Tridents,23,1,Miami Rugby FC,22,2012-11-17,True,2012,1261.925049,1253.844395,2.805461
10,59,9,6,Miami Tridents,29,5,Ft. Lauderdale,20,2012-12-15,True,2012,1290.629771,1298.331561,7.251825
19,68,9,6,Miami Tridents,10,13,Hammerheads,34,2013-01-26,True,2013,1335.574291,1407.407654,-19.034687
149,328,9,6,Miami Tridents,0,54,Rugby in Florida,0,2012-12-01,True,2012,1285.726683,1298.473487,-0.781877
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7507,10243,157,6,Miami Tridents,43,11,Orlando RFC,0,2024-02-24,False,2024,1300.000000,1300.000000,0.000000
7519,10259,157,6,Miami Tridents,48,7,Tampa Krewe,20,2024-04-06,False,2024,1300.000000,1300.000000,0.000000
7646,10434,158,6,Miami Tridents,7,125,Miami Sharks,28,2023-12-02,False,2023,1300.000000,1300.000000,0.000000
7647,10435,158,6,Miami Tridents,78,13,Hammerheads,7,2023-12-09,False,2023,1300.000000,1300.000000,0.000000
