Rugby FL DS

In [2]:
import pandas as pd

In [73]:
# Load the raw match results. The cleaning steps later in this cell read the
# Home_*/Away_* score and club-name columns from this frame.
results_df = pd.read_csv('results.csv')

def restructure_dataframe(df):
    """Reshape one-row-per-game results into one-row-per-team-per-game.

    Every input game yields two output rows: one seen from the home side
    (``Is_Home`` is True) and one seen from the away side (``Is_Home`` is
    False). A ``Season`` column is derived from ``Game_Date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Game results. Must contain the columns ``Game_ID``,
        ``Championship_ID``, ``Game_Date``, ``Home_Club_ID``,
        ``Home_Club_AKA``, ``Home_Score``, ``Away_Club_ID``,
        ``Away_Club_AKA`` and ``Away_Score``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``Game_ID``, ``Championship_ID``, ``Team_ID``, ``Team_Name``,
        ``Team_Score``, ``Opponent_ID``, ``Opponent_Name``,
        ``Opponent_Score``, ``Game_Date``, ``Season``, ``Is_Home``, sorted by
        ``Game_ID`` with the home row before the away row of each game.
        Index labels are inherited from ``df``, so each label appears twice.
    """
    # Work on a copy of the argument: the previous version read and mutated
    # the notebook-level `results_df` instead of `df`.
    games = df.copy()

    # Create Season column: games played from September onward are labelled
    # with the following calendar year.
    games['Game_Date'] = pd.to_datetime(games['Game_Date'])
    games['Season'] = games['Game_Date'].dt.year + 1 * (games['Game_Date'].dt.month >= 9)

    new_cols = [
        'Game_ID', 'Championship_ID', 'Team_ID', 'Team_Name', 'Team_Score',
        'Opponent_ID', 'Opponent_Name', 'Opponent_Score', 'Game_Date', 'Season', 'Is_Home'
    ]

    # Home perspective: the home club is the "team", the away club the opponent.
    home_df = games.rename(columns={
        'Home_Club_ID': 'Team_ID',
        'Home_Club_AKA': 'Team_Name',
        'Home_Score': 'Team_Score',
        'Away_Club_ID': 'Opponent_ID',
        'Away_Club_AKA': 'Opponent_Name',
        'Away_Score': 'Opponent_Score'
    })
    home_df['Is_Home'] = True

    # Away perspective: the same games with the two roles swapped.
    away_df = games.rename(columns={
        'Away_Club_ID': 'Team_ID',
        'Away_Club_AKA': 'Team_Name',
        'Away_Score': 'Team_Score',
        'Home_Club_ID': 'Opponent_ID',
        'Home_Club_AKA': 'Opponent_Name',
        'Home_Score': 'Opponent_Score'
    })
    away_df['Is_Home'] = False

    restructured_df = pd.concat([home_df, away_df], ignore_index=False)

    # Reorder the columns
    restructured_df = restructured_df[new_cols]
    # Sort the rows by Game_ID; a stable sort keeps the concat order
    # (home, then away) inside each game so the result is deterministic.
    restructured_df = restructured_df.sort_values(by='Game_ID', kind='stable')

    return restructured_df

# --- Row filters, each evaluated on the freshly loaded results -------------
# Keep a game if at least one side scored (OR, so only 0-0 games are dropped).
has_points = (results_df['Home_Score'] != 0) | (results_df['Away_Score'] != 0)

# Keep a game only if neither score is above 200.
scores_in_range = (results_df['Home_Score'] <= 200) & (results_df['Away_Score'] <= 200)

# Keep a game only if Miami Sharks is on neither side.
no_miami_sharks = (
    (results_df['Home_Club_AKA'] != 'Miami Sharks')
    & (results_df['Away_Club_AKA'] != 'Miami Sharks')
)

# Drop games where both sides are 'Out of State'.
not_both_out_of_state = ~(
    (results_df['Home_Club_AKA'] == 'Out of State')
    & (results_df['Away_Club_AKA'] == 'Out of State')
)

results_df = results_df[has_points & scores_in_range & no_miami_sharks & not_both_out_of_state]

# Reshape to one row per team per game.
results_df = restructure_dataframe(results_df)

# Seed the ELO columns: every rating starts at 1500 with no change recorded.
results_df = results_df.assign(Team_ELO=1500, Opponent_ELO=1500, ELO_Change=0)

# Order chronologically, then persist without the index.
results_df = results_df.sort_values('Game_Date')
results_df.to_csv('rugbyfl_data.csv', index=False)

In [77]:
# Load the ELO table from 'rugbyfl_elo.csv'.
# NOTE(review): presumably produced by a separate ELO step run on
# 'rugbyfl_data.csv' — that step is not visible in this notebook; confirm.
elo_df = pd.read_csv('rugbyfl_elo.csv')

In [78]:
# Rows worth a closer look: anything rated above 1800 ELO.
sus = elo_df.loc[elo_df['Team_ELO'] > 1800]

In [79]:
# Dump each flagged row as a plain list of its values.
for idx, row in sus.iterrows():
    print(row.tolist())

[8720, 134, 44, 'Out of State', 7, 102, 'Okapi Rugby', 52, '2022-04-28', 2022, False, 1842, 1742, -32]
[8884, 146, 102, 'Okapi Rugby', 73, 75, 'Treasure Coast Armada', 5, '2023-02-25', 2023, True, 1801, 1412, 3]
[10097, 152, 102, 'Okapi Rugby', 33, 7, 'Tampa Krewe', 5, '2023-03-04', 2023, True, 1804, 1440, 2]
[10079, 150, 102, 'Okapi Rugby', 69, 7, 'Tampa Krewe', 0, '2023-03-04', 2023, True, 1806, 1456, 4]
[10098, 152, 102, 'Okapi Rugby', 49, 55, 'Boca Raton Juniors', 17, '2023-03-11', 2023, False, 1810, 1655, 17]
[10080, 150, 102, 'Okapi Rugby', 28, 55, 'Boca Raton Juniors', 7, '2023-03-16', 2023, False, 1827, 1638, 11]
[8901, 146, 102, 'Okapi Rugby', 20, 118, 'SoFlo', 0, '2023-03-18', 2023, False, 1838, 1461, 4]
[10066, 146, 102, 'Okapi Rugby', 0, 40, 'Gainesville Hogs', 20, '2023-03-25', 2023, False, 1842, 1506, -35]
[10128, 146, 102, 'Okapi Rugby', 26, 117, 'Claymores RFC', 27, '2023-04-01', 2023, True, 1807, 1574, -8]
[10100, 152, 102, 'Okapi Rugby', 5, 42, 'KB Rugby Rats', 21, '2

In [94]:
# Restrict to the 2024 season, then count rows (games) per Team_Name.
season_2024 = results_df.loc[results_df['Season'] == 2024]
team_games = (
    season_2024
    .groupby('Team_Name')
    .size()
    .reset_index(name='Total_Games')
)

In [95]:
# One [Team_Name, Total_Games] list per line.
for idx, row in team_games.iterrows():
    print(row.tolist())

['Ave Maria', 6]
['Boca Raton', 11]
['Boca Raton Juniors', 23]
['Brevard', 12]
['Cardinal Gibbons ', 9]
['Claymores RFC', 7]
['Daytona M', 11]
['Eckerd Rugby M', 13]
['Eckerd Rugby W', 6]
['FAU Rugby Men', 13]
['FIU M', 8]
['FIU W', 2]
['Florida State M', 13]
['Florida State W', 4]
['Fort Miami', 7]
['Ft. Lauderdale', 9]
['Gainesville Hogs', 11]
['Hammerheads', 11]
['Jacksonville M', 10]
['Jacksonville W', 9]
['Jacksonville Wolverines ', 14]
['KB Rugby Rats', 34]
['Lakeland', 5]
['Miami Rugby FC', 8]
['Miami Tridents', 19]
['Millennia', 6]
['Okapi Rugby', 38]
['Orlando RFC', 19]
['Out of State', 19]
['Palm Beach', 10]
['Pelicans', 19]
['Sarasota', 7]
['St. Thomas', 16]
['Tallahassee RFC', 6]
['Tampa Krewe', 33]
['Treasure Coast Armada', 6]
['UCF M', 15]
['UCF W', 5]
['UF Men', 14]
['UF Women', 5]
['UM Men', 7]
['UM Women', 1]
['UNF RFC', 13]
['USF - W', 2]
['USF M', 12]
['Wellington Wizards', 24]


In [97]:
# Print every 2024-season row for the Pelicans, one list per row.
for idx, row in season_2024.loc[season_2024['Team_Name'] == 'Pelicans'].iterrows():
    print(row.tolist())

[10343, 158, 10, 'Pelicans', 33, 40, 'Gainesville Hogs', 10, Timestamp('2023-10-21 00:00:00'), 2024, True, 1500, 1500, 0]
[10342, 158, 10, 'Pelicans', 17, 24, 'Brevard', 7, Timestamp('2023-10-21 00:00:00'), 2024, False, 1500, 1500, 0]
[10340, 158, 10, 'Pelicans', 33, 7, 'Tampa Krewe', 17, Timestamp('2023-10-27 00:00:00'), 2024, True, 1500, 1500, 0]
[10339, 158, 10, 'Pelicans', 45, 44, 'Out of State', 29, Timestamp('2023-12-09 00:00:00'), 2024, True, 1500, 1500, 0]
[10310, 162, 10, 'Pelicans', 17, 117, 'Claymores RFC', 17, Timestamp('2024-01-20 00:00:00'), 2024, True, 1500, 1500, 0]
[10300, 157, 10, 'Pelicans', 23, 11, 'Orlando RFC', 5, Timestamp('2024-01-20 00:00:00'), 2024, True, 1500, 1500, 0]
[10316, 162, 10, 'Pelicans', 19, 75, 'Treasure Coast Armada', 19, Timestamp('2024-01-27 00:00:00'), 2024, True, 1500, 1500, 0]
[10233, 157, 10, 'Pelicans', 23, 7, 'Tampa Krewe', 14, Timestamp('2024-01-27 00:00:00'), 2024, True, 1500, 1500, 0]
[10319, 162, 10, 'Pelicans', 3, 24, 'Brevard', 29, T

In [100]:
# Distinct team names, in order of first appearance in results_df.
teams = results_df['Team_Name'].unique()
print(teams.tolist())

['UM Men', 'FAU Rugby Men', 'Ave Maria', 'Tampa Krewe', 'USF M', 'UF Men', 'Gainesville Hogs', 'FAU Rugby W', 'Miami Tridents', 'FGCU', 'USF - W', 'FGCU Women', 'Eckerd Rugby W', 'Out of State', 'Hammerheads', 'Orlando RFC', 'Ft. Lauderdale', 'Florida State W', 'UCF W', 'UCF M', 'Miami Rugby FC', 'Palm Beach', 'Boca Raton', 'Treasure Coast Armada', 'FIU W', 'FIU M', 'UM Women', 'UNF RFC', 'UF Women', 'Eckerd Rugby M', 'Florida State M', 'Jacksonville M', 'Pelicans', 'Tallahassee RFC', 'Jacksonville W', 'Daytona M', 'Miami Rugby Youth', 'Key West  Rugby', 'Sarasota', 'Brevard', 'Indian River Raptors', 'Deltona Rugby', 'Hammerhead Youth Rugby', 'Boca Raton Juniors', 'Daytona U19', 'Cape Pirates', 'Estero Spartans RFC', 'Trojan U19', 'Weston U19', 'Rugby in Florida', 'Rugby Club', 'Weston Youth', 'Fort Miami', 'KB Rugby Rats', 'Florida Youth HPP', 'Riviera Beach', 'West Pines Rugby', 'Archbishop McCarthy', 'Cardinal Gibbons ', 'Florida Tech Rugby', 'FRU  HPP', 'Okapi Rugby', 'Aquinas Rugb

In [101]:
# Hand-picked subset of team names treated as college sides.
college_teams = [
    'UM Men',
    'FAU Rugby Men',
    'Ave Maria',
    'USF M',
    'UF Men',
    'FAU Rugby W',
    'FGCU',
    'USF - W',
    'FGCU Women',
    'Eckerd Rugby W',
    'Florida State W',
    'UCF W',
    'UCF M',
    'FIU W',
    'FIU M',
    'UM Women',
    'UNF RFC',
    'UF Women',
    'Eckerd Rugby M',
    'Florida State M',
    'Florida Tech Rugby',
    'St. Thomas',
]

In [108]:
# For each college side print: the number of rows it has in results_df, its
# highest Team_ELO in elo_df, and the Game_Date of the first row at that peak.
for team in college_teams:
    team_df = elo_df[elo_df['Team_Name'] == team]
    games_played = results_df[results_df['Team_Name'] == team].shape[0]

    if team_df.empty:
        # Without ELO rows max() is NaN and the [0] lookup below would raise
        # IndexError, so report the gap and move on.
        print(f"{team}: ", games_played, "no ELO history")
        continue

    highest_elo = team_df['Team_ELO'].max()
    # Build the mask from team_df itself. A mask taken from the full elo_df
    # has a different index and makes pandas warn "Boolean Series key will be
    # reindexed to match DataFrame index".
    highest_elo_date = team_df.loc[team_df['Team_ELO'] == highest_elo, 'Game_Date'].values[0]
    print(f"{team}: ", games_played, highest_elo, highest_elo_date)

UM Men:  85 1505 2012-09-08
FAU Rugby Men:  129 1587 2014-10-25
Ave Maria:  64 1567 2014-03-08
USF M:  140 1682 2020-03-06
UF Men:  103 1689 2015-01-17
FAU Rugby W:  27 1549 2013-11-09
FGCU:  55 1517 2012-10-20
USF - W:  58 1567 2015-10-31
FGCU Women:  35 1505 2012-09-29
Eckerd Rugby W:  55 1676 2020-03-07
Florida State W:  62 1624 2013-02-16
UCF W:  65 1774 2018-03-24
UCF M:  118 1777 2015-03-21
FIU W:  55 1762 2016-04-02
FIU M:  127 1689 2018-02-03
UM Women:  17 1505 2012-10-13
UNF RFC:  107 1644 2014-03-08
UF Women:  67 1716 2015-02-28
Eckerd Rugby M:  85 1598 2018-11-10
Florida State M:  107 1635 2024-03-02
Florida Tech Rugby:  4 1511 2015-02-14
St. Thomas:  23 1741 2024-04-20


  highet_elo_date = team_df[elo_df['Team_ELO'] == highest_elo]['Game_Date'].values[0]
