In [1]:
import pandas as pd


In [2]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)

In [3]:
# Load the RPI table. index_col=False tells pandas not to use the first column as the index.
rpi = pd.read_csv("../2024/2024-05-26-RPI-selection-nolan.csv", index_col=False)

In [4]:
# List the column labels of the loaded RPI table.
rpi.columns

Index(['Unnamed: 0', 'RPI', 'Team', 'Record', 'SOS', 'NC Rec', 'NC RPI',
       'NC SOS', 'H', 'R', 'N', 'Q1', 'Q2', 'Q3', 'Q4', 'RPI Delta'],
      dtype='object')

In [5]:
# Columns of the RPI table that are kept for the rest of the notebook.
rpi_cols = ["RPI", "Record", "Team"]

In [6]:
# Keep only the columns named in rpi_cols, in that order.
rpi = rpi.loc[:, rpi_cols]

In [7]:
# Preview the first five rows.
rpi.head()

Unnamed: 0,RPI,Record,Team
0,1,49-11,Tennessee SEC (22-8)
1,2,44-13,Texas A&M SEC (19-11)
2,3,40-14,Kentucky SEC (22-8)
3,4,42-13,North Carolina ACC (22-8)
4,5,43-14,Arkansas SEC (20-10)


In [8]:
# Break "Team" into at most two pieces at the first double-space separator;
# expand=True returns the pieces as columns 0 and 1 of a new DataFrame.
temp_df = rpi["Team"].str.split("  ", n=1, expand=True)

In [9]:
# Column 1 of the split is the text after the separator (presumably the conference
# and conference record, e.g. "SEC (22-8)" — confirm against the raw file).
rpi["conf_rec"] = temp_df[1]
# Column 0 is the text before the separator; it replaces the combined "Team" value.
rpi["Team"] = temp_df[0]

In [10]:
# Remove the "conf_rec" column again.
rpi = rpi.drop(columns=["conf_rec"])

In [11]:
# Keep only rows whose "RPI" value is not the literal string "RPI"
# (presumably header rows repeated inside the data — confirm against the raw file).
# .copy() makes the filtered frame independent, so the column assignments in
# later cells do not trigger SettingWithCopyWarning.
rpi = rpi[rpi["RPI"] != "RPI"].copy()

In [12]:
# Preview up to the first 50 rows.
rpi.head(50)

Unnamed: 0,RPI,Record,Team
0,1,49-11,Tennessee
1,2,44-13,Texas A&M
2,3,40-14,Kentucky
3,4,42-13,North Carolina
4,5,43-14,Arkansas
5,6,39-15,Georgia
6,7,41-14,Clemson
7,8,42-15,Florida State
8,9,38-20,Wake Forest
9,10,42-13,Indiana State


In [13]:
def make_record_list(record: str) -> list:
    """Split a dash-separated record string into its parts.

    Args:
        record: Record text such as "49-11".

    Returns:
        The substrings found between the "-" separators, in order, as strings.
    """
    return record.split(sep="-")

In [14]:
# Replace each "Record" string with the list of its dash-separated parts.
rpi["Record"] = rpi["Record"].apply(make_record_list)
# Preview the first five rows.
rpi.head()

Unnamed: 0,RPI,Record,Team
0,1,"[49, 11]",Tennessee
1,2,"[44, 13]",Texas A&M
2,3,"[40, 14]",Kentucky
3,4,"[42, 13]",North Carolina
4,5,"[43, 14]",Arkansas


In [15]:
def get_wins(record: list) -> int:
    """Return the win count from a split record.

    Args:
        record: Record parts with the win count first, e.g. ["49", "11"].

    Returns:
        The first element converted to ``int``, so the result matches the
        declared return type instead of being the raw string.

    Raises:
        IndexError: If ``record`` is empty.
        ValueError: If the first element is not an integer literal.
    """
    return int(record[0])

def get_losses(record: list) -> int:
    """Return the loss count from a split record.

    Args:
        record: Record parts with the loss count second, e.g. ["49", "11"].

    Returns:
        The second element converted to ``int``, so the result matches the
        declared return type instead of being the raw string.

    Raises:
        IndexError: If ``record`` has fewer than two elements.
        ValueError: If the second element is not an integer literal.
    """
    return int(record[1])

def get_ties(record: list) -> int:
    """Return the tie count from a split record, or 0 when none is present.

    Args:
        record: Record parts; a third element, if present, is the tie count.

    Returns:
        The third element converted to ``int`` when the record has more than
        two parts, otherwise 0. The result is always an ``int`` (previously a
        string was returned when a tie count was present).

    Raises:
        ValueError: If the third element is not an integer literal.
    """
    if len(record) > 2:
        return int(record[2])
    return 0

In [16]:
# Spread the split "Record" lists into one column per component.
rpi["wins"] = rpi["Record"].apply(get_wins)
rpi["losses"] = rpi["Record"].apply(get_losses)
rpi["ties"] = rpi["Record"].apply(get_ties)

# The list column has been replaced by the three columns above.
rpi = rpi.drop(columns=["Record"])

rpi.head()

Unnamed: 0,RPI,Team,wins,losses,ties
0,1,Tennessee,49,11,0
1,2,Texas A&M,44,13,0
2,3,Kentucky,40,14,0
3,4,North Carolina,42,13,0
4,5,Arkansas,43,14,0


In [17]:
def get_wins_ties(w, t):
    """Return the win total with each tie counted as half a win.

    Args:
        w: Win count; anything ``float()`` accepts.
        t: Tie count; anything ``float()`` accepts.

    Returns:
        ``float(w) + 0.5 * float(t)``.
    """
    win_total = float(w)
    tie_credit = 0.5 * float(t)
    return win_total + tie_credit

def get_losses_ties(l, t):
    """Return the loss total with each tie counted as half a loss.

    Args:
        l: Loss count; anything ``float()`` accepts.
        t: Tie count; anything ``float()`` accepts.

    Returns:
        ``float(l) + 0.5 * float(t)``.
    """
    loss_total = float(l)
    tie_share = 0.5 * float(t)
    return loss_total + tie_share

In [18]:
# Fold ties into the totals row by row: each tie adds half a win and half a loss.
rpi["wins"] = rpi.apply(lambda row: get_wins_ties(row["wins"], row["ties"]), axis=1)
rpi["losses"] = rpi.apply(lambda row: get_losses_ties(row["losses"], row["ties"]), axis=1)

# "ties" has been absorbed into the two columns above.
rpi = rpi.drop(columns=["ties"])

rpi.head(50)

Unnamed: 0,RPI,Team,wins,losses
0,1,Tennessee,49.0,11.0
1,2,Texas A&M,44.0,13.0
2,3,Kentucky,40.0,14.0
3,4,North Carolina,42.0,13.0
4,5,Arkansas,43.0,14.0
5,6,Georgia,39.0,15.0
6,7,Clemson,41.0,14.0
7,8,Florida State,42.0,15.0
8,9,Wake Forest,38.0,20.0
9,10,Indiana State,42.0,13.0


In [19]:
# Preview the first five rows of the cleaned RPI table.
rpi.head()

Unnamed: 0,RPI,Team,wins,losses
0,1,Tennessee,49.0,11.0
1,2,Texas A&M,44.0,13.0
2,3,Kentucky,40.0,14.0
3,4,North Carolina,42.0,13.0
4,5,Arkansas,43.0,14.0


In [20]:
# Load the strength-of-schedule table. index_col=False tells pandas not to use the first column as the index.
sos = pd.read_csv("../2024/2024-05-26-SOS-selection-nolan.csv", index_col=False)

In [21]:
# Preview up to the first 50 rows of the SOS table.
sos.head(50)

Unnamed: 0.1,Unnamed: 0,Team,SOS,Rank,Opp Record,Opp Win Percent
0,0,Florida,0.6079,1,1880-1064,0.6384
1,1,Ole Miss,0.6065,2,1905-1080,0.6381
2,2,North Carolina State,0.589,3,1704-1075,0.613
3,3,Auburn,0.587,4,1738-1128,0.6064
4,4,South Carolina,0.5847,5,1871-1245,0.6004
5,5,Kentucky,0.5829,6,1739-1176,0.5966
6,6,Alabama,0.5824,7,1784-1168,0.6043
7,7,Wake Forest,0.5822,8,1822-1200,0.6029
8,8,Georgia,0.5778,9,1708-1171,0.5931
9,9,Oklahoma,0.5775,10,1788-1190,0.6003


In [22]:
def get_opp_record_list(opp_record: str) -> list:
    """Split a dash-separated opponents' record string into its parts.

    Args:
        opp_record: Record text such as "1880-1064".

    Returns:
        The substrings found between the "-" separators, in order, as strings.
    """
    parts = opp_record.split("-")
    return parts

In [23]:
# Replace each "Opp Record" string with the list of its dash-separated parts.
sos["Opp Record"] = sos["Opp Record"].apply(get_opp_record_list)

In [24]:
def get_opp_wins(opp_record: list) -> float:
    """Return the first entry of a split opponents' record as a float.

    Args:
        opp_record: Record parts with the win count first, e.g. ["1880", "1064"].

    Returns:
        ``float(opp_record[0])``.
    """
    wins_text = opp_record[0]
    return float(wins_text)

def get_opp_losses(opp_record: list) -> float:
    """Return the second entry of a split opponents' record as a float.

    Args:
        opp_record: Record parts with the loss count second, e.g. ["1880", "1064"].

    Returns:
        ``float(opp_record[1])``.
    """
    losses_text = opp_record[1]
    return float(losses_text)

In [25]:
# Pull the two counts out of the split "Opp Record" lists as float columns.
sos["opp_wins"] = sos["Opp Record"].apply(get_opp_wins)
sos["opp_losses"] = sos["Opp Record"].apply(get_opp_losses)

# Remove the "Unnamed: 0" column and the "Opp Record" lists that were just unpacked.
sos = sos.drop(columns=["Unnamed: 0", "Opp Record"])
sos.head(50)

Unnamed: 0,Team,SOS,Rank,Opp Win Percent,opp_wins,opp_losses
0,Florida,0.6079,1,0.6384,1880.0,1064.0
1,Ole Miss,0.6065,2,0.6381,1905.0,1080.0
2,North Carolina State,0.589,3,0.613,1704.0,1075.0
3,Auburn,0.587,4,0.6064,1738.0,1128.0
4,South Carolina,0.5847,5,0.6004,1871.0,1245.0
5,Kentucky,0.5829,6,0.5966,1739.0,1176.0
6,Alabama,0.5824,7,0.6043,1784.0,1168.0
7,Wake Forest,0.5822,8,0.6029,1822.0,1200.0
8,Georgia,0.5778,9,0.5931,1708.0,1171.0
9,Oklahoma,0.5775,10,0.6003,1788.0,1190.0


In [26]:
# Derive "opp_opp_win_pct" as 3 * SOS - 2 * (Opp Win Percent).
# NOTE(review): this equals the opponents'-opponents' winning percentage only if
# SOS = (2 * Opp Win Percent + opp_opp_win_pct) / 3 — confirm against the data source.
sos["opp_opp_win_pct"] = 3 * sos["SOS"] - 2 * sos["Opp Win Percent"]

In [27]:
def make_string(value):
    """Return ``value`` converted to text with ``str``.

    Args:
        value: Any object.

    Returns:
        ``str(value)``.
    """
    text = str(value)
    return text

In [28]:
# Convert the "Team" values in both tables to str before they are used as the merge key.
rpi["Team"] = rpi["Team"].apply(make_string)
sos["Team"] = sos["Team"].apply(make_string)

In [29]:
# Join the two tables on "Team". An inner join keeps only teams whose name
# appears in both tables, so any team missing from either side is left out.
rpi_sos = pd.merge(rpi, sos, on="Team", how='inner')

rpi_sos.head(50)

Unnamed: 0,RPI,Team,wins,losses,SOS,Rank,Opp Win Percent,opp_wins,opp_losses,opp_opp_win_pct
0,1,Tennessee,49.0,11.0,0.5618,24,0.5666,1797.0,1374.0,0.5522
1,2,Texas A&M,44.0,13.0,0.5741,13,0.5856,1786.0,1264.0,0.5511
2,3,Kentucky,40.0,14.0,0.5829,6,0.5966,1739.0,1176.0,0.5555
3,4,North Carolina,42.0,13.0,0.5712,15,0.5857,1678.0,1187.0,0.5422
4,5,Arkansas,43.0,14.0,0.5769,11,0.5873,1789.0,1257.0,0.5561
5,6,Georgia,39.0,15.0,0.5778,9,0.5931,1708.0,1171.0,0.5472
6,7,Clemson,41.0,14.0,0.5682,17,0.5767,1672.0,1227.0,0.5512
7,8,Florida State,42.0,15.0,0.5614,26,0.5677,1698.0,1293.0,0.5488
8,9,Wake Forest,38.0,20.0,0.5822,8,0.6029,1822.0,1200.0,0.5408
9,10,Indiana State,42.0,13.0,0.5303,75,0.5404,1576.0,1340.0,0.5101


In [30]:
# Sort the merged rows by the "Team" column (ascending).
rpi_sos = rpi_sos.sort_values("Team")

# Save the result. The DataFrame index is written as well (to_csv's default), so
# the file starts with an unnamed index column.
# The mid-cell `rpi_sos.head(50)` was removed: only a cell's last expression is
# displayed, so it had no effect.
rpi_sos.to_csv("2024_rpi_sos.csv")