In [1]:
import re
# Function to extract and assign home and away teams
def extract_home_away(match):
    """Split a matchup/team string into ``(away, home, home_advantage)``.

    The result is ordered to be unpacked into ``Away``, ``Home`` and
    ``Home_Advantage`` columns.

    Behaviour by input shape:

    * ``"A vs. B"``: digits are removed from both sides, the two names are
      sorted alphabetically and returned with an advantage of 0.
    * Otherwise the string is tokenised into runs of letters, whitespace and
      ``. ' ’ &`` (digits and parentheses act as separators).
      With an ``(A)`` marker the first token is returned as away and the last
      as home; with ``(H)`` the order is swapped. Both carry an advantage of 3.
    * With no marker, a single name is returned as home with ``None`` as away;
      several tokens are returned as ``(first, last, 0)``.
    * If the string contains no usable name at all, ``(None, None, 0)`` is
      returned instead of raising ``IndexError``.

    Args:
        match: Raw text of the team / matchup cell.

    Returns:
        A 3-tuple ``(away, home, home_advantage)``.
    """
    if "vs." in match:
        # Strip seeds/scores, then order alphabetically.
        teams = sorted(re.sub(r'\d+', '', side).strip() for side in match.split("vs."))
        return teams[0], teams[1], 0

    fragments = re.findall(r"[A-Za-z\s\.'’&]+", match)
    # The pattern also matches runs of pure whitespace (e.g. the gap after a
    # seed number); drop those so an empty string is never reported as a team.
    teams = [name for name in (fragment.strip() for fragment in fragments) if name]
    if not teams:
        return None, None, 0

    if '(A)' in match:
        # First team is away, last team is home.
        return teams[0], teams[-1], 3
    if '(H)' in match:
        # First team is home, last team is away.
        return teams[-1], teams[0], 3
    if len(teams) == 1:
        # A lone team name is reported as the home side.
        return None, teams[0], 0
    # No venue marker: first token goes to the away slot, last to the home slot.
    return teams[0], teams[-1], 0

In [2]:
# Function to extract the first team name
def extract_first_team(match):
    """Return the first team name found in a team / matchup string.

    * ``"A vs. B"``: returns the text before the first ``"vs."`` with digits
      removed and surrounding whitespace stripped.
    * Otherwise the string is tokenised into runs of letters, whitespace and
      ``. ' ’ &`` and the first token that is not blank is returned.

    Args:
        match: Raw text of the team / matchup cell.

    Returns:
        The first team name, or ``None`` when the string contains no name.
    """
    if "vs." in match:
        first_side = match.split("vs.")[0]
        return re.sub(r'\d+', '', first_side).strip()

    for fragment in re.findall(r"[A-Za-z\s\.'’&]+", match):
        name = fragment.strip()
        # The pattern can match a run of pure whitespace (e.g. leading spaces
        # before a seed number); skip it rather than returning "".
        if name:
            return name
    return None

In [5]:
import pandas as pd

# Single source of truth for the season, so the URLs, the Year column and the
# output filename cannot drift apart (this cell used to pull 2026 data but
# save it as '2025 Torvik.csv').
season = 2026

url = f'https://barttorvik.com/trank.php?year={season}&sort=&top=0&conlimit=All&venue=All&type=R#'
# NOTE(review): the last saved run of this cell ended in
# "ValueError: No tables found" -- read_html found no <table> in the response.
df = pd.read_html(url, header= 1)[0]
df['Year'] = season

finaldf = df.copy()

# Clean up the dataset: each of these cells holds two space-separated values.
# 'Rec' splits into the record and 'Conf Record'; every stat column keeps its
# first piece and stores the second as '<stat> Rank'.
finaldf[['Rec', 'Conf Record']] = finaldf['Rec'].str.split(' ', expand=True)
stat_columns = [
    'AdjOE', 'AdjDE', 'Barthag', 'EFG%', 'EFGD%', 'TOR', 'TORD', 'ORB', 'DRB',
    'FTR', 'FTRD', '2P%', '2P%D', '3P%', '3P%D', '3PR', '3PRD', 'Adj T.', 'WAB',
]
for stat in stat_columns:
    finaldf[[stat, f'{stat} Rank']] = finaldf[stat].str.split(' ', expand=True)

# Drop rows whose Team cell is the literal "Team" (presumably header rows
# repeated inside the table body -- verify against the page).
finaldf = finaldf[finaldf['Team'] != "Team"].copy()

# Convert every column that parses cleanly to a numeric dtype and leave the
# others untouched. This replaces pd.to_numeric(errors='ignore'), which is
# deprecated in pandas.
for column in finaldf.columns:
    try:
        finaldf[column] = pd.to_numeric(finaldf[column])
    except (ValueError, TypeError):
        pass  # not numeric: keep the column as-is

# Remove all columns with the word "Rank" in their name
rank_columns = list(finaldf.filter(regex='Rank'))
finaldf = finaldf.drop(columns=rank_columns)

# extract_home_away returns (away, home, home_advantage) for each Team cell
finaldf[['Away', 'Home', 'Home_Advantage']] = finaldf['Team'].apply(lambda x: pd.Series(extract_home_away(x)))
finaldf['Team'] = finaldf['Team'].apply(extract_first_team)

# Get strength of schedule (SOS)
csv = f'http://barttorvik.com/{season}_team_results.csv'
team_results = pd.read_csv(csv, index_col=0)
# Shift the header one position to the left and name the final column.
# Presumably the file's header is misaligned with its data once the first
# column is used as the index -- verify against the CSV.
team_results.columns = team_results.columns[1:].tolist() + ['Fun Rk, adjt']
sos = team_results[['team','sos']].rename(columns={'team': 'Team', 'sos': 'SOS'})

# Attach SOS by team name. This is an inner join, so teams whose names do not
# match between the two sources are dropped.
finaldf = finaldf.merge(sos, on='Team')
finaldf.to_csv(f'{season} Torvik.csv', index=False)

ValueError: No tables found

In [4]:
finaldf

Unnamed: 0,Rk,Team,Conf,G,Rec,AdjOE,AdjDE,Barthag,EFG%,EFGD%,...,3PR,3PRD,Adj T.,WAB,Year,Conf Record,Away,Home,Home_Advantage,SOS
0,1,Houston,B12,34,30–4,124.4,87.6,0.9826,52.7,44.9,...,34.5,43.1,62.1,11.4,2025,19–1,Houston,,0,0.764415
1,2,Duke,ACC,34,31–3,127.2,90.9,0.9795,57.4,44.5,...,45.4,37.9,66.5,9.3,2025,19–1,Duke,,0,0.695892
2,3,Auburn,SEC,33,28–5,129.6,93.2,0.9780,55.7,46.0,...,40.6,34.8,68.6,12.6,2025,15–3,Auburn,,0,0.800202
3,4,Florida,SEC,34,30–4,126.9,94.1,0.9690,55.0,45.3,...,43.6,37.3,70.3,11.0,2025,14–4,Florida,,0,0.735231
4,5,Alabama,SEC,33,25–8,127.2,96.2,0.9616,56.3,47.9,...,46.2,35.1,75.4,10.0,2025,13–5,Alabama,,0,0.832850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,360,Chicago St.,NEC,32,4–28,91.8,114.6,0.0724,44.4,54.9,...,40.9,39.4,69.3,-21.9,2025,4–12,,Chicago St.,0,0.419647
360,361,The Citadel,SC,26,1–25,93.5,117.1,0.0697,46.9,54.5,...,45.2,42.7,65.3,-21.5,2025,0–18,,The Citadel,0,0.343586
361,362,Arkansas Pine Bluff,SWAC,29,4–25,96.0,122.3,0.0583,50.3,56.3,...,37.9,44.9,72.3,-20.1,2025,3–15,,Arkansas Pine Bluff,0,0.319036
362,363,Coppin St.,MEAC,29,5–24,87.7,112.6,0.0535,44.0,55.6,...,32.1,38.8,68.7,-19.1,2025,4–10,,Coppin St.,0,0.383252


In [8]:
# Keep only the rows where both a home and an away team were parsed, reduce
# them to the three matchup columns and drop repeated pairings.
has_both_teams = finaldf['Home'].notna() & finaldf['Away'].notna()
matchups = (
    finaldf.loc[has_both_teams, ['Home', 'Away', 'Home_Advantage']]
    .drop_duplicates()
)
matchups.to_csv('03-15-2025 Matchups.csv', index=False)

In [None]:
# Pull the 2025 T-Rank table.
# NOTE(review): this cell was labelled "Pull last month of data", but the URL
# below carries no begin/end date filter -- confirm which was intended.
import pandas as pd
url = 'https://barttorvik.com/trank.php?year=2025&sort=&top=0&conlimit=All&venue=All&type=R#'
df = pd.read_html(url, header= 1)[0]
df['Year'] = 2025

finaldf = df.copy()

# Clean up the dataset: each of these cells holds two space-separated values.
# 'Rec' splits into the record and 'Conf Record'; every stat column keeps its
# first piece and stores the second as '<stat> Rank'.
finaldf[['Rec', 'Conf Record']] = finaldf['Rec'].str.split(' ', expand=True)
stat_columns = [
    'AdjOE', 'AdjDE', 'Barthag', 'EFG%', 'EFGD%', 'TOR', 'TORD', 'ORB', 'DRB',
    'FTR', 'FTRD', '2P%', '2P%D', '3P%', '3P%D', '3PR', '3PRD', 'Adj T.', 'WAB',
]
for stat in stat_columns:
    finaldf[[stat, f'{stat} Rank']] = finaldf[stat].str.split(' ', expand=True)

# Drop rows whose Team cell is the literal "Team" (presumably header rows
# repeated inside the table body -- verify against the page).
finaldf = finaldf[finaldf['Team'] != "Team"].copy()

# Convert every column that parses cleanly to a numeric dtype and leave the
# others untouched. This replaces pd.to_numeric(errors='ignore'), which is
# deprecated in pandas.
for column in finaldf.columns:
    try:
        finaldf[column] = pd.to_numeric(finaldf[column])
    except (ValueError, TypeError):
        pass  # not numeric: keep the column as-is

# Remove all columns with the word "Rank" in their name
rank_columns = list(finaldf.filter(regex='Rank'))
finaldf = finaldf.drop(columns=rank_columns)

# extract_home_away returns (away, home, home_advantage) for each Team cell
finaldf[['Away', 'Home', 'Home_Advantage']] = finaldf['Team'].apply(lambda x: pd.Series(extract_home_away(x)))
finaldf['Team'] = finaldf['Team'].apply(extract_first_team)

# Get strength of schedule (SOS)
csv = 'http://barttorvik.com/2025_team_results.csv'
team_results = pd.read_csv(csv, index_col=0)
# Shift the header one position to the left and name the final column.
# Presumably the file's header is misaligned with its data once the first
# column is used as the index -- verify against the CSV.
team_results.columns = team_results.columns[1:].tolist() + ['Fun Rk, adjt']
sos = team_results[['team','sos']].rename(columns={'team': 'Team', 'sos': 'SOS'})

# Attach SOS by team name. This is an inner join, so teams whose names do not
# match between the two sources are dropped.
finaldf = finaldf.merge(sos, on='Team')
finaldf.to_csv('2025 Torvik.csv', index=False)

In [None]:
import pandas as pd
# 2024 T-Rank table, restricted by the URL's begin/end parameters to
# 2023-11-01 through 2024-03-11.
url = 'https://barttorvik.com/trank.php?year=2024&sort=&hteam=&t2value=&conlimit=All&state=All&begin=20231101&end=20240311&top=0&revquad=0&quad=5&venue=All&type=All&mingames=0#'
df = pd.read_html(url, header= 1)[0]
df['Year'] = 2024

finaldf = df.copy()

# Clean up the dataset: each of these cells holds two space-separated values.
# 'Rec' splits into the record and 'Conf Record'; every stat column keeps its
# first piece and stores the second as '<stat> Rank'.
finaldf[['Rec', 'Conf Record']] = finaldf['Rec'].str.split(' ', expand=True)
stat_columns = [
    'AdjOE', 'AdjDE', 'Barthag', 'EFG%', 'EFGD%', 'TOR', 'TORD', 'ORB', 'DRB',
    'FTR', 'FTRD', '2P%', '2P%D', '3P%', '3P%D', '3PR', '3PRD', 'Adj T.', 'WAB',
]
for stat in stat_columns:
    finaldf[[stat, f'{stat} Rank']] = finaldf[stat].str.split(' ', expand=True)

# Drop rows whose Team cell is the literal "Team" (presumably header rows
# repeated inside the table body -- verify against the page).
finaldf = finaldf[finaldf['Team'] != "Team"].copy()

# Convert every column that parses cleanly to a numeric dtype and leave the
# others untouched. This replaces pd.to_numeric(errors='ignore'), which is
# deprecated in pandas.
for column in finaldf.columns:
    try:
        finaldf[column] = pd.to_numeric(finaldf[column])
    except (ValueError, TypeError):
        pass  # not numeric: keep the column as-is

# Remove all columns with the word "Rank" in their name
rank_columns = list(finaldf.filter(regex='Rank'))
finaldf = finaldf.drop(columns=rank_columns)

# extract_home_away returns three values (away, home, home_advantage), so three
# target columns are required; assigning to only ['Away', 'Home'] raises
# "Columns must be same length as key".
finaldf[['Away', 'Home', 'Home_Advantage']] = finaldf['Team'].apply(lambda x: pd.Series(extract_home_away(x)))
finaldf['Team'] = finaldf['Team'].apply(extract_first_team)

# Get strength of schedule (SOS)
csv = 'http://barttorvik.com/2024_team_results.csv'
team_results = pd.read_csv(csv, index_col=0)
# Shift the header one position to the left and name the final column.
# Presumably the file's header is misaligned with its data once the first
# column is used as the index -- verify against the CSV.
team_results.columns = team_results.columns[1:].tolist() + ['Fun Rk, adjt']
sos = team_results[['team','sos']].rename(columns={'team': 'Team', 'sos': 'SOS'})


  finaldf = finaldf.apply(pd.to_numeric, errors='ignore')


In [None]:
# Attach the SOS column to the team table by matching on the team name,
# then write the combined table to disk.
finaldf = pd.merge(finaldf, sos, on='Team')
finaldf.to_csv('2024 Torvik.csv', index=False)

In [None]:
# First-round pairings of the 2025 bracket, keyed by region.
# Each game is a pair of (seed, team name) tuples.
ncaa_bracket_2025 = {
    "South": [
        ((1, "Auburn"), (16, "Alabama St.")),
        ((8, "Louisville"), (9, "Creighton")),
        ((5, "Michigan"), (12, "UC San Diego")),
        ((4, "Texas A&M"), (13, "Yale")),
        ((6, "Mississippi"), (11, "San Diego St.")),
        ((3, "Iowa St."), (14, "Lipscomb")),
        ((7, "Marquette"), (10, "New Mexico")),
        ((2, "Michigan St."), (15, "Bryant"))
    ],
    "West": [
        ((1, "Florida"), (16, "Norfolk St.")),
        ((8, "Connecticut"), (9, "Oklahoma")),
        ((5, "Memphis"), (12, "Colorado St.")),
        ((4, "Maryland"), (13, "Grand Canyon")),
        ((6, "Missouri"), (11, "Drake")),
        ((3, "Texas Tech"), (14, "UNC Wilmington")),
        ((7, "Kansas"), (10, "Arkansas")),
        ((2, "St. John's"), (15, "Omaha"))
    ],
    "East": [
        ((1, "Duke"), (16, "American")),
        ((8, "Mississippi St."), (9, "Baylor")),
        ((5, "Oregon"), (12, "Liberty")),
        ((4, "Arizona"), (13, "Akron")),
        ((6, "BYU"), (11, "VCU")),
        ((3, "Wisconsin"), (14, "Montana")),
        ((7, "Saint Mary's"), (10, "Vanderbilt")),
        ((2, "Alabama"), (15, "Robert Morris"))
    ],
    "Midwest": [
        ((1, "Houston"), (16, "SIU Edwardsville")),
        ((8, "Gonzaga"), (9, "Georgia")),
        ((5, "Clemson"), (12, "McNeese St.")),
        ((4, "Purdue"), (13, "High Point")),
        ((6, "Illinois"), (11, "Texas")),
        ((3, "Kentucky"), (14, "Troy")),
        ((7, "UCLA"), (10, "Utah St.")),
        ((2, "Tennessee"), (15, "Wofford"))
    ]
}

# Print every pairing, grouped by region. This iterates the dict defined above;
# the loop previously referenced `ncaa_bracket_2024`, which this cell never
# defines. The loop variable is named `games` so it does not overwrite the
# `matchups` DataFrame built in an earlier cell.
for region, games in ncaa_bracket_2025.items():
    print(f"\n{region} Region:")
    for (seed_a, team_a), (seed_b, team_b) in games:
        print(f"{seed_a} {team_a} vs {seed_b} {team_b}")