In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy.stats import beta


In [2]:
df = pd.read_csv('2024 Torvik.csv')

#Use standard scaler on sos
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df['SOS'] = scaler.fit_transform(df['SOS'].values.reshape(-1,1))
df['WAB'] = scaler.fit_transform(df['WAB'].values.reshape(-1,1))

ftr = pd.read_csv('Raw Data/FTR.csv')
ftr

#Replace state with St. in ftr if not N.C. State

ftr['School'] = ftr['School'].str.replace('State','St.')

#If name N.C. St. replace with N.C. State
ftr['School'] = ftr['School'].str.replace('N.C. St.','N.C. State')

In [3]:
def get_adj_metric_pos (df, metric):
    X = sm.add_constant(df['SOS'])
    model = sm.OLS(df[metric], X).fit()
    df['Adj ' + metric] = df[metric] + (model.params['SOS'] * df['SOS'])
    return df
def get_adj_metric_neg (df, metric):
    X = sm.add_constant(df['SOS'])
    model = sm.OLS(df[metric], X).fit()
    df['Adj ' + metric] = df[metric] - (model.params['SOS'] * df['SOS'])
    return df
def harmonic_mean(x,y):
    return ((x*y*2)/(x+y))

In [4]:
#Find coefficients to adjust metrics based on SOS
df = get_adj_metric_pos(df, 'ORB')
df = get_adj_metric_pos(df, 'EFG%')
df = get_adj_metric_pos(df, '2P%')
df = get_adj_metric_pos(df, '3P%')
df = get_adj_metric_pos(df, 'TORD')
df = get_adj_metric_pos(df, 'DRB')
df = get_adj_metric_pos(df, 'EFGD%')
df = get_adj_metric_pos(df, '2P%D')
df = get_adj_metric_pos(df, '3P%D')
df = get_adj_metric_pos(df, 'TOR')

In [5]:
def get_beta_value (df,metric,home_team,away_team):
    mean_home = df.loc[home_team][metric]
    mean_away = df.loc[away_team][metric]
    alpha_home = 20 * mean_home
    beta_home = 20 * (1 - mean_home)
    alpha_away = 20 * mean_away
    beta_away = 20 * (1 - mean_away)
    home_val = beta.rvs(alpha_home, beta_home, size=1)
    away_val = beta.rvs(alpha_away, beta_away, size=1)
    return home_val[0], away_val[0]

def get_regular_team_stats(home_team,away_team):
    df_team = df[(df['Team'] == home_team) | (df['Team'] == away_team)]
    df_team = df_team[['Team', 'Barthag', 'Adj ORB', 'Adj DRB', 'Adj EFG%', 'Adj 2P%', 'Adj 3P%', 'Adj TORD', 'Adj EFGD%', 'Adj TOR', 'Adj 2P%D', 'Adj 3P%D','Adj T.','3PR','FTR','FTRD','3PRD']]
    df_team = df_team.set_index('Team')
    for col in [ 'Adj ORB', 'Adj DRB', 'Adj EFG%', 'Adj 2P%', 'Adj 3P%', 'Adj TORD', 'Adj EFGD%', 'Adj TOR', 'Adj 2P%D', 'Adj 3P%D','3PR','FTR','FTRD','3PRD']:
        df_team[col] = df_team[col] / 100
    return df_team

def get_simulated_team_stats(home_team, away_team, df):
    df_team = df[(df['Team'] == home_team) | (df['Team'] == away_team)]
    df_team = df_team[['Team', 'Barthag' ,'Adj ORB', 'Adj DRB', 'Adj EFG%', 'Adj 2P%', 'Adj 3P%', 'Adj TORD', 'Adj EFGD%', 'Adj TOR', 'Adj 2P%D', 'Adj 3P%D','Adj T.','3PR','FTR','FTRD','3PRD']]
    df_team = df_team.set_index('Team')
    #Reorder rows such that home team is first
    if df_team.index[0] == away_team:
        df_team = df_team.reindex([home_team, away_team])
    df_sim = df_team.copy()
    for col in [ 'Adj ORB', 'Adj DRB', 'Adj EFG%', 'Adj 2P%', 'Adj 3P%', 'Adj TORD', 'Adj EFGD%', 'Adj TOR', 'Adj 2P%D', 'Adj 3P%D','3PR','FTR','FTRD','3PRD']:
        df_team[col] = df_team[col] / 100
        df_sim[col] = df_sim[col] / 100
        df_sim[col] = get_beta_value(df_sim,col,home_team,away_team)
    return df_sim

def game_spread_ppp (home_team, away_team, teams_df, use_bart = True):

    

    #For each team take the harmonic mean of 
    df_game = pd.DataFrame()
    df_game['Team'] = [home_team, away_team]

    
    #Get free throw rates for home and away teams
    home_ftr = ftr[ftr['School'] == home_team]['FT%'].values[0]
    away_ftr = ftr[ftr['School'] == away_team]['FT%'].values[0]

    #Get strength differences
    h_w = 1
    home_advantage = (teams_df.loc[home_team]['Barthag'] - teams_df.loc[away_team]['Barthag'])/3.5 + 1
    if use_bart:
        home_advantage = home_advantage
    else:
        home_advantage = 1
    a_w = 1#(teams_df.loc[away_team]['Barthag'] - teams_df.loc[home_team]['Barthag']) + 1
    
    #Get harmonic means for each team
    df_game['ORB'] = [harmonic_mean(teams_df.loc[home_team]['Adj ORB'],teams_df.loc[away_team]['Adj DRB']),
                    harmonic_mean(teams_df.loc[away_team]['Adj ORB'],teams_df.loc[home_team]['Adj DRB'])]
    df_game['2P%'] = [harmonic_mean(teams_df.loc[home_team]['Adj 2P%'],teams_df.loc[away_team]['Adj 2P%D']),
                    harmonic_mean(teams_df.loc[away_team]['Adj 2P%'],teams_df.loc[home_team]['Adj 2P%D'])]
    df_game['3P%'] = [harmonic_mean(teams_df.loc[home_team]['Adj 3P%'],teams_df.loc[away_team]['Adj 3P%D']),
                    harmonic_mean(teams_df.loc[away_team]['Adj 3P%'],teams_df.loc[home_team]['Adj 3P%D'])]
    df_game['TOR'] = [harmonic_mean(teams_df.loc[home_team]['Adj TOR'],teams_df.loc[away_team]['Adj TORD']),
                    harmonic_mean(teams_df.loc[away_team]['Adj TOR'],teams_df.loc[home_team]['Adj TORD'])]
    df_game['FTR'] = [harmonic_mean(teams_df.loc[home_team]['FTR'],teams_df.loc[away_team]['FTRD']),
                    harmonic_mean(teams_df.loc[away_team]['FTR'],teams_df.loc[home_team]['FTRD'])]
    df_game['Adj FTR'] = df_game['FTR'] * .44
    df_game['FT%'] = [home_ftr, away_ftr]
    df_game['3PR'] = [harmonic_mean(teams_df.loc[home_team]['3PR'],teams_df.loc[away_team]['3PRD']),
                    harmonic_mean(teams_df.loc[away_team]['3PR'],teams_df.loc[home_team]['3PRD'])]

    df_game['ev_2pt'] = df_game['2P%'] * (1 - df_game['3PR']) * 2
    df_game['ev_3pt'] = df_game['3P%'] * df_game['3PR'] * 3
    df_game['Adj ORB'] = ((df_game['2P%'] * (1 - df_game['3PR']) + (df_game['3P%'] * df_game['3PR']))) * df_game['ORB']    
    df_game['shot attempts'] = (((1-df_game['TOR'] - df_game['Adj FTR'] - df_game['Adj ORB']) * 1)) + (df_game['Adj ORB']*2)
    df_game['Adj T.'] = [teams_df.loc[home_team]['Adj T.'], teams_df.loc[away_team]['Adj T.']]
    df_game['PPP'] = df_game['shot attempts'] * df_game['ev_2pt'] + df_game['shot attempts'] * df_game['ev_3pt'] + (df_game['Adj FTR'] * df_game['FT%'] * 1.5)
    df_game['Est FTA'] = (df_game['Adj FTR'] * 1.5 * df_game['Adj T.']) 
    

    possessions = (teams_df.loc[home_team]['Adj T.']+teams_df.loc[away_team]['Adj T.'])/2

    df_game['Predicted Score'] = df_game['PPP'] * possessions

    #Update home team to multiply by home_advantage if use_bart = True

    df_game.loc[0,'Predicted Score'] = df_game.loc[0,'Predicted Score'] * home_advantage

    #Get projected score for home team

    #print('Projected Spread Home Team: ', df_game.loc[1]['Predicted Score'] - (df_game.loc[0]['Predicted Score'] + 3))
    #print('Projected Total: ', df_game.loc[0]['Predicted Score'] + df_game.loc[1]['Predicted Score'])

    return df_game

def probability_to_moneyline(win_prob):
    """
    Convert win probability (0 to 1) to American moneyline odds.
    
    Args:
        win_prob (float): The probability of winning (between 0 and 1).
        
    Returns:
        int: The moneyline odds.
    """
    if not 0 < win_prob < 1:
        raise ValueError("Win probability must be between 0 and 1 (exclusive).")
        
    if win_prob > 0.5:
        # Negative odds for favorites
        moneyline = -100 * (win_prob / (1 - win_prob))
    else:
        # Positive odds for underdogs
        moneyline = 100 * ((1 - win_prob) / win_prob)
        
    return int(round(moneyline))

def get_betting_pcts (spread_home, vegas_ou,spread,totals):

    
    spread = np.array(spread)
    totals = np.array(totals)

    if spread_home > 0:
        print('Odds of Home Covering spread:', len(spread[spread <= spread_home])/len(spread))
    else:
        print('Odds of Home Covering spread:', len(spread[spread <= spread_home])/len(spread))
    if spread_home > 0:
        print('Odds of Away Covering spread:', len(spread[spread >= spread_home])/len(spread))
    else:
        print('Odds of Away Covering spread:', len(spread[spread >= spread_home])/len(spread))

    print('Odds of Over: ', len(totals[totals >= vegas_ou])/len(totals))
    print('Odds of Under: ', len(totals[totals <= vegas_ou])/len(totals))

def simulate_game(home_team, away_team,num_sims = 1000,home_adjustment = 3,away_adjustment = 0, use_bart = True):
    home_wins = 0
    away_wins = 0
    spread = []
    totals = []
    ties = 0
    for i in range(num_sims):
        df_team = get_simulated_team_stats(home_team, away_team, df)
        df_game = game_spread_ppp(home_team, away_team, df_team, use_bart)
        #home_advantage = (df_team.loc[home_team]['Barthag'] - df_team.loc[away_team]['Barthag'])/2
        if (df_game.loc[0]['Predicted Score'] + home_adjustment)  > df_game.loc[1]['Predicted Score'] + away_adjustment:
            home_wins += 1
        elif (df_game.loc[0]['Predicted Score'] + home_adjustment) < df_game.loc[1]['Predicted Score'] + away_adjustment:
            away_wins += 1
        else:
            ties += 1
        spread.append(df_game.loc[1]['Predicted Score'] + away_adjustment - ((df_game.loc[0]['Predicted Score'] + home_adjustment) ))
        totals.append((df_game.loc[0]['Predicted Score'] + home_adjustment)  + df_game.loc[1]['Predicted Score'])
    
    print(home_team + ' Wins: ', home_wins/num_sims)
    print(away_team + ' Wins: ', away_wins/num_sims)
    print('Ties: ', ties/num_sims)
    print('Median Spread: ', np.median(spread))
    print('Median Total: ', np.median(totals))
    print('Average Spread: ', np.mean(spread))
    print('Average Total: ', np.mean(totals))
    print('Money Line Home Team: ',probability_to_moneyline(home_wins/num_sims))


    #plt.hist(spread, bins=50)
    #plt.show()
    #plt.hist(totals, bins=50)
    #plt.show()

    return spread,totals

def get_efficiency_pred(home_team, away_team):
    df_game = df[(df['Team'] == home_team) | (df['Team'] == away_team)]
    df_game = df_game[['Team','AdjOE','AdjDE','Adj T.']]
    df_game = df_game.set_index('Team')
    #Make sure home team is first
    if df_game.index[0] == away_team:
        df_game = df_game.reindex([home_team, away_team])
    expected_possessions = df_game['Adj T.'].sum() / 2

    df_game['Expected PP100'] = [harmonic_mean(df_game.loc[home_team]['AdjOE'],df_game.loc[away_team]['AdjDE']),
                        harmonic_mean(df_game.loc[away_team]['AdjOE'],df_game.loc[home_team]['AdjDE'])]
    df_game['Score'] = (df_game['Expected PP100'] * expected_possessions)/100
    df_game['Score']
    spread = df_game.loc[away_team]['Score'] - (df_game.loc[home_team]['Score'] + 3)
    total = df_game['Score'].sum()
    return spread,total

import scipy.stats as stats
def calculate_win_probability(proj_spread, vegas_spread, sigma=4.5):
    """Calculate win probability based on projected and Vegas spread."""
    diff = vegas_spread - proj_spread  # Difference between Vegas and projected spread
    return stats.norm.cdf(diff / sigma)  # Normal CDF for probability

def calculate_ev(win_prob, odds=-110):
    """Calculate Expected Value (EV) given win probability and betting odds."""
    payout = 100 if odds == -110 else abs(10000 / odds)  # Convert odds to payout per $100 risked
    risked = 110  # Standard -110 odds require risking 110 to win 100
    return (win_prob * payout) - ((1 - win_prob) * risked)

In [6]:
ncaa_bracket_2024 = {
    "East": [
        ((1, "Connecticut"), (16, "Stetson")),
        ((8, "Florida Atlantic"), (9, "Northwestern")),
        ((5, "San Diego St."), (12, "UAB")),
        ((4, "Auburn"), (13, "Yale")),
        ((6, "BYU"), (11, "Duquesne")),
        ((3, "Illinois"), (14, "Morehead St.")),
        ((7, "Washington St."), (10, "Drake")),
        ((2, "Iowa St."), (15, "South Dakota St."))
    ],
    "West": [
        ((1, "North Carolina"), (16, "Wagner")),
        ((8, "Mississippi St."), (9, "Michigan St.")),
        ((5, "Saint Mary's"), (12, "Grand Canyon")),
        ((4, "Alabama"), (13, "Charleston")),
        ((6, "Clemson"), (11, "New Mexico")),
        ((3, "Baylor"), (14, "Colgate")),
        ((7, "Dayton"), (10, "Nevada")),
        ((2, "Arizona"), (15, "Long Beach St."))
    ],
    "South": [
        ((1, "Houston"), (16, "Longwood")),
        ((8, "Memphis"), (9, "Texas A&M")),
        ((5, "Wisconsin"), (12, "James Madison")),
        ((4, "Duke"), (13, "Vermont")),
        ((6, "Texas Tech"), (11, "N.C. State")),
        ((3, "Kentucky"), (14, "Oakland")),
        ((7, "Colorado"), (10, "Florida")),
        ((2, "Marquette"), (15, "Western Kentucky"))
    ],
    "Midwest": [
        ((1, "Purdue"), (16, "Grambling St.")),
        ((8, "Utah St."), (9, "TCU")),
        ((5, "Gonzaga"), (12, "McNeese St.")),
        ((4, "Kansas"), (13, "Samford")),
        ((6, "South Carolina"), (11, "Oregon")),
        ((3, "Creighton"), (14, "Akron")),
        ((7, "Texas"), (10, "Colorado St.")),
        ((2, "Saint Peter's"), (15, "Tennessee"))
    ]
}

for region, matchups in ncaa_bracket_2024.items():
    print(f"\n{region} Region:")
    for match in matchups:
        print(f"{match[0][0]} {match[0][1]} vs {match[1][0]} {match[1][1]}")


East Region:
1 Connecticut vs 16 Stetson
8 Florida Atlantic vs 9 Northwestern
5 San Diego St. vs 12 UAB
4 Auburn vs 13 Yale
6 BYU vs 11 Duquesne
3 Illinois vs 14 Morehead St.
7 Washington St. vs 10 Drake
2 Iowa St. vs 15 South Dakota St.

West Region:
1 North Carolina vs 16 Wagner
8 Mississippi St. vs 9 Michigan St.
5 Saint Mary's vs 12 Grand Canyon
4 Alabama vs 13 Charleston
6 Clemson vs 11 New Mexico
3 Baylor vs 14 Colgate
7 Dayton vs 10 Nevada
2 Arizona vs 15 Long Beach St.

South Region:
1 Houston vs 16 Longwood
8 Memphis vs 9 Texas A&M
5 Wisconsin vs 12 James Madison
4 Duke vs 13 Vermont
6 Texas Tech vs 11 N.C. State
3 Kentucky vs 14 Oakland
7 Colorado vs 10 Florida
2 Marquette vs 15 Western Kentucky

Midwest Region:
1 Purdue vs 16 Grambling St.
8 Utah St. vs 9 TCU
5 Gonzaga vs 12 McNeese St.
4 Kansas vs 13 Samford
6 South Carolina vs 11 Oregon
3 Creighton vs 14 Akron
7 Texas vs 10 Colorado St.
2 Saint Peter's vs 15 Tennessee


In [7]:
def simulate_one_game(home_team, away_team,home_adjustment = 0,away_adjustment = 0, use_bart = True):
    home_team_name = home_team[1]
    away_team_name = away_team[1]
    df_team = get_simulated_team_stats(home_team_name, away_team_name, df)
    df_game = game_spread_ppp(home_team_name, away_team_name, df_team, use_bart)
    #home_advantage = (df_team.loc[home_team]['Barthag'] - df_team.loc[away_team]['Barthag'])/2
    if (df_game.loc[0]['Predicted Score'] + home_adjustment)  > df_game.loc[1]['Predicted Score'] + away_adjustment:
        return home_team
    elif (df_game.loc[0]['Predicted Score'] + home_adjustment) < df_game.loc[1]['Predicted Score'] + away_adjustment:
        return away_team
    else:
        ties += 1

def simulate_round(matchups, progress):
    """Simulate a round of games and return the winners."""
    winners = []
    for match in matchups:
        #print(match)
        winner = simulate_one_game(match[0], match[1])
        losers = [team for team in match if team != winner]
        for loser in losers:
            progress[loser[1]] += 1  # Increment the round for the losing team
        winners.append(winner)
    return winners

def advance_winners(region, matchups, progress):
    """Advance the winners to the next round and track progress."""
    round_number = 1
    while len(matchups) > 1:
        matchups = [(simulate_one_game(matchups[i], matchups[i+1])) for i in range(0, len(matchups), 2)]
        round_number += 1
    for team in matchups:
        progress[team[1]] = round_number  # Update the round for the winning team
    return matchups[0]



In [43]:
import copy
from collections import defaultdict

def simulate_tournament(simulate_one_game, initial_bracket, all_teams, num_simulations=1000):
    team_progress = {team: defaultdict(int) for team in all_teams}
    
    for _ in range(num_simulations):
        # Start with Round of 64
        round_matchups = copy.deepcopy(initial_bracket)
        current_round = "R64"

        # Track which teams are still alive
        alive_teams = set()
        for match in round_matchups:
            for team in match:
                #print(team[1])
                team_progress[team[1]][current_round] += 1
                alive_teams.add(team)

        round_names = ["R32", "S16", "E8", "F4", "Final", "Champion"]
        for next_round in round_names:
            winners = []
            for match in round_matchups:
                winner = simulate_one_game(match[0], match[1])
                #print(winner[1])
                team_progress[winner[1]][next_round] += 1
                winners.append(winner)
            if next_round == "Champion":
                break
            else:  
                round_matchups = [(winners[i], winners[i+1]) for i in range(0, len(winners), 2)]

    return team_progress

# Example call
all_teams = [team[1] for region in ncaa_bracket_2024.values() for match in region for team in match]
bracket = [match for region in ncaa_bracket_2024.values() for match in region]

results = simulate_tournament(simulate_one_game, bracket, all_teams, num_simulations=500)

# To see percentage:
for team, progress in results.items():
    print(f"{team}:")
    for round_name in ["R64", "R32", "S16", "E8", "F4", "Final", "Champion"]:
        percent = progress[round_name] / 500 * 100
        try:
            print(f"  {round_name}: {percent:.1f}%")
        except KeyError:
            print(f"  {round_name}: 0.0%")
            

Connecticut:
  R64: 100.0%
  R32: 95.8%
  S16: 73.4%
  E8: 48.8%
  F4: 33.0%
  Final: 21.2%
  Champion: 14.6%
Stetson:
  R64: 100.0%
  R32: 4.2%
  S16: 0.2%
  E8: 0.0%
  F4: 0.0%
  Final: 0.0%
  Champion: 0.0%
Florida Atlantic:
  R64: 100.0%
  R32: 50.4%
  S16: 12.0%
  E8: 5.6%
  F4: 2.8%
  Final: 1.4%
  Champion: 0.2%
Northwestern:
  R64: 100.0%
  R32: 49.6%
  S16: 14.4%
  E8: 6.0%
  F4: 2.4%
  Final: 1.0%
  Champion: 0.6%
San Diego St.:
  R64: 100.0%
  R32: 72.6%
  S16: 28.8%
  E8: 7.2%
  F4: 4.2%
  Final: 1.2%
  Champion: 0.8%
UAB:
  R64: 100.0%
  R32: 27.4%
  S16: 4.4%
  E8: 0.6%
  F4: 0.0%
  Final: 0.0%
  Champion: 0.0%
Auburn:
  R64: 100.0%
  R32: 79.6%
  S16: 58.8%
  E8: 30.0%
  F4: 20.4%
  Final: 12.4%
  Champion: 8.8%
Yale:
  R64: 100.0%
  R32: 20.4%
  S16: 8.0%
  E8: 1.8%
  F4: 0.8%
  Final: 0.6%
  Champion: 0.0%
BYU:
  R64: 100.0%
  R32: 74.2%
  S16: 41.0%
  E8: 21.8%
  F4: 7.8%
  Final: 4.2%
  Champion: 2.6%
Duquesne:
  R64: 100.0%
  R32: 25.8%
  S16: 9.0%
  E8: 2.8%
  F4: 

# 2025 Bracket

In [45]:
df = pd.read_csv('2025 Torvik.csv')

#Use standard scaler on sos
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df['SOS'] = scaler.fit_transform(df['SOS'].values.reshape(-1,1))
df['WAB'] = scaler.fit_transform(df['WAB'].values.reshape(-1,1))

ftr = pd.read_csv('Raw Data/FTR.csv')
ftr

#Replace state with St. in ftr if not N.C. State

ftr['School'] = ftr['School'].str.replace('State','St.')

#If name N.C. St. replace with N.C. State
ftr['School'] = ftr['School'].str.replace('N.C. St.','N.C. State')

#Find coefficients to adjust metrics based on SOS
df = get_adj_metric_pos(df, 'ORB')
df = get_adj_metric_pos(df, 'EFG%')
df = get_adj_metric_pos(df, '2P%')
df = get_adj_metric_pos(df, '3P%')
df = get_adj_metric_pos(df, 'TORD')
df = get_adj_metric_pos(df, 'DRB')
df = get_adj_metric_pos(df, 'EFGD%')
df = get_adj_metric_pos(df, '2P%D')
df = get_adj_metric_pos(df, '3P%D')
df = get_adj_metric_pos(df, 'TOR')

In [61]:
ncaa_bracket_2025 = {
    "South": [
        ((1, "Auburn"), (16, "Alabama St.")),
        ((8, "Louisville"), (9, "Creighton")),
        ((5, "Michigan"), (12, "UC San Diego")),
        ((4, "Texas A&M"), (13, "Yale")),
        ((6, "Mississippi"), (11, "North Carolina")),
        ((3, "Iowa St."), (14, "Lipscomb")),
        ((7, "Marquette"), (10, "New Mexico")),
        ((2, "Michigan St."), (15, "Bryant"))
    ],
    "West": [
        ((1, "Florida"), (16, "Norfolk St.")),
        ((8, "Connecticut"), (9, "Oklahoma")),
        ((5, "Memphis"), (12, "Colorado St.")),
        ((4, "Maryland"), (13, "Grand Canyon")),
        ((6, "Missouri"), (11, "Drake")),
        ((3, "Texas Tech"), (14, "UNC Wilmington")),
        ((7, "Kansas"), (10, "Arkansas")),
        ((2, "St. John's"), (15, "Nebraska Omaha"))
    ],
    "East": [
        ((1, "Duke"), (16, "American")),
        ((8, "Mississippi St."), (9, "Baylor")),
        ((5, "Oregon"), (12, "Liberty")),
        ((4, "Arizona"), (13, "Akron")),
        ((6, "BYU"), (11, "VCU")),
        ((3, "Wisconsin"), (14, "Montana")),
        ((7, "Saint Mary's"), (10, "Vanderbilt")),
        ((2, "Alabama"), (15, "Robert Morris"))
    ],
    "Midwest": [
        ((1, "Houston"), (16, "SIU Edwardsville")),
        ((8, "Gonzaga"), (9, "Georgia")),
        ((5, "Clemson"), (12, "McNeese St.")),
        ((4, "Purdue"), (13, "High Point")),
        ((6, "Illinois"), (11, "Texas")),
        ((3, "Kentucky"), (14, "Troy")),
        ((7, "UCLA"), (10, "Utah St.")),
        ((2, "Tennessee"), (15, "Wofford"))
    ]
}

for region, matchups in ncaa_bracket_2025.items():
    print(f"\n{region} Region:")
    for match in matchups:
        print(f"{match[0][0]} {match[0][1]} vs {match[1][0]} {match[1][1]}")


South Region:
1 Auburn vs 16 Alabama St.
8 Louisville vs 9 Creighton
5 Michigan vs 12 UC San Diego
4 Texas A&M vs 13 Yale
6 Mississippi vs 11 North Carolina
3 Iowa St. vs 14 Lipscomb
7 Marquette vs 10 New Mexico
2 Michigan St. vs 15 Bryant

West Region:
1 Florida vs 16 Norfolk St.
8 Connecticut vs 9 Oklahoma
5 Memphis vs 12 Colorado St.
4 Maryland vs 13 Grand Canyon
6 Missouri vs 11 Drake
3 Texas Tech vs 14 UNC Wilmington
7 Kansas vs 10 Arkansas
2 St. John's vs 15 Nebraska Omaha

East Region:
1 Duke vs 16 American
8 Mississippi St. vs 9 Baylor
5 Oregon vs 12 Liberty
4 Arizona vs 13 Akron
6 BYU vs 11 VCU
3 Wisconsin vs 14 Montana
7 Saint Mary's vs 10 Vanderbilt
2 Alabama vs 15 Robert Morris

Midwest Region:
1 Houston vs 16 SIU Edwardsville
8 Gonzaga vs 9 Georgia
5 Clemson vs 12 McNeese St.
4 Purdue vs 13 High Point
6 Illinois vs 11 Texas
3 Kentucky vs 14 Troy
7 UCLA vs 10 Utah St.
2 Tennessee vs 15 Wofford


In [62]:
import copy
from collections import defaultdict

def simulate_tournament(simulate_one_game, initial_bracket, all_teams, num_simulations=1000):
    team_progress = {team: defaultdict(int) for team in all_teams}
    
    for _ in range(num_simulations):
        # Start with Round of 64
        round_matchups = copy.deepcopy(initial_bracket)
        current_round = "R64"

        # Track which teams are still alive
        alive_teams = set()
        for match in round_matchups:
            for team in match:
                #print(team[1])
                team_progress[team[1]][current_round] += 1
                alive_teams.add(team)

        round_names = ["R32", "S16", "E8", "F4", "Final", "Champion"]
        for next_round in round_names:
            winners = []
            for match in round_matchups:
                winner = simulate_one_game(match[0], match[1])
                #print(winner[1])
                team_progress[winner[1]][next_round] += 1
                winners.append(winner)
            if next_round == "Champion":
                break
            else:  
                round_matchups = [(winners[i], winners[i+1]) for i in range(0, len(winners), 2)]

    return team_progress


            

In [63]:
# Example call
all_teams = [team[1] for region in ncaa_bracket_2025.values() for match in region for team in match]
bracket = [match for region in ncaa_bracket_2025.values() for match in region]

num_sims = 1000

results = simulate_tournament(simulate_one_game, bracket, all_teams, num_simulations=num_sims)

# To see percentage:
for team, progress in results.items():
    print(f"{team}:")
    for round_name in ["R64", "R32", "S16", "E8", "F4", "Final", "Champion"]:
        percent = progress[round_name] / num_sims * 100
        try:
            print(f"  {round_name}: {percent:.1f}%")
        except KeyError:
            print(f"  {round_name}: 0.0%")

#Store in dataframe with columns for each round
df_results = pd.DataFrame(results)
df_results = df_results.transpose()
#Switch counts to percetnage
df_results = df_results.div(num_sims)
df_results.head()


Auburn:
  R64: 100.0%
  R32: 98.2%
  S16: 65.2%
  E8: 45.7%
  F4: 31.6%
  Final: 20.3%
  Champion: 11.7%
Alabama St.:
  R64: 100.0%
  R32: 1.8%
  S16: 0.1%
  E8: 0.1%
  F4: 0.0%
  Final: 0.0%
  Champion: 0.0%
Louisville:
  R64: 100.0%
  R32: 52.4%
  S16: 17.4%
  E8: 9.2%
  F4: 4.2%
  Final: 1.8%
  Champion: 0.4%
Creighton:
  R64: 100.0%
  R32: 47.6%
  S16: 17.3%
  E8: 8.2%
  F4: 4.3%
  Final: 1.4%
  Champion: 0.2%
Michigan:
  R64: 100.0%
  R32: 53.7%
  S16: 32.8%
  E8: 13.2%
  F4: 6.4%
  Final: 2.2%
  Champion: 1.1%
UC San Diego:
  R64: 100.0%
  R32: 46.3%
  S16: 24.7%
  E8: 9.5%
  F4: 4.2%
  Final: 1.7%
  Champion: 0.8%
Texas A&M:
  R64: 100.0%
  R32: 55.4%
  S16: 24.3%
  E8: 8.2%
  F4: 3.2%
  Final: 1.0%
  Champion: 0.5%
Yale:
  R64: 100.0%
  R32: 44.6%
  S16: 18.2%
  E8: 5.9%
  F4: 1.6%
  Final: 0.6%
  Champion: 0.1%
Mississippi:
  R64: 100.0%
  R32: 49.2%
  S16: 20.4%
  E8: 9.2%
  F4: 3.2%
  Final: 1.2%
  Champion: 0.4%
North Carolina:
  R64: 100.0%
  R32: 50.8%
  S16: 21.1%
  E8: 

Unnamed: 0,R64,R32,S16,E8,F4,Final,Champion
Auburn,1.0,0.982,0.652,0.457,0.316,0.203,0.117
Alabama St.,1.0,0.018,0.001,0.001,0.0,0.0,0.0
Louisville,1.0,0.524,0.174,0.092,0.042,0.018,0.004
Creighton,1.0,0.476,0.173,0.082,0.043,0.014,0.002
Michigan,1.0,0.537,0.328,0.132,0.064,0.022,0.011


In [64]:
df_results.to_csv('2025 NCAA Tournament Predictions 1000 v2 sims.csv')