# Men's Tournament Scoring

Using the model-generated brackets, find the ones that perform best against a hypothetical public pool.

### Setup Data

In [1]:
# Tournament year being analysed.
season = 2024

# TeamIDs of the play-in losers, which are removed from the seeding data.
# In order: Howard, Virginia, Montana St, Boise St.
playin_losers = (1224, 1438, 1286, 1129)

season

2024

In [2]:
import pickle

# Read the pickled simulation bundle for the selected season.
with open(f'../data/preprocessed/mens_simulation/{season}_mens_simulation.pkl', 'rb') as f:
    simulation_data = pickle.load(f)

# Pull the three collections out of the bundle in one step.
results, candidates, public = (
    simulation_data[section] for section in ('results', 'candidates', 'public')
)

# The bundle itself is no longer needed once its parts are bound.
del simulation_data

'Done'

'Done'

In [3]:
def dict_to_tuple(d):
    """Return the dictionary's values as a tuple, ordered by sorted key."""
    ordered_keys = sorted(d)
    return tuple(map(d.__getitem__, ordered_keys))

In [4]:
import numpy as np

# One row per simulated tournament; values are ordered by sorted dictionary key.
results_array = np.array([dict_to_tuple(simulated) for simulated in results])

results_array.shape

(30000, 63)

In [5]:
# One row per candidate bracket; values are ordered by sorted dictionary key.
candidates_array = np.array([dict_to_tuple(candidate) for candidate in candidates])

candidates_array.shape

(40000, 63)

In [6]:
# One row per public bracket; values are ordered by sorted dictionary key.
public_array = np.array([dict_to_tuple(entry) for entry in public])

public_array.shape

(20000, 63)

### Score Brackets

In [7]:
# Points per correct pick, laid out as 32 + 16 + 8 + 4 + 2 + 1 = 63 slots,
# with the value doubling from one group to the next.
round_scores = np.repeat([10, 20, 40, 80, 160, 320], [32, 16, 8, 4, 2, 1])

def score_brackets(brackets, ground_truth):
    """
    Score every row of `brackets` against `ground_truth`.

    A pick earns its slot's `round_scores` value when it equals the ground
    truth pick in the same position; the result holds one total per row.
    """
    correct_picks = brackets == ground_truth
    return (correct_picks * round_scores).sum(axis=1)

In [8]:
# tqdm.auto selects the notebook or console progress bar like tqdm.autonotebook,
# but without raising the TqdmExperimentalWarning on import.
from tqdm.auto import tqdm

# Score every public bracket against every simulated tournament.
# After the transpose, each row is a public bracket and each column is a
# tournament simulation.

public_scores = np.array(tuple(
    score_brackets(public_array, result) for result in tqdm(results_array)
)).transpose()

public_scores.shape

  from tqdm.autonotebook import tqdm


  0%|          | 0/30000 [00:00<?, ?it/s]

(20000, 30000)

In [9]:
# Score all candidate brackets once per simulated tournament, then flip the
# axes so that rows are candidate brackets and columns are simulations.

candidates_scores = np.array(
    [score_brackets(candidates_array, simulated) for simulated in tqdm(results_array)]
).T

candidates_scores.shape

  0%|          | 0/30000 [00:00<?, ?it/s]

(40000, 30000)

In [10]:
def get_best_brackets(pool_size: int, number_of_brackets: int, first_payout: int, second_payout: int, third_payout: int) -> list:
    """
    Get the best n candidate brackets for a given pool size.

    Subsequent bracket choices hedge the previous ones: once a bracket is
    picked, every simulation in which it won any prize is dropped, so the next
    pick is the best bracket over the simulations that remain.

    Reads the module-level `public_scores` and `candidates_scores` arrays
    (rows are brackets, columns are simulations).

    Parameters
    ----------
    pool_size : int
        Number of entries in the pool. The score needed for 1st/2nd/3rd place
        in each simulation is taken as the `1 - k/pool_size` quantile
        (k = 1, 2, 3) of the public scores, so `pool_size` must be at least 3
        for all three quantiles to lie in [0, 1].
    number_of_brackets : int
        Maximum number of brackets to select.
    first_payout, second_payout, third_payout : int
        Prize for finishing first, second and third.

    Returns
    -------
    list of (index, prize) tuples
        `index` is the row of the chosen bracket in `candidates_scores`;
        `prize` is its mean payout over the simulations that were still
        remaining when it was chosen (not over all simulations). The list is
        shorter than `number_of_brackets` if the picks so far already win a
        prize in every simulation.
    """
    third_place = np.quantile(public_scores, q=1-3/pool_size, axis=0)
    second_place = np.quantile(public_scores, q=1-2/pool_size, axis=0)
    first_place = np.quantile(public_scores, q=1-1/pool_size, axis=0)

    # A candidate must strictly beat a place's threshold to be paid for it.
    # The increments stack, so the total equals the payout of the best place reached.
    candidates_prizes = (
        (candidates_scores > third_place)*(third_payout) +
        (candidates_scores > second_place)*(second_payout - third_payout) +
        (candidates_scores > first_place)*(first_payout - second_payout)
    )

    best_brackets = []  # (index, prize) of the best brackets from candidates
    for _ in tqdm(range(number_of_brackets)):
        # Nothing left to hedge against: averaging over zero simulations would
        # only produce NaN prizes and a meaningless argmax of 0.
        if candidates_prizes.shape[1] == 0:
            break

        # Compute the per-bracket mean once and reuse it for both the argmax and the prize.
        expected_prizes = candidates_prizes.mean(axis=1)
        best_available_bracket = expected_prizes.argmax()  # index of best available bracket given previous brackets selected
        best_brackets.append((best_available_bracket, expected_prizes[best_available_bracket]))

        # hedging: remove any ground truths that the best available bracket won in
        ignore_indexes = np.where(candidates_prizes[best_available_bracket] != 0)[0]
        candidates_prizes = np.delete(candidates_prizes, ignore_indexes, axis=1)

    return best_brackets

In [11]:
# Select 25 hedged brackets for a 100-entry pool paying 85 / 12 / 3 to the top three.
bb = get_best_brackets(pool_size=100, number_of_brackets=25, first_payout=85, second_payout=12, third_payout=3)

bb

  0%|          | 0/25 [00:00<?, ?it/s]

[(7078, 7.326866666666667),
 (20368, 4.6987722772277225),
 (37537, 5.085933391761612),
 (12891, 5.488845081257813),
 (19415, 4.971793886846293),
 (32205, 4.6201460742544125),
 (39592, 4.551970153161409),
 (15037, 4.813976308964744),
 (25411, 4.193102339416352),
 (10010, 4.201945386400142),
 (33306, 4.549083704804358),
 (21969, 4.3947168994262205),
 (16994, 4.553181442568764),
 (33000, 4.6632505584023125),
 (27441, 4.94733106189665),
 (35285, 4.52013422818792),
 (39892, 4.289249289416485),
 (9747, 4.482893104059564),
 (32937, 4.99341580207502),
 (31424, 4.221537106364237),
 (31586, 4.287595785440613),
 (30684, 4.336276291872241),
 (34658, 3.7720121446315207),
 (4178, 4.114669106434889),
 (33808, 4.2114228456913825)]

### Display Brackets

In [12]:
import pandas as pd

pd.set_option('display.max_columns', 100)

# Teams table (TeamID, TeamName, ...). Forward slashes keep the relative path
# working on every OS and match the path style of the pickle load above.
df_teams = pd.read_csv('../data/unprocessed/kaggle/MTeams.csv')

df_teams

Unnamed: 0,TeamID,TeamName,FirstD1Season,LastD1Season
0,1101,Abilene Chr,2014,2024
1,1102,Air Force,1985,2024
2,1103,Akron,1985,2024
3,1104,Alabama,1985,2024
4,1105,Alabama A&M,2000,2024
...,...,...,...,...
373,1474,Queens NC,2023,2024
374,1475,Southern Indiana,2023,2024
375,1476,Stonehill,2023,2024
376,1477,TX A&M Commerce,2023,2024


In [13]:
# Forward slashes keep the relative path portable across operating systems.
df_seeds = pd.read_csv('../data/unprocessed/kaggle/MNCAATourneySeeds.csv')

# Keep only the selected season.
df_seeds = df_seeds.loc[df_seeds['Season'] == season, :].reset_index(drop=True)

# Split the raw seed string: a trailing 'a'/'b' marks a play-in entry, the
# first character is the region, and the digits are the numeric seed.
df_seeds.insert(2, 'Play In', df_seeds['Seed'].str.endswith(('a', 'b')))
df_seeds.insert(2, 'Region', df_seeds['Seed'].str[0])
# Raw string avoids the invalid "\d" escape; expand=False yields a Series
# rather than a one-column DataFrame.
df_seeds['Seed'] = df_seeds['Seed'].str.extract(r'(\d+)', expand=False).astype(int)

# Drop the play-in losers.
df_seeds = df_seeds.loc[~df_seeds['TeamID'].isin(playin_losers), :].reset_index(drop=True)

# Region letter + zero-padded two-digit seed, e.g. 'W01'.
df_seeds.insert(1, 'Region Seed', df_seeds['Region'] + df_seeds['Seed'].astype(str).str.zfill(2))

df_seeds

Unnamed: 0,Season,Region Seed,Seed,Region,Play In,TeamID
0,2024,W01,1,W,False,1163
1,2024,W02,2,W,False,1235
2,2024,W03,3,W,False,1228
3,2024,W04,4,W,False,1120
4,2024,W05,5,W,False,1361
...,...,...,...,...,...,...
59,2024,Z12,12,Z,False,1241
60,2024,Z13,13,Z,False,1436
61,2024,Z14,14,Z,False,1324
62,2024,Z15,15,Z,False,1443


In [14]:
# TeamID -> team name, taken from the teams table.
id_to_team = {
    team_id: team_name
    for team_id, team_name in zip(df_teams['TeamID'], df_teams['TeamName'])
}

# Region seed (e.g. 'W01') -> team name, for the teams left in the seeding table.
region_seed_to_team = {
    region_seed: team_name
    for region_seed, team_name in zip(df_seeds['Region Seed'], df_seeds['TeamID'].map(id_to_team))
}

len(region_seed_to_team)

64

In [15]:
def display_bracket(bracket):
    """
    Print the winners picked by `bracket`, round by round.

    `bracket` maps a game key (e.g. 'R1W1', 'R5WX', 'R6CH') to the region seed
    of the picked winner; region seeds are translated to team names through
    the module-level `region_seed_to_team` lookup before anything is printed.
    """
    teams_by_game = {}
    for game, region_seed in bracket.items():
        teams_by_game[game] = region_seed_to_team[region_seed]

    divider = '-'*30

    def emit_divider():
        print(divider)
        print()

    def emit_title(title):
        print(title)
        print()

    # (title, game-key prefix, slots in display order) for the rounds played inside a region
    regional_rounds = (
        ('ROUND OF 64 WINNERS', 'R1', (1, 8, 5, 4, 6, 3, 7, 2)),
        ('ROUND OF 32 WINNERS', 'R2', (1, 4, 3, 2)),
        ('ROUND OF 16 WINNERS', 'R3', (1, 2)),
        ('ELITE EIGHT WINNERS', 'R4', (1, )),
    )
    # (title, game-key prefix, matchup codes) for the rounds played across regions
    national_rounds = (
        ('FINAL FOUR WINNERS', 'R5', ('WX', 'YZ')),
        ('FINALS WINNER', 'R6', ('CH', )),
    )

    emit_divider()

    for title, prefix, slots in regional_rounds:
        emit_title(title)
        for region in ('W', 'X', 'Y', 'Z'):
            print(f'REGION {region}')
            for slot in slots:
                key = f'{prefix}{region}{slot}'
                print(f'{key}: {teams_by_game[key]}')
            print()
        emit_divider()

    for title, prefix, matchups in national_rounds:
        emit_title(title)
        for matchup in matchups:
            key = f'{prefix}{matchup}'
            print(f'{key}: {teams_by_game[key]}')
            print()
        emit_divider()

In [16]:
# Print each selected bracket in the order it was picked. The loop variables
# are named explicitly rather than unpacked into `_`, because binding `_` at
# notebook top level stops IPython from updating its `_` last-output shortcut.
for bracket_index, expected_prize in bb:
    display_bracket(candidates[bracket_index])

------------------------------

ROUND OF 64 WINNERS

REGION W
R1W1: Connecticut
R1W8: FL Atlantic
R1W5: San Diego St
R1W4: Auburn
R1W6: BYU
R1W3: Illinois
R1W7: Washington St
R1W2: Iowa St

REGION X
R1X1: North Carolina
R1X8: Mississippi St
R1X5: St Mary's CA
R1X4: Col Charleston
R1X6: New Mexico
R1X3: Baylor
R1X7: Dayton
R1X2: Arizona

REGION Y
R1Y1: Purdue
R1Y8: Utah St
R1Y5: Gonzaga
R1Y4: Kansas
R1Y6: Oregon
R1Y3: Creighton
R1Y7: Colorado St
R1Y2: Tennessee

REGION Z
R1Z1: Houston
R1Z8: Nebraska
R1Z5: Wisconsin
R1Z4: Duke
R1Z6: Texas Tech
R1Z3: Kentucky
R1Z7: Florida
R1Z2: Marquette

------------------------------

ROUND OF 32 WINNERS

REGION W
R2W1: Connecticut
R2W4: Auburn
R2W3: Illinois
R2W2: Iowa St

REGION X
R2X1: North Carolina
R2X4: St Mary's CA
R2X3: Baylor
R2X2: Arizona

REGION Y
R2Y1: Purdue
R2Y4: Gonzaga
R2Y3: Creighton
R2Y2: Tennessee

REGION Z
R2Z1: Houston
R2Z4: Duke
R2Z3: Kentucky
R2Z2: Marquette

------------------------------

ROUND OF 16 WINNERS

REGION W
R3W1: Aub