# Results Analysis

## Set Up

In [119]:
# Libraries
import numpy as np
import pandas as pd

In [120]:
# Load Data
# Read the processed dataset; index_col=False stops pandas from using the
# first CSV column as the index.
data_path = 'data/processed/data.csv'
data = pd.read_csv(data_path, index_col=False)

In [121]:
# Load Predictions
# Years to include: 2013 through 2024, excluding 2020
years = [y for y in range(2013, 2025) if y != 2020]

# Gather one frame per year and concatenate a single time at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every
# previously loaded row on each iteration and seeds the result with an
# empty, column-less frame.
yearly_probs = []
for year in years:
    # Read that year's probabilities and tag every row with its year
    probs = pd.read_csv(f'results/probabilities/{year}.csv', index_col=False)
    probs['Year'] = year
    yearly_probs.append(probs)

# Single concatenation; ignore_index gives a fresh 0..N-1 row index
predictions = pd.concat(yearly_probs, ignore_index=True)

In [122]:
# Convert Pred to Points
# Each round column is multiplied by its point value and then added to the
# (already converted) column of the round before it, so every column ends up
# holding a running total through that round.
# NOTE: the columns are overwritten in place, so running this cell a second
# time without reloading `predictions` applies the conversion again.
round_points = {
    'R32': 10,
    'S16': 20,
    'E8': 40,
    'F4': 80,
    'NCG': 160,
    'Winner': 320,
}
previous_round = None
for round_col, points in round_points.items():
    if previous_round is None:
        # First round: nothing to accumulate yet
        predictions[round_col] = predictions[round_col]*points
    else:
        predictions[round_col] = predictions[previous_round] + predictions[round_col]*points
    previous_round = round_col

In [123]:
# Merge
# Inner join (the pandas default): only rows whose Year/Team/Seed/Region
# combination appears in both frames are kept.
# NOTE(review): unmatched rows are dropped silently — consider comparing
# len(df) with len(data) after this cell.
join_keys = ['Year', 'Team', 'Seed', 'Region']
df = pd.merge(data, predictions, how='inner', on=join_keys)

In [124]:
def confidence_interval(avg, sd, n, ci):
    """Return the bounds of a symmetric interval around a sample mean.

    The interval is ``avg ± ci * sd / sqrt(n)``, i.e. the mean plus or minus
    ``ci`` standard errors.

    Parameters
    ----------
    avg : float
        Sample mean.
    sd : float
        Sample standard deviation.
    n : int
        Number of observations behind ``avg`` and ``sd``.
    ci : float
        Multiplier applied to the standard error (the cells in this notebook
        pass 1.96).

    Returns
    -------
    tuple of (str, str)
        Lower and upper bound, each formatted with exactly two decimals so
        that a bound such as 9.6 is rendered as ``'9.60'`` instead of
        ``'9.6'`` and all printed intervals share the same precision.
    """
    # Half-width of the interval; computed once and reused for both bounds.
    margin = (sd/np.sqrt(n))*ci
    return f'{avg - margin:.2f}', f'{avg + margin:.2f}'

## Winner

In [125]:
# Statistics
# 'Winner' values for rows with Round == 7, selected once and reused
winner_pts = df.loc[df['Round'] == 7, 'Winner']
n, sd, avg = len(winner_pts), winner_pts.std(), winner_pts.mean()
# 1.96 is the standard-normal critical value for a two-sided 95% interval.
# NOTE(review): if this subset is small, a t critical value may be more
# appropriate — confirm n.
ci = 1.96

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('Winner C.I.: ' + lower + '-' + upper)

Winner C.I.: 146.53-222.56


## Championship

In [126]:
# Statistics
# 'NCG' values for rows with Round >= 6, selected once and reused
ncg_pts = df.loc[df['Round'] >= 6, 'NCG']
n, sd, avg = len(ncg_pts), ncg_pts.std(), ncg_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('NCG C.I.: ' + lower + '-' + upper)

NCG C.I.: 75.37-107.67


In [128]:
# Statistics
# 'NCG' values for rows with Round >= 6 whose Seed is not 1
is_selected = (df['Round'] >= 6) & (df['Seed'] != 1)
ncg_non1_pts = df.loc[is_selected, 'NCG']
n, sd, avg = len(ncg_non1_pts), ncg_non1_pts.std(), ncg_non1_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('Non 1 Seed NCG C.I.: ' + lower + '-' + upper)

Non 1 Seed NCG C.I.: 40.37-66.72


## Final 4

In [129]:
# Statistics
# 'F4' values for rows with Round >= 5, selected once and reused
f4_pts = df.loc[df['Round'] >= 5, 'F4']
n, sd, avg = len(f4_pts), f4_pts.std(), f4_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('F4 C.I.: ' + lower + '-' + upper)

F4 C.I.: 43.05-55.35


In [131]:
# Statistics
# 'F4' values for rows with Round >= 5 whose Seed is not 1
is_selected = (df['Round'] >= 5) & (df['Seed'] != 1)
f4_non1_pts = df.loc[is_selected, 'F4']
n, sd, avg = len(f4_non1_pts), f4_non1_pts.std(), f4_non1_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('Non 1 Seed F4 C.I.: ' + lower + '-' + upper)

Non 1 Seed F4 C.I.: 32.37-44.59


## Elite 8

In [132]:
# Statistics
# 'E8' values for rows with Round >= 4, selected once and reused
e8_pts = df.loc[df['Round'] >= 4, 'E8']
n, sd, avg = len(e8_pts), e8_pts.std(), e8_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('E8 C.I.: ' + lower + '-' + upper)

E8 C.I.: 25.91-30.09


In [134]:
# Statistics
# 'E8' values for rows with Round >= 4 and Seed greater than 2
is_selected = (df['Round'] >= 4) & (df['Seed'] > 2)
e8_non12_pts = df.loc[is_selected, 'E8']
n, sd, avg = len(e8_non12_pts), e8_non12_pts.std(), e8_non12_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('Non 1-2 Seed E8 C.I.: ' + lower + '-' + upper)

Non 1-2 Seed E8 C.I.: 18.97-24.12


## Sweet 16

In [135]:
# Statistics
# 'S16' values for rows with Round >= 3, selected once and reused
s16_pts = df.loc[df['Round'] >= 3, 'S16']
n, sd, avg = len(s16_pts), s16_pts.std(), s16_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('S16 C.I.: ' + lower + '-' + upper)

S16 C.I.: 15.48-17.07


In [137]:
# Statistics
# 'S16' values for rows with Round >= 3 and Seed greater than 4
is_selected = (df['Round'] >= 3) & (df['Seed'] > 4)
s16_non14_pts = df.loc[is_selected, 'S16']
n, sd, avg = len(s16_non14_pts), s16_non14_pts.std(), s16_non14_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('Non 1-4 Seed S16 C.I.: ' + lower + '-' + upper)

Non 1-4 Seed S16 C.I.: 9.96-12.25


## Round 32

### 1 v 16

In [138]:
# Statistics
# 'R32' values for Seed == 1 rows with Round >= 2
is_selected = (df['Round'] >= 2) & (df['Seed'] == 1)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('1 Seed Winner R32 C.I.: ' + lower + '-' + upper)

1 Seed Winner R32 C.I.: 9.37-9.6


In [139]:
# Statistics
# 'R32' values for Seed == 1 rows with Round < 2
# NOTE(review): this subset is presumably very small; a fixed 1.96 multiplier
# assumes a large sample — confirm n before relying on the interval.
is_selected = (df['Round'] < 2) & (df['Seed'] == 1)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('1 Seed Loser R32 C.I.: ' + lower + '-' + upper)

1 Seed Loser R32 C.I.: 8.44-9.4


In [140]:
# Statistics
# 'R32' values for Seed == 16 rows with Round >= 2
# NOTE(review): this subset is presumably very small; a fixed 1.96 multiplier
# assumes a large sample — confirm n before relying on the interval.
is_selected = (df['Round'] >= 2) & (df['Seed'] == 16)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('16 Seed Winner R32 C.I.: ' + lower + '-' + upper)

16 Seed Winner R32 C.I.: 0.6-1.56


In [141]:
# Statistics
# 'R32' values for Seed == 16 rows with Round < 2
is_selected = (df['Round'] < 2) & (df['Seed'] == 16)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('16 Seed Loser R32 C.I.: ' + lower + '-' + upper)

16 Seed Loser R32 C.I.: 0.4-0.63


### 3 v 14

In [146]:
# Statistics
# 'R32' values for Seed == 3 rows with Round >= 2
is_selected = (df['Round'] >= 2) & (df['Seed'] == 3)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('3 Seed Winner R32 C.I.: ' + lower + '-' + upper)

3 Seed Winner R32 C.I.: 8.76-9.27


In [147]:
# Statistics
# 'R32' values for Seed == 3 rows with Round < 2
is_selected = (df['Round'] < 2) & (df['Seed'] == 3)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('3 Seed Loser R32 C.I.: ' + lower + '-' + upper)

3 Seed Loser R32 C.I.: 7.58-8.98


In [148]:
# Statistics
# 'R32' values for Seed == 14 rows with Round >= 2
is_selected = (df['Round'] >= 2) & (df['Seed'] == 14)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('14 Seed Winner R32 C.I.: ' + lower + '-' + upper)

14 Seed Winner R32 C.I.: 1.02-2.42


In [149]:
# Statistics
# 'R32' values for Seed == 14 rows with Round < 2
is_selected = (df['Round'] < 2) & (df['Seed'] == 14)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('14 Seed Loser R32 C.I.: ' + lower + '-' + upper)

14 Seed Loser R32 C.I.: 0.73-1.24


### 5 v 12

In [156]:
# Statistics
# 'R32' values for Seed == 12 rows with Round >= 2
is_selected = (df['Round'] >= 2) & (df['Seed'] == 12)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('12 Seed Winner R32 C.I.: ' + lower + '-' + upper)

12 Seed Winner R32 C.I.: 1.98-3.57


In [157]:
# Statistics
# 'R32' values for Seed == 12 rows with Round < 2
is_selected = (df['Round'] < 2) & (df['Seed'] == 12)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('12 Seed Loser R32 C.I.: ' + lower + '-' + upper)

12 Seed Loser R32 C.I.: 1.62-2.64


### 6 v 11

In [160]:
# Statistics
# 'R32' values for Seed == 11 rows with Round >= 2
is_selected = (df['Round'] >= 2) & (df['Seed'] == 11)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('11 Seed Winner R32 C.I.: ' + lower + '-' + upper)

11 Seed Winner R32 C.I.: 3.04-4.77


In [161]:
# Statistics
# 'R32' values for Seed == 11 rows with Round < 2
is_selected = (df['Round'] < 2) & (df['Seed'] == 11)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('11 Seed Loser R32 C.I.: ' + lower + '-' + upper)

11 Seed Loser R32 C.I.: 2.42-4.27


### 7 v 10

In [164]:
# Statistics
# 'R32' values for Seed == 10 rows with Round >= 2
is_selected = (df['Round'] >= 2) & (df['Seed'] == 10)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('10 Seed Winner R32 C.I.: ' + lower + '-' + upper)

10 Seed Winner R32 C.I.: 4.08-6.26


In [165]:
# Statistics
# 'R32' values for Seed == 10 rows with Round < 2
is_selected = (df['Round'] < 2) & (df['Seed'] == 10)
r32_pts = df.loc[is_selected, 'R32']
n, sd, avg = len(r32_pts), r32_pts.std(), r32_pts.mean()
ci = 1.96  # multiplier applied to the standard error

# Confidence Interval
lower, upper = confidence_interval(avg, sd, n, ci)
print('10 Seed Loser R32 C.I.: ' + lower + '-' + upper)

10 Seed Loser R32 C.I.: 3.58-5.27
