In [1]:
import pandas as pd
from pprint import pprint

def make1000(POWER_DICT,FORMULA_NUMBER,YEAR=2019,NUM_BRACKETS=1000,OUTPUT_DIR=None,SEED=None):
    """Simulate tournament brackets from power ratings and write each one to CSV.

    Bracket 0 is the "pure" bracket, where the team with more than a 50% win
    probability takes every game. Brackets 1..NUM_BRACKETS are random draws in
    which team1 beats team2 with probability power1 / (power1 + power2).

    The template is read from config/EmptyBracket.csv (relative to the working
    directory). Its 'Rd1' column lists the teams in bracket order, adjacent
    rows being first-round opponents; the later-round columns are filled in
    by this function.

    Parameters
    ----------
    POWER_DICT : dict
        Maps every team name in the template's 'Rd1' column to a numeric rating.
    FORMULA_NUMBER : int or str
        Embedded in the output file names and in the completion message.
    YEAR : int, default 2019
        Embedded in the output file names.
    NUM_BRACKETS : int, default 1000
        Number of random brackets written in addition to the pure bracket.
    OUTPUT_DIR : str, optional
        Folder that receives the CSV files. When omitted, the notebook's
        original hard-coded Windows brackets folder is used.
    SEED : optional
        Seed for a private random generator, making the random brackets
        reproducible. When omitted, the shared ``random`` module state is used.

    Raises
    ------
    KeyError
        If a team in the template has no entry in POWER_DICT.
    ValueError
        If a round has an odd number of teams and so cannot be paired up.
    """
    import os
    import random

    # (column, row stride) for each round after Rd1: the k-th team to reach a
    # round is written on row k * stride of that round's column.
    ROUNDS = (('Rd2', 2), ('S16', 4), ('E8', 8), ('FF', 16), ('C', 32))

    # A private generator keeps seeded runs independent of global random state.
    rng = random if SEED is None else random.Random(SEED)

    # Forward slash: 'config\EmptyBracket.csv' held an invalid escape sequence
    # and only resolved on Windows.
    empty_bracket_df = pd.read_csv('config/EmptyBracket.csv', dtype='object')

    def prob(team1, team2):
        """Return the percent chance that rating team1 beats rating team2."""
        total = team1 + team2
        if total == 0:
            # Two zero-rated teams: treat the game as a coin flip instead of
            # dividing by zero.
            return 50.0
        return team1 / total * 100

    def play_round(teams, power_dict, pure):
        """Pair adjacent teams and return the list of winners, in order."""
        teams = list(teams)
        if len(teams) % 2:
            raise ValueError('cannot pair an odd number of teams: %d' % len(teams))
        winners = []
        for team1, team2 in zip(teams[0::2], teams[1::2]):
            odds = prob(power_dict[team1], power_dict[team2])
            if pure:
                team1_wins = odds > 50
            else:
                team1_wins = rng.uniform(0, 100) < odds
            winners.append(team1 if team1_wins else team2)
        return winners

    def play_bracket(power_dict, pure):
        """Play every round; return (winners per ROUNDS column, champion name)."""
        teams = empty_bracket_df['Rd1'].values
        advancing = []
        for _ in ROUNDS:
            teams = play_round(teams, power_dict, pure)
            advancing.append(teams)
        champion = ''.join(play_round(teams, power_dict, pure))
        return advancing, champion

    def fill_bracket(power_dict, pure=False):
        """Return a fresh copy of the template with one simulated bracket filled in."""
        advancing, champion = play_bracket(power_dict, pure)
        bracket_df = empty_bracket_df.copy()
        for (column, stride), teams in zip(ROUNDS, advancing):
            for slot, team in enumerate(teams):
                bracket_df.at[slot * stride, column] = team
        bracket_df.at[0, 'W'] = champion
        return bracket_df

    if OUTPUT_DIR is None:
        prefix = r'C:\Users\Brian\Documents\MarchMadness2019\brackets\formula'
    else:
        prefix = os.path.join(OUTPUT_DIR, 'formula')

    def bracket_path(bracket_number):
        """Build the CSV path for the given bracket number."""
        return prefix + str(FORMULA_NUMBER) + '_' + str(YEAR) + '_' + str(bracket_number) + '.csv'

    # Bracket 0 is the deterministic one; the rest are random draws.
    fill_bracket(POWER_DICT, True).to_csv(bracket_path(0), index=False, header=True)
    for i in range(1, NUM_BRACKETS+1):
        fill_bracket(POWER_DICT).to_csv(bracket_path(i), index=False, header=True)

    # to_csv returns None when given a path and raises on failure, so getting
    # here means every file was written.
    print('Successfully completed' , str(FORMULA_NUMBER))

    return

def _load_teams():
    """Read config/Teams.csv on first use and reuse the frame afterwards."""
    if _load_teams.cache is None:
        _load_teams.cache = pd.read_csv(r'config/Teams.csv')
    return _load_teams.cache

# Filled lazily so the file is only needed once a lookup is actually made.
_load_teams.cache = None

def get_team(teamid):
    """Return the TeamName listed for `teamid` in config/Teams.csv.

    The team table is read once and kept in memory: this function is called
    once per team for every rating dictionary, and re-reading the CSV each
    time was pure overhead. Restart the kernel (or reset
    ``_load_teams.cache`` to None) to pick up edits to the file.

    Raises
    ------
    ValueError
        If `teamid` matches no row, or more than one row, of the table.
    """
    teams = _load_teams()
    matches = teams.loc[teams['TeamID'] == teamid, 'TeamName']
    if len(matches) != 1:
        raise ValueError(
            'expected exactly one team with TeamID %r, found %d' % (teamid, len(matches))
        )
    return matches.iloc[0]
        
def teamid_to_name(scores):
    """Return a new dict with each team id key of `scores` replaced by its team name.

    Names come from get_team(); values are carried over unchanged. If two ids
    resolve to the same name, the later entry wins.
    """
    return {get_team(team_id): rating for team_id, rating in scores.items()}

# Module-level lookup used by just_tourney(): the distinct TeamIDs seeded in
# the 2019 tournament, in the order they first appear in the seeds file.
tour_comp_results = pd.read_csv(r'config/Stage2DataFiles/NCAATourneySeeds.csv')
tour_dfyear = tour_comp_results.groupby("Season").get_group(2019)
tourteams_this_year = list(dict.fromkeys(tour_dfyear['TeamID'].tolist()))

def just_tourney(scores):
    """Return the entries of `scores` whose team id is in tourteams_this_year.

    The result is a new dict that keeps the iteration order of `scores`.
    """
    return {
        team_id: rating
        for team_id, rating in scores.items()
        if team_id in tourteams_this_year
    }

# Human Scores

In [2]:
def human_scores(person, config_dir=None):
    """Load one person's hand-made power rankings as a {team: power} dict.

    Reads ``<person>PowerRankings.csv`` and uses its 'team' and 'power'
    columns. If a team is listed more than once, the last row wins.

    Parameters
    ----------
    person : str
        Prefix of the rankings file name, e.g. 'Brian'.
    config_dir : str, optional
        Folder that holds the rankings file. When omitted, the notebook's
        original hard-coded Windows config folder is used.
    """
    import os

    if config_dir is None:
        filename = r'C:\Users\Brian\Documents\MarchMadness2019\config\{}PowerRankings.csv'.format(person)
    else:
        filename = os.path.join(config_dir, '{}PowerRankings.csv'.format(person))

    human_score_df = pd.read_csv(filename)
    # Pair the two columns directly rather than building a Series per row.
    return dict(zip(human_score_df['team'], human_score_df['power']))

# Crawl Scores

In [3]:
from collections import defaultdict
import copy

# Folder holding the Stage 2 data files, relative to the working directory.
folder = "config/Stage2DataFiles/"

# conferences = pd.read_csv(folder + "Conferences.csv")
# Regular-season game results; WTeamID, LTeamID, WScore and LScore are the
# columns read from this table later in the notebook.
reg_comp_results = pd.read_csv(folder + "RegularSeasonCompactResults.csv")
# Tournament seeds; rebinds the same name the first cell loaded from the same file.
tour_comp_results = pd.read_csv(r'config/Stage2DataFiles/NCAATourneySeeds.csv')

# Keep only the 2019 season; get_group raises KeyError if that season is absent.
df2019 = reg_comp_results.groupby("Season").get_group(2019)
# Distinct ids of teams that won at least one 2019 game (not referenced
# elsewhere in the visible cells).
npteamarr = df2019['WTeamID'].unique()

def sort10(xs, top=10):
    """Return the `top` highest-valued (key, value) pairs of `xs`, largest first.

    Pairs with equal values keep the mapping's own iteration order.
    """
    ranked = list(xs.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:top]

def sortall(xs):
    """Return every (key, value) pair of `xs` as a list, largest value first.

    Pairs with equal values keep the mapping's own iteration order.
    """
    ranked = list(xs.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

# Number the 2019 games by position: winners[k] is the team that won game k
# and losers[k] is the team it beat.
winners = dict(enumerate(df2019['WTeamID']))
losers = dict(enumerate(df2019['LTeamID']))

def build_graph():
    """Build the "beat" graph from the module-level winners/losers tables.

    Returns a defaultdict(list) that maps each winning team id to the ids of
    the teams it beat, one entry per game (so repeat wins appear repeatedly).
    """
    beaten_by = defaultdict(list)
    for game_no, winner_id in winners.items():
        beaten_by[winner_id].append(losers[game_no])
    return beaten_by

def build_inverse_graph():
    """Build the "lost to" graph from the module-level winners/losers tables.

    Returns a defaultdict(list) that maps each losing team id to the ids of
    the teams that beat it, one entry per game.
    """
    lost_to = defaultdict(list)
    for game_no, loser_id in losers.items():
        lost_to[loser_id].append(winners[game_no])
    return lost_to

def getAllNodes(graph_dict):
    """Collect the distinct nodes and count the edges of an adjacency mapping.

    Parameters
    ----------
    graph_dict : mapping
        Maps each node to a list of neighbouring nodes. Nodes must be hashable.

    Returns
    -------
    (list, int)
        The distinct nodes in first-seen order (each key, then its neighbours),
        and the total number of neighbour entries across all keys.
    """
    # A set makes the "already seen?" check constant time; the list keeps the
    # first-seen order that callers receive.
    seen = set()
    allNodes = []
    edges = 0
    for winner, beaten in graph_dict.items():
        edges += len(beaten)
        if winner not in seen:
            seen.add(winner)
            allNodes.append(winner)
        for loser in beaten:
            if loser not in seen:
                seen.add(loser)
                allNodes.append(loser)
    return allNodes,edges

def normalize(scores):
    """Min-max scale the values of `scores` into the range [0, 1].

    Returns a new dict with the same keys: the smallest value maps to 0.0 and
    the largest to 1.0. An empty input returns an empty dict. When every
    value is identical there is no spread to scale by, so every key maps to
    0.0 rather than dividing by zero.
    """
    if not scores:
        return {}
    mini = float(min(scores.values()))
    maxi = float(max(scores.values()))
    spread = maxi - mini
    if spread == 0:
        return {user: 0.0 for user in scores}
    return {user: (float(scores[user])-mini)/spread for user in scores}

# Iterative "crawl" rating: teams earn credit from the scores of teams they beat
def crawl_teams(df,MAX_DIFF=20):
    """Rate teams by repeatedly crediting winners with their victims' scores.

    Every team starts at 1. On each pass, for every game in `df`, the winner's
    score grows by the loser's current score times (capped margin / 20); the
    scores are then min-max normalized. Passes repeat until the sum of all
    scores changes by less than 1e-5, or 1000 passes have run.

    Parameters
    ----------
    df : DataFrame
        One row per game with WTeamID, LTeamID, WScore and LScore columns.
    MAX_DIFF : number, default 20
        Cap applied to the winning margin of each game.

    Returns
    -------
    dict
        team id -> score, ordered from highest to lowest score.

    Notes
    -----
    The set of teams comes from the module-level ``graph_dict``, not from
    `df`, so ``graph_dict`` must already exist and must contain every team
    that appears in `df`.
    """
    allNodes, _edge_count = getAllNodes(graph_dict)

    team_scores = {user: 1 for user in allNodes}

    maxloops = 1000 # to prevent infinite while
    e = 0.00001 # stop condition

    # The per-game weights never change between passes, so compute them once
    # rather than walking the DataFrame again on every pass.
    games = []
    for winner_id, loser_id, wscore, lscore in zip(df['WTeamID'], df['LTeamID'], df['WScore'], df['LScore']):
        diff = wscore - lscore
        if diff > MAX_DIFF:
            diff = MAX_DIFF
        # NOTE(review): the divisor is a literal 20, not MAX_DIFF; the two only
        # coincide at the default cap. Confirm which is intended.
        games.append((winner_id, loser_id, diff/20))

    for _ in range(maxloops):

        oldsum = sum(team_scores.values())
        # Scores are updated in place, so games later in the pass see the
        # credit handed out by earlier ones.
        for winner_id, loser_id, weight in games:
            team_scores[winner_id] += team_scores[loser_id] * weight

        team_scores = normalize(team_scores)
        newsum = sum(team_scores.values())

        # Break condition - if vals are changing by less than epsilon, stop looping
        if abs(newsum - oldsum) < e:
            break
    return dict(sortall(team_scores))

# Weight scores by tournament seed
def seed_scores(scores):
    """Return a copy of `scores` with each 2019 tournament team scaled by its seed.

    A team's score is multiplied by (17 - seed) / 16, where the seed number is
    characters 1-2 of the 'Seed' string in the module-level tour_comp_results
    table. Only a team's first row in that table is applied. The result is
    ordered from highest to lowest score, and the input dict is not modified.
    A tournament team missing from `scores` raises KeyError.
    """
    weighted = copy.deepcopy(scores)
    season_seeds = tour_comp_results.groupby("Season").get_group(2019)

    already_scaled = []
    for _, entry in season_seeds.iterrows():
        team_id = entry['TeamID']
        if team_id in already_scaled:
            continue
        already_scaled.append(team_id)
        seed_number = int(entry['Seed'][1:3])
        weighted[team_id] *= (17 - seed_number)/16

    return dict(sortall(weighted))

def square_scores(scores):
    """Return a copy of `scores` with every value squared; the input is not modified."""
    squarescores = copy.deepcopy(scores)
    squarescores.update((team, value * value) for team, value in scores.items())
    return squarescores

def cube_scores(scores):
    """Return a copy of `scores` with every value cubed; the input is not modified."""
    cubescores = copy.deepcopy(scores)
    cubescores.update((team, value * value * value) for team, value in scores.items())
    return cubescores

def quad_scores(scores):
    """Return a copy of `scores` with every value raised to the fourth power; the input is not modified."""
    quadscores = copy.deepcopy(scores)
    quadscores.update(
        (team, value * value * value * value) for team, value in scores.items()
    )
    return quadscores

# PageRank

In [4]:
import networkx as nx

def pagerank(df2019):
    """Rate teams with PageRank over a directed loser -> winner game graph.

    Every game in `df2019` adds an edge from the losing team to the winning
    team, weighted by twice the winning margin, with the margin capped at 20.
    Returns a dict of team id -> PageRank value, ordered highest first.

    NOTE(review): a DiGraph holds a single edge per (loser, winner) pair, so a
    repeat result between the same two teams replaces the earlier weight
    instead of adding to it. Confirm this is intended.
    """
    MAX_DIFF = 20

    G = nx.DiGraph()
    # Register every winner up front; teams that only ever lost are added
    # implicitly when their edges are created below.
    for teamid in df2019['WTeamID']:
        G.add_node(teamid)
    for _, game in df2019.iterrows():
        margin = min(game['WScore'] - game['LScore'], MAX_DIFF)
        G.add_edge(game['LTeamID'], game['WTeamID'], weight=2*margin)

    return dict(sortall(nx.pagerank(G)))

# From Rankings

In [5]:
def from_rankings():
    """Load two ready-made rating sets from config/TourneyTeamData2019.csv.

    Returns a (blended, rpiavg) tuple of dicts, each keyed by the file's
    'team' column and holding the 'blended' and 'rpiavg' column values
    respectively.
    """
    ranking_data_df = pd.read_csv(r'config/TourneyTeamData2019.csv')
    team_names = ranking_data_df['team']

    blended = dict(zip(team_names, ranking_data_df['blended']))
    rpiavg = dict(zip(team_names, ranking_data_df['rpiavg']))
    return blended, rpiavg

# print(blended)
# print("")
# print(rpiavg)

# Call all the functions

In [6]:
# Build the game graphs first: crawl_teams() reads the module-level graph_dict.
graph_dict = build_graph()
# Inverse (loser -> winners) graph; not referenced elsewhere in the visible cells.
i_graph_dict = build_inverse_graph()

# Crawl ratings: rate every team, keep tournament teams only, then derive the
# seed-weighted variant from that filtered set.
crawlscores = crawl_teams(df2019)
crawlscores = just_tourney(crawlscores)
seedcrawlscores = seed_scores(crawlscores)

# PageRank ratings, filtered the same way, plus squared / cubed / fourth-power
# variants of both the plain and the seed-weighted scores.
prscores = pagerank(df2019)
prscores = just_tourney(prscores)
seedprscores = seed_scores(prscores)
squarepr = square_scores(prscores)
cubepr = cube_scores(prscores)
quadpr = quad_scores(prscores)
squareseedpr = square_scores(seedprscores)
cubeseedpr = cube_scores(seedprscores)
quadseedpr = quad_scores(seedprscores)

# Hand-made rankings, one CSV per person; used below without id -> name conversion.
brianscores = human_scores('Brian')
johnnyscores = human_scores('Johnny')
nickscores = human_scores('Nick')

# Re-key every computed rating dict from team id to team name. This must come
# after all the id-based steps above, because the names are rebound in place.
crawlscores = teamid_to_name(crawlscores)
seedcrawlscores = teamid_to_name(seedcrawlscores)
prscores = teamid_to_name(prscores)
seedprscores = teamid_to_name(seedprscores)
squarepr = teamid_to_name(squarepr)
cubepr = teamid_to_name(cubepr)
quadpr = teamid_to_name(quadpr)
squareseedpr = teamid_to_name(squareseedpr)
cubeseedpr = teamid_to_name(cubeseedpr)
quadseedpr = teamid_to_name(quadseedpr)

# Ready-made ratings from the rankings file: (blended, rpiavg).
tup = from_rankings()
blended = tup[0]
rpiavg = tup[1]

# print(brianscores)
# print("")
# print(johnnyscores)
# print("")
# print(nickscores)
# print("")
# print(crawlscores)
# print("")
# print(seedcrawlscores)
# print("")
# print(prscores)
# print("")
# print(squarepr)
# print("")
# print(cubepr)
# print("")
# print(quadpr)
# print("")
# print(seedprscores)
# print("")
# print(squareseedpr)
# print("")
# print(cubeseedpr)
# print("")
# print(quadseedpr)
# print("")
# print(blended)
# print("")
# print(rpiavg)

# Now let's make the brackets

In [8]:
# One batch of brackets per rating formula; a formula's number is its 1-based
# position in this list.
formula_ratings = [
    brianscores,
    johnnyscores,
    nickscores,
    crawlscores,
    seedcrawlscores,
    prscores,
    squarepr,
    cubepr,
    quadpr,
    seedprscores,
    squareseedpr,
    cubeseedpr,
    quadseedpr,
    blended,
    rpiavg,
]
for formula_number, ratings in enumerate(formula_ratings, start=1):
    make1000(ratings, formula_number)

Successfully completed 1
Successfully completed 2
Successfully completed 3
Successfully completed 4
Successfully completed 5
Successfully completed 6
Successfully completed 7
Successfully completed 8
Successfully completed 9
Successfully completed 10
Successfully completed 11
Successfully completed 12
Successfully completed 13
Successfully completed 14
Successfully completed 15
