In [8]:
import pandas as pd
import yaml
import re
import numpy as np
from scipy.stats import norm
import math

WEEK = 13

In [2]:
with open("schedule_2017.yaml", "r") as sched_file:
    schedule = yaml.load(sched_file)
schedule

{'Illinois': ['Ball State',
  'Western Kentucky',
  '@South Florida',
  '',
  'Nebraska',
  '@Iowa',
  'Rutgers',
  '@Minnesota',
  'Wisconsin',
  '@Purdue',
  'Indiana',
  '@Ohio State',
  'Northwestern'],
 'Indiana': ['Ohio State',
  '@Virginia',
  '',
  'Georgia Southern',
  '@Penn State',
  'Charleston Southern',
  'Michigan',
  '@Michigan State',
  '@Maryland',
  'Wisconsin',
  '@Illinois',
  'Rutgers',
  '@Purdue'],
 'Iowa': ['Wyoming',
  '@Iowa State',
  'North Texas',
  'Penn State',
  '@Michigan State',
  'Illinois',
  '',
  '@Northwestern',
  'Minnesota',
  'Ohio State',
  '@Wisconsin',
  'Purdue',
  '@Nebraska'],
 'Maryland': ['@Texas',
  'Towson',
  '',
  'Central Florida(UCF)',
  '@Minnesota',
  '@Ohio State',
  'Northwestern',
  '@Wisconsin',
  'Indiana',
  'Rutgers',
  'Michigan',
  '@Michigan State',
  'Penn State'],
 'Michigan': ['Florida',
  'Cincinnati',
  'Air Force',
  '@Purdue',
  '',
  'Michigan State',
  '@Indiana',
  '@Penn State',
  'Rutgers',
  'Minnesota',
 

In [3]:
# Only one week remaining
schedule = {k: v[WEEK-14:] for k, v in schedule.items()}
schedule

{'Illinois': ['Northwestern'],
 'Indiana': ['@Purdue'],
 'Iowa': ['@Nebraska'],
 'Maryland': ['Penn State'],
 'Michigan': ['Ohio State'],
 'Michigan State': ['@Rutgers'],
 'Minnesota': ['Wisconsin'],
 'Nebraska': ['Iowa'],
 'Northwestern': ['@Illinois'],
 'Ohio State': ['@Michigan'],
 'Penn State': ['@Maryland'],
 'Purdue': ['Indiana'],
 'Rutgers': ['Michigan State'],
 'Wisconsin': ['@Minnesota']}

In [9]:
with open("ratings.yaml", "r") as ratings_file:
    ratings = yaml.load(ratings_file)
home_advantage = ratings['HOME ADVANTAGE']
bias = ratings['BIAS']
mse = ratings['MSE']
std = math.sqrt(mse - bias * bias)
del ratings['HOME ADVANTAGE']
del ratings['BIAS']
del ratings['MSE']
ratings

{'Illinois': 57.34,
 'Indiana': 74.74,
 'Iowa': 80.7,
 'Maryland': 67.37,
 'Michigan': 84.03,
 'Michigan State': 79.75,
 'Minnesota': 71.68,
 'Nebraska': 69.39,
 'Northwestern': 82.94,
 'Ohio State': 93.22,
 'Penn State': 93.24,
 'Purdue': 76.96,
 'Rutgers': 63.24,
 'Wisconsin': 93.88}

In [5]:
predictions = pd.read_csv("predictions.csv")
predictions

Unnamed: 0,Team,Current Wins,Games Remaining,AVG,Austin,Bill,Bret,Julia,Dale,David,...,Liz,Luke,Matt,Max,Neil,Phil C,Phil K,Ron,Sara,Sasha
0,Illinois,2,2,3.17,2,2,4,4,2,5,...,3,3,2,3,3,3,4,3,2,4
1,Indiana,5,2,5.35,5,5,6,6,3,6,...,6,6,5,6,6,4,6,6,4,6
2,Iowa,6,2,7.3,6,7,8,7,6,8,...,7,6,8,6,7,9,7,8,7,7
3,Maryland,4,2,4.35,3,6,5,6,3,5,...,3,4,5,3,3,3,4,6,4,3
4,Michigan,8,2,9.26,10,10,10,9,9,10,...,10,9,9,9,9,10,8,9,9,9
5,Michigan State,8,2,5.78,3,7,6,8,6,4,...,8,6,5,4,5,7,5,4,6,5
6,Minnesota,5,2,6.61,8,7,5,7,6,9,...,7,9,6,5,8,4,6,8,4,5
7,Nebraska,4,2,7.17,7,8,7,6,7,7,...,8,7,8,6,7,7,6,9,6,6
8,Northwestern,8,2,7.3,9,8,9,7,7,8,...,7,9,8,8,8,7,8,6,5,9
9,Ohio State,9,2,10.87,11,11,11,11,10,11,...,10,12,10,11,11,12,11,11,10,11


In [6]:
# Subtract off current wins to know how far off we are right now
pickers = ["Austin", "Bill", "Bret", "Julia", "Dale", "David", "Dawn", "Diane", "Erica", "Glenn", "Jessica", "John", "Kim", "Liz", "Luke", "Matt", "Max", "Neil", "Phil C", "Phil K", "Ron", "Sara", "Sasha"]
for col in pickers:
    predictions[col] = predictions[col] - predictions['Current Wins']

In [7]:
predictions = predictions.melt(id_vars=["Team"], value_vars=pickers, var_name="picker", value_name="remaining_prediction")

In [13]:
# Build all remaining
games = []
for t1, rem in schedule.items():
    for t2 in rem:
        if t2[0] == '@': # WARNING: neutral sites will be problematic!
            game = (t2[1:], t1, norm.cdf(ratings[t1] - ratings[t2[1:]] - bias, scale=std))
        else:
            game = (t1, t2, norm.cdf(ratings[t2] - ratings[t1] - bias, scale=std))
        if game not in games:
            games.append(game)

games

[('Illinois', 'Northwestern', 0.94757949399564367),
 ('Purdue', 'Indiana', 0.45008654100876411),
 ('Nebraska', 'Iowa', 0.76556669216893058),
 ('Maryland', 'Penn State', 0.94937064243623093),
 ('Michigan', 'Ohio State', 0.72279853400934013),
 ('Rutgers', 'Michigan State', 0.85335148456180954),
 ('Minnesota', 'Wisconsin', 0.92047639242496482)]

In [22]:
win_count = {p:0 for p in pickers}
for itr in range(pow(2, len(games))):
    pred = predictions.copy()
    prob = 1.
    for gn, game in enumerate(games):
        winner = (itr >> gn) & 1 # 0 or 1
        t = game[winner]
        p = game[2]
        if winner == 0:
            p = 1 - p
        prob *= p
        pred.loc[pred['Team'] == t, 'remaining_prediction'] -= 1
    outcome = pred.groupby('picker')['remaining_prediction'].aggregate(lambda x: np.sum(np.power(x, 2)))
    for picker in outcome[outcome == outcome.min()].index:
        win_count[picker] += prob

win_count

{'Austin': 0,
 'Bill': 0,
 'Bret': 0,
 'Dale': 0.48324416689407823,
 'David': 0,
 'Dawn': 0,
 'Diane': 0,
 'Erica': 0.028889700621066407,
 'Glenn': 0,
 'Jessica': 0,
 'John': 0,
 'Julia': 0.64851546538986882,
 'Kim': 0,
 'Liz': 0.022453821434957135,
 'Luke': 0,
 'Matt': 0.00023142889576698543,
 'Max': 0,
 'Neil': 0,
 'Phil C': 0,
 'Phil K': 0.048532323740386393,
 'Ron': 0,
 'Sara': 0,
 'Sasha': 0}