In [2]:
cd ..

/Users/sethhendrickson/cbbmodel


In [3]:
import pandas as pd
import pymc
import numpy as np
from DB import DB
import scipy.stats as scs
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
def get_season(dt):
    year = dt.year
    month = dt.month
    if month > 6:
        return year + 1
    else:
        return year

In [11]:
games = pd.read_sql("SELECT * FROM games_test", DB.conn)
games['season']  = games.dt.map(lambda d: get_season(d))
games = games[games.season == 2015]
hteams = games[['hteam_id']]
ateams = games[['ateam_id']]
hteams = hteams.rename(columns={'hteam_id': 'team_id'})
ateams = ateams.rename(columns={'ateam_id': 'team_id'})
teams = pd.concat([hteams, ateams], axis=0)
teams.drop_duplicates(inplace=True)

In [19]:
acc = {int(team_id): {'w': 0, 'l': 0, 'ow': 0, 'ol': 0, 'oow': 0, 'ool': 0} for team_id in teams.dropna().team_id.values}

In [35]:
for idx, game in games.iterrows():
    if game.home_outcome:
        if not np.isnan(game.hteam_id):
            acc[game.hteam_id]['w'] += 1
        if not np.isnan(game.ateam_id):
            acc[game.ateam_id]['l'] += 1
    else:
        if not np.isnan(game.hteam_id):
            acc[game.hteam_id]['l'] += 1
        if not np.isnan(game.ateam_id):
            acc[game.ateam_id]['w'] += 1

In [40]:
acc[306]

{'l': 14, 'ol': 0, 'ool': 0, 'oow': 0, 'ow': 0, 'w': 20}

In [45]:
games = np.array([0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0]).reshape((4,4))

In [53]:
data = [['UConn', 64, 'Kansas', 57], ['UConn', 82, 'Duke', 68], ['Minnesota', 71, 'UConn', 72], ['Kansas', 69, 'UConn', 62], ['Duke', 81, 'Minnesota', 70], ['Minnesota', 52, 'Kansas', 62]]

In [55]:
games = pd.DataFrame(data, columns=['hteam', 'hscore', 'ateam', 'ascore'])

In [77]:
teams = np.unique(games.hteam.values.tolist() + games.ateam.values.tolist())
teams = pd.DataFrame(teams[:, np.newaxis], columns=['team'])
teams['idx'] = range(teams.shape[0])
team_index = {team[0]: team[1] for team in teams.values}
print team_index

{'Duke': 0, 'Kansas': 1, 'Minnesota': 2, 'UConn': 3}


In [159]:
played = np.zeros(shape=(teams.shape[0], teams.shape[0]))
wplayed = np.zeros(shape=(teams.shape[0], teams.shape[0]))
wins = np.zeros(shape=(teams.shape[0], teams.shape[0]))
wwins = np.zeros(shape=(teams.shape[0], teams.shape[0]))
win_loss = {team: {'w': 0, 'l': 0} for team in teams.team.values}
for idx, game in games.iterrows():
    i, j = (team_index[game.hteam], team_index[game.ateam])
#     played[i, j] += 1.
#     played[j, i] += 1.
    if game.hscore > game.ascore:
        wins[i, j] += 0.6 # home team wins 0.6 games
        wwins[i, j] += 1.
#         played[i, j] += 0.6  # home team played 0.6 games
#         played[j, i] += 1.4  # away team played 1.4 games
    else:
        wins[j, i] += 1.4  # away team wins 1.4 games
        wwins[j, i] += 1.
    played[i, j] += 0.6  # home team played 0.6 games
    played[j, i] += 1.4  # away team played 1.4 games
    wplayed[i, j] += 1.
    wplayed[j, i] += 1.
#         win_loss[game.hteam]['l'] += 0.6
#         win_loss[game.ateam]['w'] += 1.4

In [165]:
print played
print wins
print wplayed
print wwins

[[ 0.   0.   0.6  1.4]
 [ 0.   0.   1.4  2. ]
 [ 1.4  0.6  0.   0.6]
 [ 0.6  2.   1.4  0. ]]
[[ 0.   0.   0.6  0. ]
 [ 0.   0.   1.4  0.6]
 [ 0.   0.   0.   0. ]
 [ 0.6  0.6  1.4  0. ]]
[[ 0.  0.  1.  1.]
 [ 0.  0.  1.  2.]
 [ 1.  1.  0.  1.]
 [ 1.  2.  1.  0.]]
[[ 0.  0.  1.  0.]
 [ 0.  0.  1.  1.]
 [ 0.  0.  0.  0.]
 [ 1.  1.  1.  0.]]


In [201]:
def get_wp(team, played, wins, exclude=None):
    idx = team_index[team]
    if exclude is not None:
        exclude_idx = team_index[exclude]
        return (np.sum(wins[idx, :]) - wins[idx, exclude_idx]) / (np.sum(played[idx, :]) - played[idx, exclude_idx])
    else:
        return np.sum(wins[idx, :]) / np.sum(played[idx, :])
def opp_wp(team):
    idx = team_index[team]
    opponents = filter(lambda tup: tup[1] > 0., zip(teams.values[:, 0], wplayed[:, idx]))
    vals = [(get_wp(opponent[0], wplayed, wwins, exclude=team), opponent[1]) for opponent in opponents]
    return np.sum(x[0] * x[1] for x in vals) / np.sum(x[1] for x in vals)
def opp_wp(team):
    idx = team_index[team]
    opponents = filter(lambda tup: tup[1] > 0., zip(teams.values[:, 0], wplayed[:, idx]))
    vals = [(opponent[0], get_wp(opponent[0], wplayed, wwins, exclude=team), opponent[1]) for opponent in opponents]
#     return vals
    return np.sum(x[1] * x[2] for x in vals) / np.sum(x[2] for x in vals)
def opp_opp_wp(team, opp_wp):
    pass




In [202]:
print win_loss
for team in win_loss:
    wp = get_wp(team, played, wins)
    print team, wp, opp_wp(team)

{'Duke': {'l': 0, 'w': 0}, 'Kansas': {'l': 0, 'w': 0}, 'Minnesota': {'l': 0, 'w': 0}, 'UConn': {'l': 0, 'w': 0}}
Duke 0.3 0.333333333333
Kansas 0.588235294118 0.666666666667
Minnesota 0.0 0.388888888889
UConn 0.65 0.75


In [105]:
teams.values[played[:, 0].astype(bool)]

array([['Minnesota', 2],
       ['UConn', 3]], dtype=object)

In [212]:
print wplayed#.astype(bool)
np.where(wplayed > 0)

[[ 0.  0.  1.  1.]
 [ 0.  0.  1.  2.]
 [ 1.  1.  0.  1.]
 [ 1.  2.  1.  0.]]


(array([0, 0, 1, 1, 2, 2, 2, 3, 3, 3]), array([2, 3, 2, 3, 0, 1, 3, 0, 1, 2]))

In [260]:
total_played = np.sum(wplayed, axis=1)
total_played = np.dot(np.ones(4)[:, np.newaxis], total_played[:, np.newaxis].T)
print total_played

[[ 2.  3.  3.  4.]
 [ 2.  3.  3.  4.]
 [ 2.  3.  3.  4.]
 [ 2.  3.  3.  4.]]


In [249]:
total_won = np.sum(wwins, axis=1)
print total_won
total_won = np.dot(np.ones(4)[:, np.newaxis], total_won[:, np.newaxis].T)

[ 1.  2.  0.  3.]


In [263]:
print wwins
print wplayed

[[ 0.  0.  1.  0.]
 [ 0.  0.  1.  1.]
 [ 0.  0.  0.  0.]
 [ 1.  1.  1.  0.]]
[[ 0.  0.  1.  1.]
 [ 0.  0.  1.  2.]
 [ 1.  1.  0.  1.]
 [ 1.  2.  1.  0.]]


In [284]:
weighted_total_won = np.sum(wins, axis=1)
weighted_total_played = np.sum(played, axis=1)
wp = weighted_total_won / weighted_total_played
print wp

[ 0.3         0.58823529  0.          0.65      ]


In [286]:
owp = (total_won - wwins.T) / ((total_played - wplayed.T))# * (wplayed.T).astype(bool))
weights = wplayed.T / np.sum(wplayed.T, axis=0)[:, np.newaxis]
owp = np.sum(ow / op * weights, axis=1)
print owp

[ 0.33333333  0.66666667  0.38888889  0.75      ]


In [287]:
(total_played - wplayed.T)

array([[ 2.,  3.,  2.,  3.],
       [ 2.,  3.,  2.,  2.],
       [ 1.,  2.,  3.,  3.],
       [ 1.,  1.,  2.,  4.]])

In [292]:
oowp = np.sum(owp * weights, axis=1)
print oowp

[ 0.56944444  0.62962963  0.58333333  0.51388889]


In [294]:
def rpi(wp, owp, oowp):
    return 0.25*wp + 0.5 * owp + 0.25 * oowp

In [295]:
rpi(wp, owp, oowp)

array([ 0.38402778,  0.63779956,  0.34027778,  0.66597222])