In [118]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../src')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [119]:
def weekify_stats(stats, schedule):
    full_daily_stats = []
    gdates = pd.to_datetime(stats.index.get_level_values('gameDate'), format='%Y%m%d')
    for week, games in schedule.items():
        games_df = pd.DataFrame(games)
        week_dates = pd.date_range(games_df.ts.min(), games_df.ts.max())
        for date in week_dates:
            daily_stats = stats[(gdates == date)].copy()
            daily_stats['date'] = date
            daily_stats['week'] = week
            full_daily_stats.append(daily_stats)
    return pd.concat(full_daily_stats)

In [120]:
from itertools import combinations, product, chain
from ast import literal_eval
import collections

POSITIONS_QUOTA = {
        'D': 4,
        'C': 2,
        'RW': 2,
        'LW': 2,
        'G': 2,
        'WC': 1
}

def check_valid_lineup(lineup, position_lookup, positions_quota=None):
    if positions_quota is None:
        positions_quota = {k: v for k, v in POSITIONS_QUOTA.items()}
    
    positions_map = {k:[] for k in positions_quota}
    multi_positions = []
    for player in lineup:
        if len(position_lookup[player]) == 1:
            pos = position_lookup[player][0]
            if len(positions_map[pos]) == positions_quota[pos]:
                if len(positions_map['WC']) == 0:
                    positions_map['WC'] = [player]
                else:
                    return False
            else:
                positions_map[pos] += [player]     
        else:
            multi_positions.append(player)

    positions_left = [p for p, v in positions_map.items() for i in range(positions_quota[p]-len(v))]
    multi_pos_positions = [position_lookup[p] for p in multi_positions]

    for c in list(product(*multi_pos_positions)):
        if collections.Counter(c) == collections.Counter(positions_left) or len(c) < 13:
            return c
    return False
    

def get_valid_lineups(day_teams, min_players=13):
    position_lookup = day_teams.set_index(['playerId'], drop=False).pos.apply(literal_eval).to_dict()
    n_combs = min(min_players, len(position_lookup))
    lineups = list(combinations(position_lookup.keys(), n_combs))

    
    valid_lineups = [l for l in lineups if check_valid_lineup(l, position_lookup) != False and len(l) > 0]
    if len(valid_lineups) == 0:
        if n_combs > 2:
            return get_valid_lineups(day_teams, min_players=n_combs-1)
    return valid_lineups

def get_lineup_teams(lineups, day_teams):
    teams = []
    for lineup in lineups:
        match_filter = (day_teams.playerId.isin(lineup))
        teams.append(day_teams[match_filter])
    return pd.concat(teams, keys=range(len(teams)), names=['lineup'])


from scipy import stats
from process_data import PRED_COLS

def get_resulted_points(lineup):
    metrics = [m for metrics in PRED_COLS.values() for m in metrics]
    res = lineup[metrics].sum()
    return res

def get_week_result(own_total, opp_total):
    result = 0
    for col in ['g','a','sog','fow','hit','block','pim','ppp', 'win', 'save', 'so']:
        if own_total[col] - opp_total[col] > 0:
            result += 1
        if own_total[col] == opp_total[col]:
            result += 0.5
    plusmin = own_total['goalsfor'] - opp_total['goalsfor'] + opp_total['goalsaga'] - own_total['goalsaga']
    if plusmin > 0:
        result += 1
    if plusmin == 0:
        result += 0.5
    
    gaa = own_total['ga'] / own_total['icetime'] - opp_total['ga'] / opp_total['icetime']
    if gaa < 0:
        result += 1
    if gaa == 0:
        result += 0.5
    return result

In [121]:
def get_pct_lineups(day_teams, team_id):
    lineups = get_valid_lineups(day_teams[day_teams.team_id == team_id])
    if len(lineups) == 0:
        return [()]
    lineup_teams = get_lineup_teams(lineups, day_teams)
    ranksum = lineup_teams.groupby('lineup')['rank'].sum()
    best = ranksum.rank(pct=True).sort_values().index.tolist()
    return [lineups[i] for i in best]

In [11]:
import yahoo_utils

game_id = 427
# players = yahoo_utils.get_players(game_id)
# players = players[~players.playerId.duplicated()]
# player_info = players.set_index('playerId')[['name','pos']]
# current_schedule = yahoo_utils.get_games_by_week(game_id)
teams = yahoo_utils.get_teams(game_id)
all_matchups = yahoo_utils.get_all_matchups(game_id)


In [83]:
def weekify_stats(stats, matchups):
    full_daily_stats = []
    gdates = pd.to_datetime(stats.gameDate, format='%Y%m%d')
    for m in all_matchups:
        for date in pd.date_range(m.week_start, m.week_end):
            daily_stats = stats[(gdates == date)].copy()
            daily_stats['date'] = date
            daily_stats['week'] = m.week
            full_daily_stats.append(daily_stats)
    return pd.concat(full_daily_stats).drop_duplicates()

In [89]:

import pandas as pd
y = pd.read_hdf('data/y.h5')
y_g = pd.read_hdf('data/y_g.h5')
y = pd.concat([y,y_g])
preds = pd.read_csv('data/2324_preds_simple.csv')
preds = preds.drop(['playerTeam','opposingTeam'], axis=1).drop_duplicates()
daily_preds = weekify_stats(preds, team_matchups)
daily_preds = daily_preds.merge(players[['playerId','name', 'pos', 'player_key']], on='playerId', how='left')
daily_preds['date'] = daily_preds.date.fillna(pd.to_datetime(daily_preds.gameDate, format='%Y%m%d'))
daily_results = daily_preds[['date','week', 'name','pos','player_key','playerId','gameDate']].merge(y.reset_index(), on=['playerId','gameDate'], how='left')

In [90]:
from ast import literal_eval
from process_data import PRED_COLS
current_team_id = '427.l.21834.t.3'
team_matchups = [m for m in all_matchups if current_team_id in [t.team_key for t in m.teams]]
position_lookup = players.set_index(['playerId'], drop=False).pos.apply(literal_eval).to_dict()

metrics = [m for metrics in PRED_COLS.values() for m in metrics]


In [115]:
selected_team = []
finals = {}
for m in team_matchups[1:]:
    print(m.week)
    opponent_id = [t.team_key for t in m.teams if t.team_key != current_team_id][0]
    dates = pd.date_range(m.week_start, m.week_end)
    
    
    ranks = daily_preds[daily_preds.date <= (m.week_start)].groupby('playerId')[metrics].agg('last')
    ranks['ga'] = -ranks['ga'] / ranks['icetime']
    ranks = ranks.drop('icetime', axis=1)
    ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
    ranks.name = 'rank'
    
    
    
    week_teams = teams.loc[(pd.to_datetime(teams.index) >= m.week_start)&(pd.to_datetime(teams.index) <= m.week_end)]
    starting_teams = teams.loc[(pd.to_datetime(teams.index) == m.week_start)]
    week_results = daily_results[(daily_results.date >= m.week_start)&(daily_results.date <= m.week_end)]
    week_results = week_results.merge(ranks, left_on='playerId', right_index=True)
    week_results = week_results.merge(week_teams, on=['player_key', 'date'])
        
    current_lineup = teams[(teams.team_id == current_team_id)&(teams.index.get_level_values('date') == m.week_start)]
    current_lineup = current_lineup.merge(players, how='left', on='player_key').playerId.tolist()

    all_available_players = players[(~players.player_key.isin(starting_teams.player_key))|(players.player_key.isin(current_lineup))]
    all_available_players = all_available_players.playerId.tolist()
    
    if len(selected_team) > 0:
        current_lineup = [p for p in selected_team]
    
    selected_team = []
    while len(selected_team) < 17:
        if len(selected_team) < 15:
            available = [p for p in current_lineup if p not in selected_team]
            
        else:
            available = [p for p in all_available_players if p not in selected_team]
            
        selected_player = ranks.loc[[p for p in available if p in ranks]].idxmax()

        selected_team.append(selected_player)
      
    opp_points = pd.Series({m:0 for metrics in PRED_COLS.values() for m in metrics})
    own_points = pd.Series({m:0 for metrics in PRED_COLS.values() for m in metrics})

    for date in dates:
        ranks = daily_preds[daily_preds.date <= (date)].groupby('playerId')[metrics].agg('last')
        ranks['ga'] = -ranks['ga'] / ranks['icetime']
        ranks = ranks.drop('icetime', axis=1)
        ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
        ranks.name = 'rank'
        week_results = daily_results[(daily_results.date >= m.week_start)&(daily_results.date <= m.week_end)]
        week_results = week_results.merge(ranks, left_on='playerId', right_index=True)
        week_results = week_results.merge(week_teams, on=['player_key', 'date'])
    
        day_teams = week_results.loc[week_results.date == date]
        
        opp_selected_lineup = get_pct_lineups(day_teams, opponent_id)[0]
        opp_selected_team = week_results.loc[week_results.playerId.isin(opp_selected_lineup)&(week_results.date == date)]
        opp_points += get_resulted_points(opp_selected_team)
        
        selected_lineup = get_pct_lineups(day_teams, current_team_id)[0]
        selected_lineup_stats = week_results.loc[week_results.playerId.isin(selected_lineup)&(week_results.date == date)]
        own_points += get_resulted_points(selected_lineup_stats)
        
    finals[m.week] = get_week_result(own_points, opp_points)
    print(finals[m.week])
    print(player_info.loc[selected_team])
    print('DROPS')
    for p in current_lineup:
        if p not in selected_team:
            print(player_info.loc[p].to_dict())
    print('ADDS')
    for p in selected_team:
        if p not in current_lineup:
            print(player_info.loc[p].to_dict())

2
6
                      name          pos
playerId                               
8477934     Leon Draisaitl  ['C', 'LW']
8476468        J.T. Miller  ['C', 'RW']
8474564     Steven Stamkos  ['C', 'LW']
8478483    Mitchell Marner       ['RW']
8477496     Elias Lindholm        ['C']
8475167      Victor Hedman        ['D']
8473994         Jamie Benn       ['LW']
8470794       Joe Pavelski  ['C', 'RW']
8479325     Charlie McAvoy        ['D']
8474593    Jacob Markstrom        ['G']
8475753       Justin Faulk        ['D']
8478492      Ilya Samsonov        ['G']
8475172        Nazem Kadri        ['C']
8476854    Hampus Lindholm        ['D']
8477346   MacKenzie Weegar        ['D']
8476881        Tomas Hertl        ['C']
8476875      Mike Matheson        ['D']
DROPS
{'name': 'Tristan Jarry', 'pos': "['G']"}
{'name': 'Chris Kreider', 'pos': "['LW']"}
ADDS
{'name': 'Tomas Hertl', 'pos': "['C']"}
{'name': 'Mike Matheson', 'pos': "['D']"}
3
8
                      name          pos
playerId      

In [117]:
a = """1	logo kultaiseksi maalattu radio	Loss	5 - 8
2	logo Kuulalakeri	Loss	4 - 8
3	logo Haukion Kala	Win	8 - 5
4	logo Piljardi	Loss	6 - 7
5	logo NWOSHM	Win	8 - 4
6	logo Kokouslihaa	Loss	4 - 7
7	logo Desinfiointiasema	Win	6 - 5
8	logo kultaiseksi maalattu radio	Loss	3 - 7
9	logo Kuulalakeri	Win	6 - 5
10	logo Haukion Kala	Win	6 - 4
11	logo Piljardi	Tie	6 - 6
12	logo NWOSHM	Loss	2 - 10
13	logo Kokouslihaa	Loss	3 - 10
14	logo Desinfiointiasema	Win	8 - 4
15	logo kultaiseksi maalattu radio	Loss	5 - 8
16	logo Kuulalakeri	Win	9 - 2
17	logo Haukion Kala	Win	8 - 3
18	logo Piljardi	Loss	2 - 11
19	logo NWOSHM	Win	6 - 5
20	logo Kokouslihaa	Loss	5 - 7
21	logo Desinfiointiasema	Loss	3 - 9
22	logo kultaiseksi maalattu radio	Tie	5 - 5
23	logo Kuulalakeri	Loss	2 - 10
24	logo Haukion Kala	Loss	3 - 9
25	logo Piljardi	Win	6 - 5
26	logo NWOSHM	Loss	3 - 9"""
actuals = {int(s.split('\t')[0]):int(s.split('\t')[-1].split()[0]) for s in a.split('\n')}
print((pd.Series(finals) - pd.Series(actuals)).sum())
(pd.Series(finals) - pd.Series(actuals))

39.5


1     NaN
2     2.0
3     0.0
4     2.5
5    -1.0
6     5.0
7     2.0
8    -2.0
9     0.5
10    1.5
11   -0.5
12    2.0
13    1.0
14    0.0
15   -1.0
16    0.5
17    1.5
18    3.0
19    3.0
20    3.5
21    3.0
22    3.0
23    1.5
24    1.5
25    1.0
26    6.0
dtype: float64