In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../src')

In [2]:
import yahoo_utils

# Yahoo game identifier handed to every yahoo_utils call below.
# NOTE(review): the player/team keys used later start with '427.', so this
# presumably selects one specific game/season - confirm before reusing.
game_id = 427
# Raw league data pulled through the project-local yahoo_utils helpers.
# The exact return types are not visible here; later cells treat `players`
# and `teams` as DataFrames, `current_schedule` as a mapping of week -> games
# and `all_matchups` as an iterable of matchup objects.
players = yahoo_utils.get_players(game_id)
current_schedule = yahoo_utils.get_games_by_week(game_id)
teams = yahoo_utils.get_teams(game_id)
all_matchups = yahoo_utils.get_all_matchups(game_id)

In [411]:
import pandas as pd

# Per-game predictions, indexed by the identifying columns so that only the
# numeric stat columns remain as data columns.
preds = pd.read_csv('../data/2324_preds.csv').set_index(['playerId','gameId','playerTeam','opposingTeam','gameDate','week'])
# Floor negative predictions at zero. `clip(lower=0)` is equivalent to the
# previous `clip(0, np.inf)` but does not need numpy, which this notebook
# never imports (the old form raised NameError on a fresh kernel).
preds = preds.clip(lower=0)
# Observed results; the two HDF files are stacked into one frame.
# NOTE(review): these paths are relative to the notebook directory ('data/')
# while the predictions are read from '../data/' - confirm this is intended.
y = pd.read_hdf('data/y.h5')
y_g = pd.read_hdf('data/y_g.h5')
y = pd.concat([y,y_g])


def weekify_stats(stats, schedule):
    """Tag every stat row with the calendar day and schedule week it falls in.

    Parameters
    ----------
    stats : pandas.DataFrame
        Frame whose index has a ``gameDate`` level parseable with ``%Y%m%d``.
    schedule : mapping
        Maps a week label to a collection of game records; each record must
        expose a ``ts`` field. A week spans every calendar day from its
        earliest to its latest ``ts``.

    Returns
    -------
    pandas.DataFrame
        Copies of the matching rows, one chunk per (week, day), concatenated
        in schedule order with added ``date`` and ``week`` columns. Rows whose
        game date lies outside every week are dropped.

    Raises
    ------
    ValueError
        From ``pd.concat`` when the schedule yields no days at all.
    """
    game_dates = pd.to_datetime(stats.index.get_level_values('gameDate'), format='%Y%m%d')
    chunks = []
    for week, games in schedule.items():
        timestamps = pd.DataFrame(games).ts
        for day in pd.date_range(timestamps.min(), timestamps.max()):
            # assign() works on a copy, so the caller's frame is never touched.
            chunks.append(stats[game_dates == day].assign(date=day, week=week))
    return pd.concat(chunks)

# Expand predictions to one chunk per scheduled day, then attach player
# metadata (name, positions, Yahoo player_key) by playerId.
daily_preds = weekify_stats(preds, current_schedule)
# NOTE(review): `playerId` is an index level of `preds` at this point, so this
# merge joins on an index level name; the results frame below is reset first.
daily_preds = daily_preds.merge(players[['playerId','name', 'pos', 'player_key']], on='playerId', how='left')

# Same treatment for the observed results.
daily_results = weekify_stats(y, current_schedule)
daily_results = daily_results.reset_index().merge(players[['playerId','name', 'pos', 'player_key']], on='playerId', how='left')

In [803]:
from itertools import combinations, product, chain
from ast import literal_eval
import collections

# Number of lineup slots per position (13 in total). 'WC' is the overflow
# slot: check_valid_lineup places a single-position player there when that
# player's own position is already full.
POSITIONS_QUOTA = {
        'D': 4,
        'C': 2,
        'RW': 2,
        'LW': 2,
        'G': 2,
        'WC': 1
}

def check_valid_lineup(lineup, position_lookup, positions_quota=None):
    """Check whether ``lineup`` fits into the per-position slot quota.

    Parameters
    ----------
    lineup : iterable
        Player ids to place.
    position_lookup : mapping
        Player id -> list of eligible positions.
    positions_quota : dict, optional
        Slots per position; defaults to a copy of ``POSITIONS_QUOTA``. Must
        contain a ``'WC'`` key.

    Returns
    -------
    tuple or False
        ``False`` when a single-position player finds both the own position
        and the 'WC' slot taken, or when no position combination for the
        multi-position players is accepted. Otherwise the accepted tuple of
        positions, one entry per multi-position player.

    NOTE(review): the result is an empty tuple (falsy) when the lineup has no
    multi-position player, so callers must compare with ``!= False`` rather
    than rely on truthiness. Also, the ``len(...) < 13`` clause accepts the
    first combination whenever fewer than 13 multi-position players are
    involved, regardless of whether it matches the open slots - confirm that
    this is the intended rule.
    """
    quota = dict(POSITIONS_QUOTA) if positions_quota is None else positions_quota

    filled = {slot: [] for slot in quota}
    flexible = []
    for player in lineup:
        eligible = position_lookup[player]
        if len(eligible) != 1:
            # Anything but exactly one eligible position is resolved later.
            flexible.append(player)
            continue
        slot = eligible[0]
        if len(filled[slot]) != quota[slot]:
            filled[slot].append(player)
        elif not filled['WC']:
            # Own position is full: spill into the overflow slot if it is free.
            filled['WC'] = [player]
        else:
            return False

    # One entry per slot that is still unfilled after the fixed placements.
    open_slots = []
    for slot, assigned in filled.items():
        open_slots.extend([slot] * (quota[slot] - len(assigned)))
    wanted = collections.Counter(open_slots)

    options = [position_lookup[player] for player in flexible]
    for assignment in product(*options):
        if len(assignment) < 13 or collections.Counter(assignment) == wanted:
            return assignment
    return False
    

def get_valid_lineups(day_teams, min_players=13):
    """Enumerate the largest lineups that pass ``check_valid_lineup``.

    Parameters
    ----------
    day_teams : pandas.DataFrame
        Needs a ``playerId`` column and a ``pos`` column holding the string
        form of a list of positions (parsed with ``literal_eval``).
    min_players : int
        Lineup size to try first, capped at the number of distinct players.
        Despite the name it acts as the starting (largest) size.

    Returns
    -------
    list of tuple
        Every accepted player-id combination of the first size that yields at
        least one. The size is reduced one step at a time while it is above
        2; the list is empty when nothing is accepted.
    """
    position_lookup = day_teams.set_index(['playerId'], drop=False).pos.apply(literal_eval).to_dict()
    size = min(min_players, len(position_lookup))

    accepted = []
    for candidate in combinations(position_lookup.keys(), size):
        # `!= False` on purpose: an empty tuple is a valid (accepting) result.
        if len(candidate) > 0 and check_valid_lineup(candidate, position_lookup) != False:
            accepted.append(candidate)

    if not accepted and size > 2:
        return get_valid_lineups(day_teams, min_players=size - 1)
    return accepted

def get_lineup_teams(lineups, day_teams):
    """Stack the rows of ``day_teams`` belonging to each lineup.

    Parameters
    ----------
    lineups : sequence
        Each element is a collection of player ids.
    day_teams : pandas.DataFrame
        Frame with a ``playerId`` column.

    Returns
    -------
    pandas.DataFrame
        Rows of every lineup concatenated under an outer index level named
        ``lineup`` whose value is the lineup's position in ``lineups``.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``lineups`` is empty.
    """
    per_lineup = [day_teams[day_teams.playerId.isin(members)] for members in lineups]
    return pd.concat(per_lineup, keys=range(len(per_lineup)), names=['lineup'])


from scipy import stats
from process_data import PRED_COLS

def get_expected_points(exp, opp_exp):
    """Expected number of category points against an opponent.

    Each category is scored as P(win) + 0.5 * P(tie), where the difference of
    the two totals is modelled with a Skellam distribution (difference of two
    Poisson counts) parameterised by the expected totals.

    Parameters
    ----------
    exp : pandas.Series
        Own expected totals, indexed by stat name.
    opp_exp : pandas.Series or mapping
        Opponent expected totals with the same keys.

    Returns
    -------
    float
        Sum of the per-category scores. Categories whose own expectation is
        missing or zero are left out, so the result is 0 when none is usable.

    Notes
    -----
    Opponent values are not screened: a zero/NaN opponent expectation or a
    zero ``icetime`` propagates whatever ``scipy.stats.skellam`` and the
    division produce for that category.
    """
    # Parentheses are required: `|` binds tighter than `==`, so the previous
    # `exp.isna() | exp == 0` evaluated `(exp.isna() | exp) == 0` and failed to
    # mask NaN entries, which then turned the whole sum into NaN.
    no_results = exp.isna() | (exp == 0)

    def _win_share(unfavourable, favourable):
        # P(unfavourable count < favourable count) plus half the tie mass.
        return (stats.skellam.cdf(-1, unfavourable, favourable)
                + 0.5 * stats.skellam.pmf(0, unfavourable, favourable))

    vals = {}

    # Plain counting categories: a higher own total wins.
    for col in ['g','a','sog','fow','hit','block','pim','ppp', 'win', 'save', 'so']:
        if not no_results[col]:
            vals[col] = _win_share(opp_exp[col], exp[col])

    # Plus/minus: own goals-for and opponent goals-against count in our favour.
    if not no_results['goalsfor'] and not no_results['goalsaga']:
        vals['plusmin'] = _win_share(opp_exp['goalsfor']+exp['goalsaga'],
                                     opp_exp['goalsaga']+exp['goalsfor'])

    # Goals against per unit of ice time: the lower rate wins, hence the swap.
    if not no_results['icetime'] and not no_results['ga']:
        vals['gaa'] = _win_share(exp['ga']/exp['icetime'],
                                 opp_exp['ga']/opp_exp['icetime'])
    return sum(vals.values())

def get_resulted_points(lineup):
    """Sum every metric column listed in ``PRED_COLS`` over the lineup rows.

    Parameters
    ----------
    lineup : pandas.DataFrame
        Rows of the selected players; must contain all metric columns.

    Returns
    -------
    pandas.Series
        Column totals, indexed by metric name.
    """
    metric_cols = list(chain.from_iterable(PRED_COLS.values()))
    return lineup[metric_cols].sum()

def get_week_result(own_total, opp_total):
    """Score a finished week category by category.

    A won category is worth 1 point and a tied one 0.5. The eleven counting
    stats are won by the higher total, plus/minus by the higher value of
    (goalsfor - goalsaga), and goals against per ice time by the lower rate.

    Parameters
    ----------
    own_total, opp_total : mapping or pandas.Series
        Weekly totals keyed by stat name.

    Returns
    -------
    int or float
        Total points earned out of 13 categories.
    """
    counting_stats = ('g', 'a', 'sog', 'fow', 'hit', 'block', 'pim', 'ppp', 'win', 'save', 'so')

    points = 0
    for stat in counting_stats:
        own_value, opp_value = own_total[stat], opp_total[stat]
        if own_value - opp_value > 0:
            points += 1
        elif own_value == opp_value:
            points += 0.5

    plus_minus_edge = own_total['goalsfor'] - opp_total['goalsfor'] + opp_total['goalsaga'] - own_total['goalsaga']
    if plus_minus_edge > 0:
        points += 1
    elif plus_minus_edge == 0:
        points += 0.5

    # Negative difference means we concede less per unit of ice time.
    gaa_gap = own_total['ga'] / own_total['icetime'] - opp_total['ga'] / opp_total['icetime']
    if gaa_gap < 0:
        points += 1
    elif gaa_gap == 0:
        points += 0.5
    return points

In [804]:
def optimize_lineup(preds, roster, opp_exp):
    """Pick the valid lineup with the highest expected points.

    Parameters
    ----------
    preds : pandas.DataFrame
        Predictions with ``playerId``, ``pos`` and the metric columns listed
        in ``PRED_COLS``.
    roster : collection
        Player ids that may be fielded.
    opp_exp : pandas.Series
        Opponent expected totals, indexed by metric name.

    Returns
    -------
    tuple or list
        Player ids of the best lineup, or ``[]`` when the roster yields no
        valid lineup.
    """
    lineups = get_valid_lineups(preds[preds.playerId.isin(roster)])
    if len(lineups) == 0:
        return []

    metric_cols = [m for cols in PRED_COLS.values() for m in cols]
    # The previous version referenced an undefined `opp_lineup_teams` and fed
    # raw per-player rows to get_expected_points. Score each lineup on its
    # summed metrics instead, which is the Series that function expects.
    lineup_totals = get_lineup_teams(lineups, preds).groupby('lineup')[metric_cols].sum()
    expected = lineup_totals.apply(lambda totals: get_expected_points(totals, opp_exp), axis=1)
    return lineups[expected.idxmax()]
    

def get_pct_lineups(day_teams, team_id):
    """Rank a team's valid lineups by summed percentile rank across metrics.

    Parameters
    ----------
    day_teams : pandas.DataFrame
        Rows for one day with ``team_id``, ``playerId``, ``pos`` and the
        metric columns listed in ``PRED_COLS``.
    team_id : hashable
        Value of ``team_id`` identifying the team to build lineups for.

    Returns
    -------
    list of tuple
        Lineups ordered best first, so element 0 has the highest rank-sum.
        ``[()]`` when the team has no valid lineup.
    """
    lineups = get_valid_lineups(day_teams[day_teams.team_id == team_id])
    if len(lineups) == 0:
        return [()]

    # Derive the metric columns here rather than reading the global `metrics`,
    # which is only defined in a later cell and is missing on a fresh kernel.
    metric_cols = [m for cols in PRED_COLS.values() for m in cols]
    lineup_teams = get_lineup_teams(lineups, day_teams)
    metric_sums = lineup_teams.groupby('lineup')[metric_cols].sum()
    # Higher percentile rank is better in every column, so sort descending.
    # The former ascending sort put the weakest lineup at index 0, which is
    # the element callers pick.
    rank_sum = metric_sums.rank(pct=True).sum(1)
    best = rank_sum.sort_values(ascending=False).index.tolist()
    return [lineups[i] for i in best]

In [834]:
def get_positions_quota(selected_team, pos_lookup):
    """Work out the slots left after seating the single-position players.

    Parameters
    ----------
    selected_team : iterable
        Player ids already chosen.
    pos_lookup : mapping
        Player id -> list of eligible positions.

    Returns
    -------
    (dict, list)
        A copy of ``POSITIONS_QUOTA`` reduced by one for each single-position
        player (values can go negative when a position is over-subscribed),
        and the ids of the players whose position list does not have exactly
        one entry.
    """
    remaining = dict(POSITIONS_QUOTA)
    undecided = []
    for player in selected_team:
        eligible = pos_lookup[player]
        if len(eligible) != 1:
            undecided.append(player)
            continue
        remaining[eligible[0]] -= 1
    return remaining, undecided

In [836]:
# Keep only the first row per playerId, then build a playerId-indexed lookup
# of name and positions used when printing adds and drops.
players = players.drop_duplicates(subset='playerId')
player_info = players[['playerId', 'name', 'pos']].set_index('playerId')

In [864]:
# Inspect the merged per-day predictions: skater rows carry NaN in the goalie
# columns (ga, win, so, save, icetime) and goalie rows the reverse.
daily_preds

Unnamed: 0,playerId,g,a,sog,fow,hit,block,pim,goalsfor,goalsaga,ppp,ga,win,so,save,icetime,date,week,name,pos,player_key
0,8470621,0.171275,0.207537,1.438163,0.030374,0.603384,0.355591,0.765039,0.410782,0.287953,0.061132,,,,,,2023-10-10,1,Corey Perry,['RW'],427.p.3365
1,8473986,0.289820,0.383150,1.883711,0.029522,0.758111,0.506706,0.552788,0.620279,0.496826,0.196790,,,,,,2023-10-10,1,Alex Killorn,"['LW', 'RW']",427.p.5626
2,8474013,0.046603,0.177366,1.257958,0.040283,1.439655,1.529032,0.589603,0.714400,0.569436,0.000000,,,,,,2023-10-10,1,Ian Cole,['D'],427.p.4257
3,8474034,0.087127,0.117864,0.789029,0.051592,2.148108,0.282324,1.202437,0.398256,0.319671,0.000000,,,,,,2023-10-10,1,Pat Maroon,['LW'],427.p.4352
4,8474564,0.416986,0.548136,2.573215,5.506841,1.002777,0.567323,0.554659,0.760039,0.610245,0.333463,,,,,,2023-10-10,1,Steven Stamkos,"['C', 'LW']",427.p.4471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90116,8471774,,,,,,,,,,,0.982213,0.142794,0.625575,6.128249,3.359514,2024-04-18,26,Alex Stalock,['G'],427.p.4642
90117,8475852,,,,,,,,,,,1.546129,0.229950,0.365337,15.966809,5.335092,2024-04-18,26,Petr Mrazek,['G'],427.p.5114
90118,8479380,,,,,,,,,,,0.185685,0.038991,0.937165,0.958762,0.779255,2024-04-18,26,Dylan Wells,['G'],427.p.7229
90119,8482821,,,,,,,,,,,0.855380,0.133717,0.680913,7.753466,3.094562,2024-04-18,26,Arvid Soderblom,['G'],427.p.8871


In [None]:
# Week-by-week add/drop simulation for a single fantasy team.
# For every matchup: (1) total the opponent's actual results using their
# rank-selected daily lineup, (2) greedily assemble a 17-player squad that
# maximises expected category points against those totals, (3) print the
# players to drop and to add relative to the current roster.
current_team_id = '427.l.21834.t.3'
# Only the matchups this team takes part in.
team_matchups = [m for m in all_matchups if current_team_id in [t.team_key for t in m.teams]]
# playerId -> list of eligible positions, parsed from the string-encoded `pos` column.
position_lookup = players.set_index(['playerId'], drop=False).pos.apply(literal_eval).to_dict()

# Flat list of every metric column named in PRED_COLS.
metrics = [m for metrics in PRED_COLS.values() for m in metrics]



selected_team = []
# The first two matchups in the list are skipped.
for m in team_matchups[2:]:
    print(m.week)
    opponent_id = [t.team_key for t in m.teams if t.team_key != current_team_id][0]
    dates = pd.date_range(m.week_start, m.week_end)

    # NOTE(review): here `teams.index` is parsed as dates directly, while the
    # `current_lineup` filter further down reads an index level named 'date' -
    # confirm which index layout `teams` really has.
    week_teams = teams.loc[(pd.to_datetime(teams.index) >= m.week_start)&(pd.to_datetime(teams.index) <= m.week_end)]
    starting_teams = teams.loc[(pd.to_datetime(teams.index) == m.week_start)]
    
    
    week_preds = daily_preds[(daily_preds.date >= m.week_start)&(daily_preds.date <= m.week_end)]
    week_preds = week_preds.merge(week_teams, on=['player_key', 'date'], how='left')
    # NOTE(review): `daily_results` was already reset when it was built, so this
    # extra reset_index() only adds an 'index' column.
    week_results = week_teams.merge(daily_results.reset_index(), on=['player_key', 'date'])

    # Opponent's weekly totals: each day take the first lineup returned by
    # get_pct_lineups and add up that lineup's actual results.
    opp_points = pd.Series({m:0 for metrics in PRED_COLS.values() for m in metrics})
    for date in dates:
        day_teams = week_preds.loc[week_preds.date == date]
        opp_selected_lineup = get_pct_lineups(day_teams, opponent_id)[0]
        opp_selected_team = week_results.loc[week_results.playerId.isin(opp_selected_lineup)&(week_results.date == date)]
        opp_points += get_resulted_points(opp_selected_team)
        
    # NOTE(review): `date` is left over from the loop above, so this reads the
    # roster on the last day of the week - confirm that is the intended day.
    current_lineup = teams[(teams.team_id == current_team_id)&(teams.index.get_level_values('date') == date)]
    current_lineup = current_lineup.merge(players, how='left', on='player_key').playerId.tolist()
    
    # Players not on any roster at week start, plus our own.
    # NOTE(review): `current_lineup` holds playerId values at this point but is
    # compared against `player_key` here - the second condition probably never
    # matches; verify.
    all_available_players = players[(~players.player_key.isin(starting_teams.player_key))|(players.player_key.isin(current_lineup))]
    all_available_players = all_available_players.playerId.tolist()
    
    # From the second simulated week on, last week's selection is the roster.
    if len(selected_team) > 0:
        current_lineup = [p for p in selected_team]
    
    own_points = pd.Series({m:0 for metrics in PRED_COLS.values() for m in metrics})
    
    # Greedy build: the first 14 picks come from the current roster only, the
    # last 3 may come from the whole available pool.
    selected_team = []
    while len(selected_team) < 17:
        if len(selected_team) < 14:
            available = [p for p in current_lineup if p not in selected_team]
            
        else:
            available = [p for p in all_available_players if p not in selected_team]
        week_team_to_check = []
        
        # Per day, keep the predictions of those candidates that still fit
        # into the lineup next to the already selected players playing that day.
        for date in dates:
            day_sel_team = week_preds[(week_preds.date == date)&(week_preds.playerId.isin(selected_team))].playerId.tolist()
            positions_quota, lineup_check_players = get_positions_quota(day_sel_team, position_lookup)
            
            players_to_check = []
            for player in available:
                # NOTE(review): truthiness test - check_valid_lineup returns an
                # empty tuple (falsy) when no multi-position player is involved,
                # so such candidates are rejected here; confirm intent.
                if check_valid_lineup(lineup_check_players + [player], position_lookup, positions_quota):
                    players_to_check.append(player)
            week_team_to_check.append(week_preds[week_preds.playerId.isin(players_to_check)&(week_preds.date == date)])
        # Weekly expected totals per candidate, on top of the running squad totals.
        week_team_to_check = pd.concat(week_team_to_check).groupby('playerId')[metrics].sum()
        week_team_to_check += own_points
        # NOTE(review): `test_points` is computed but never used; the scoring
        # below compares against the full `opp_points`.
        test_points = opp_points * (len(selected_team) + 1)/14
        
        if len(week_team_to_check) > 0:
            expected_by_p = week_team_to_check.apply(lambda x: get_expected_points(x, opp_points), axis=1)
            selected_player = expected_by_p.idxmax()
            own_points = week_team_to_check.loc[selected_player] 
        else:
            # No candidate fits on any day: fall back to the best summed
            # percentile rank; `own_points` is left unchanged in this branch.
            selected_player = week_preds.loc[week_preds.playerId.isin(available)].groupby('playerId')[metrics].sum().rank(pct=True).sum(1).idxmax()
        
        selected_team.append(selected_player)
    print(get_expected_points(own_points, opp_points))
        
    print('DROPS')
    for p in current_lineup:
        if p not in selected_team:
            print(player_info.loc[p].to_dict())
    print('ADDS')
    for p in selected_team:
        if p not in current_lineup:
            print(player_info.loc[p].to_dict())


In [867]:
# No-op placeholder cell.
pass