In [245]:
# Load IPython's autoreload extension and set mode 2 so edits to project modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
# Put ../src (relative to the notebook's working directory) on the import path so the
# project modules imported in later cells can be found.
sys.path.append('../src')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from get_data import load_history, load_current, combine_history, process_y, load_bios, load_team_data
# Data refresh step. The helpers live in the project's get_data module and their effects
# are not visible in this notebook — presumably each one fetches or rebuilds a dataset;
# confirm before relying on the call order.
# load_history() is left commented out, so it is skipped on this run.
# load_history()
load_current()
combine_history()
load_team_data()
load_bios()
process_y()

In [432]:
from itertools import combinations, product, chain
from ast import literal_eval
import collections

# Number of lineup slots per position; check_valid_lineup copies this as its default quota.
# 'WC' is the single overflow slot that check_valid_lineup hands to a player whose own
# position is already full (presumably a wildcard/utility slot — confirm against league rules).
POSITIONS_QUOTA = {
        'D': 4,
        'C': 2,
        'RW': 2,
        'LW': 2,
        'G': 2,
        'WC': 1
}

def check_valid_lineup(lineup, position_lookup, positions_quota=None):
    """Return True when every player in ``lineup`` can be given a lineup slot.

    Parameters
    ----------
    lineup : sequence
        Player ids to place.
    position_lookup : mapping
        Player id -> list of positions the player is eligible for.
    positions_quota : dict, optional
        Position -> number of open slots. ``'WC'`` is the overflow pool that
        accepts a player of any position. Non-positive counts mean "no slot
        left". Defaults to a copy of ``POSITIONS_QUOTA``; the dict passed in is
        never modified.

    Returns
    -------
    bool
        True if some assignment of players to slots exists, else False.

    Raises
    ------
    KeyError
        If a player in ``lineup`` is missing from ``position_lookup``.

    Notes
    -----
    Single-position players are placed first (own position, then ``'WC'``).
    Multi-position players are then tried in every combination of their
    eligible positions; a combination fits when the players that exceed the
    open position slots can all be absorbed by the remaining ``'WC'`` slots.
    A position with no quota entry can only be served by ``'WC'``; a player
    with an empty position list can never be placed.
    """
    if positions_quota is None:
        # With the default quota every real position has at least two slots,
        # so any lineup of fewer than three players fits. The shortcut is NOT
        # valid for a caller-supplied (possibly reduced) quota.
        if len(lineup) < 3:
            return True
        positions_quota = dict(POSITIONS_QUOTA)

    open_slots = collections.Counter(
        {pos: max(count, 0) for pos, count in positions_quota.items() if pos != 'WC'}
    )
    wildcards_left = max(positions_quota.get('WC', 0), 0)

    flexible_players = []
    for player in lineup:
        eligible = position_lookup[player]
        if len(eligible) != 1:
            flexible_players.append(player)
            continue
        pos = eligible[0]
        if open_slots[pos] > 0:
            open_slots[pos] -= 1
        elif wildcards_left > 0:
            wildcards_left -= 1
        else:
            return False

    if not flexible_players:
        return True

    eligible_lists = [position_lookup[player] for player in flexible_players]
    for assignment in product(*eligible_lists):
        # Counter subtraction keeps only positive counts, i.e. the players
        # that do not fit into an open slot of their assigned position.
        overflow = collections.Counter(assignment) - open_slots
        if sum(overflow.values()) <= wildcards_left:
            return True
    return False
    

def get_valid_lineups(day_teams, min_players=13):
    """List every valid lineup of the largest feasible size.

    ``day_teams`` needs a ``playerId`` column and a ``pos`` column holding the
    string form of a list of positions. Lineups of
    ``min(min_players, number of players)`` players are tried first; while no
    lineup of the current size passes ``check_valid_lineup`` and the size is
    above two, the size is reduced by one and the search repeated.

    Returns a list of tuples of player ids (possibly empty).
    """
    parsed_positions = day_teams.set_index(['playerId'], drop=False).pos.apply(literal_eval)
    position_lookup = parsed_positions.to_dict()

    size = min(min_players, len(position_lookup))
    while True:
        valid_lineups = []
        for candidate in combinations(position_lookup.keys(), size):
            if check_valid_lineup(candidate, position_lookup) != False and len(candidate) > 0:
                valid_lineups.append(candidate)
        # Stop as soon as something fits, or once sizes down to two were tried.
        if valid_lineups or size <= 2:
            return valid_lineups
        size -= 1

def get_lineup_teams(lineups, day_teams):
    """Stack the ``day_teams`` rows of every lineup into one frame.

    For each lineup the rows whose ``playerId`` is in that lineup are selected;
    the pieces are concatenated under an outer index level named ``'lineup'``
    holding the lineup's position in ``lineups``.
    """
    per_lineup_rows = [day_teams[day_teams.playerId.isin(members)] for members in lineups]
    lineup_numbers = range(len(per_lineup_rows))
    return pd.concat(per_lineup_rows, keys=lineup_numbers, names=['lineup'])


from scipy import stats
from process_data import PRED_COLS

def get_resulted_points(lineup):
    """Sum every metric column named in ``PRED_COLS`` over the rows of ``lineup``.

    The metric names are all values of ``PRED_COLS`` flattened in order; the
    result is the column-wise sum restricted to those columns.
    """
    metric_columns = list(chain.from_iterable(PRED_COLS.values()))
    return lineup[metric_columns].sum()

def get_pct_lineups(day_teams, team_id):
    """Return the valid lineups of one team ordered by ascending summed ``rank``.

    Valid lineups are built from the rows of ``day_teams`` whose ``team_id``
    matches. The ``rank`` column is summed per lineup and the lineups are
    returned from the smallest sum to the largest. When the team has no valid
    lineup the result is ``[()]``.
    """
    team_rows = day_teams[day_teams.team_id == team_id]
    lineups = get_valid_lineups(team_rows)
    if not lineups:
        return [()]

    stacked = get_lineup_teams(lineups, day_teams)
    rank_totals = stacked.groupby('lineup')['rank'].sum()
    ordering = rank_totals.rank(pct=True).sort_values().index.tolist()
    return [lineups[position] for position in ordering]

In [660]:
import yahoo_utils

game_id = 453
players = yahoo_utils.get_players(game_id)
# Keep the first row per playerId. The explicit .copy() makes the filtered frame
# independent, so the team_yh assignment below is not a write to a slice of the
# original frame (which triggers pandas' SettingWithCopyWarning).
players = players[~players.playerId.duplicated()].copy()
player_info = players.set_index('playerId')[['name','pos','team']]
# Expand the two-letter codes found in team_yh to three-letter codes.
players['team_yh'] = players.team_yh.replace({
    'SJ':'SJS',
    'LA':'LAK',
    'TB':'TBL',
    'NJ':'NJD'
})
current_schedule = yahoo_utils.get_games_by_week(game_id)
teams = yahoo_utils.get_teams(game_id, True)

q = yahoo_utils.get_q(game_id)
# One team-info object per distinct team_id; the id handed to the query object is the
# part of team_id after its last '.'.
info = [q.get_team_info(team.split('.')[-1]) for team in teams.team_id.drop_duplicates()]

In [518]:
from ast import literal_eval
from process_data import PRED_COLS
import pandas as pd

# The league team whose name matches the literal below; IndexError if it is absent.
current_team = [
    team_info for team_info in info
    if team_info.name.decode('utf8') == 'Kiitos ryhmään pääsystä'
][0]

# playerId -> list of eligible positions, parsed from the stringified lists in `pos`.
position_lookup = (
    players
    .set_index(['playerId'], drop=False)
    .pos
    .apply(literal_eval)
    .to_dict()
)
# Flat list of every metric name in PRED_COLS.
metrics = [column for columns in PRED_COLS.values() for column in columns]

# Latest prediction row per player: the last row by gameId after dropping the team
# columns and exact duplicates.
preds = (
    pd.read_csv('data/latest_preds.csv')
    .drop(['playerTeam','opposingTeam'], axis=1)
    .drop_duplicates()
    .sort_values('gameId')
    .groupby('playerId')
    .last()
)
date_now = pd.to_datetime('now').date()
# Derived categories: goal differential, and goals against negated and divided by ice time.
preds = preds.assign(
    plusmin=lambda frame: frame['goalsfor'] - frame['goalsaga'],
    ga=lambda frame: -frame['ga'] / frame['icetime'],
)

In [658]:
# Flatten the {week: [game, ...]} schedule into one record per game, tagged with its week.
games_list = [
    {'week': week, **game}
    for week, games_in_week in current_schedule.items()
    for game in games_in_week
]
games_df = pd.DataFrame(games_list).assign(ts=lambda frame: pd.to_datetime(frame['ts']))

# One row per (game, player): attach the players of the home side and of the away side
# through their team_yh code, then drop rows with a missing value in any kept column.
player_games = pd.concat([
    games_df.merge(players, how='left', left_on=side, right_on='team_yh')
    for side in ('home', 'away')
])[['week','gameId','ts','pos','name','playerId']].dropna()

In [544]:
def get_positions_quota(selected_team, pos_lookup):
    """Compute the slots still open once ``selected_team`` has been placed.

    Parameters
    ----------
    selected_team : iterable
        Player ids that are already selected.
    pos_lookup : mapping
        Player id -> list of eligible positions.

    Returns
    -------
    tuple(dict, list)
        ``(positions_quota, lineup_check_players)``: a fresh copy of
        ``POSITIONS_QUOTA`` reduced by the single-position players, and the
        multi-position players, which are not placed here and are returned for
        a later validity check.

    Raises
    ------
    KeyError
        If a player is missing from ``pos_lookup`` or a single-position player
        has a position without a quota entry.

    Notes
    -----
    A single-position player whose own position is already full consumes a
    ``'WC'`` slot instead. Counts never drop below zero: when neither the
    position nor ``'WC'`` has room the player is simply not charged anywhere.
    """
    positions_quota = dict(POSITIONS_QUOTA)
    lineup_check_players = []
    for player in selected_team:
        eligible = pos_lookup[player]
        if len(eligible) != 1:
            lineup_check_players.append(player)
            continue
        pos = eligible[0]
        if positions_quota[pos] > 0:
            positions_quota[pos] -= 1
        elif positions_quota.get('WC', 0) > 0:
            # Own position is full: the player occupies the overflow slot.
            positions_quota['WC'] -= 1
    return positions_quota, lineup_check_players


def get_rest_of_season_games(date_now, player_games, selected_team, pos_lookup=None):
    """Count, per player, the upcoming game days on which the player fits the lineup.

    Parameters
    ----------
    date_now : date-like
        First day considered; days run up to ``player_games.ts.max()`` inclusive.
    player_games : pandas.DataFrame
        One row per (player, game) with at least ``playerId`` and ``ts``.
    selected_team : list
        Player ids already selected; they are never counted themselves.
    pos_lookup : mapping, optional
        Player id -> list of eligible positions. Defaults to the notebook-level
        ``position_lookup``.

    Returns
    -------
    pandas.Series
        Indexed by every ``playerId`` in ``player_games``; the value is the
        number of days on which the player has a game and ``check_valid_lineup``
        accepts the player next to the selected players playing that day.

    Raises
    ------
    KeyError
        If a counted player has no entry in ``pos_lookup``.
    """
    if pos_lookup is None:
        pos_lookup = position_lookup
    already_selected = set(selected_team)

    games_left = {player: 0 for player in player_games.playerId.unique()}
    for date in pd.date_range(date_now, player_games.ts.max()):
        todays_players = player_games[player_games.ts == date].playerId
        selected_today = todays_players[todays_players.isin(selected_team)].tolist()
        positions_quota, lineup_check_players = get_positions_quota(selected_today, pos_lookup)

        # The verdict depends only on the candidate's eligible positions, so it is
        # computed once per distinct position list per day.
        fits_by_positions = {}
        for player in todays_players.tolist():
            if player in already_selected:
                continue
            positions_key = tuple(pos_lookup[player])
            if positions_key not in fits_by_positions:
                fits_by_positions[positions_key] = check_valid_lineup(
                    lineup_check_players + [player], pos_lookup, positions_quota
                )
            if fits_by_positions[positions_key]:
                games_left[player] += 1
    return pd.Series(games_left)

Unnamed: 0,week,gameId,ts,pos,name,playerId
0,1,2024020001,2024-10-04,['RW'],Kyle Okposo,8473449
1,1,2024020001,2024-10-04,['G'],James Reimer,8473503
2,1,2024020001,2024-10-04,['LW'],Jason Zucker,8475722
3,1,2024020001,2024-10-04,['D'],Connor Clifton,8477365
4,1,2024020001,2024-10-04,['RW'],Alex Tuch,8477949
...,...,...,...,...,...,...
1742,1,2024020041,2024-10-13,['C'],Nikita Nesterenko,8481754
1743,1,2024020041,2024-10-13,['D'],Jackson LaCombe,8481605
1744,1,2024020041,2024-10-13,['C'],Leo Carlsson,8484153
1745,1,2024020041,2024-10-13,['RW'],Sam Colangelo,8482118


In [683]:
# Display the `players` rows whose player_key appears in `starting_teams`.
# NOTE(review): `starting_teams` is only assigned inside the roster-selection loop of a
# later cell, so on a fresh kernel this cell fails unless that cell has been run first.
players[(players.player_key.isin(starting_teams.player_key))]

Unnamed: 0,pos,name,player_key,team_yh,status,playerId,position,team,birthDate,weight,height,nationality,shootsCatches,primaryNumber,primaryPosition
2,['D'],Brent Burns,453.p.3358,CAR,,8470613,D,CAR,1985-03-09,230.0,"6' 5""",CAN,R,88.0,D
4,['LW'],Alex Ovechkin,453.p.3637,WSH,,8471214,L,WSH,1985-09-17,235.0,"6' 3""",RUS,R,8.0,L
7,['C'],Sidney Crosby,453.p.3737,PIT,,8471675,C,PIT,1987-08-07,200.0,"5' 11""",CAN,L,87.0,C
20,['D'],Kris Letang,453.p.4064,PIT,,8471724,D,PIT,1987-04-24,201.0,"6' 0""",CAN,R,58.0,D
34,['LW'],Brad Marchand,453.p.4351,BOS,DTD,8473419,L,BOS,1988-05-11,181.0,"5' 9""",CAN,L,63.0,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,['D'],Brock Faber,453.p.30075,MIN,,8482122,D,MIN,2002-08-22,200.0,"6' 1""",USA,R,7.0,D
937,['G'],Devon Levi,453.p.30111,BUF,,8482221,G,BUF,2001-12-27,184.0,"6' 0""",CAN,L,27.0,G
952,['LW'],Matthew Knies,453.p.30227,TOR,,8482720,L,TOR,2002-10-17,210.0,"6' 2""",USA,L,23.0,L
957,['C'],Connor Bedard,453.p.30544,CHI,,8484144,C,CHI,2005-07-17,185.0,"5' 10""",CAN,R,98.0,C


In [684]:
# Stat categories that enter the ranking; each one is read as a column of `preds`.
cats = ['g','a','sog','fow','hit','block','pim','plusmin','ga','win','so','save']
# First matchup whose week_start is today or later (IndexError if there is none).
m = [m for m in current_team.matchups if pd.to_datetime(m.week_start).date() >= date_now][0]
print(m.week)
opponent_id = [t.team_key for t in m.teams if t.team_key != current_team.team_key][0]
# Days of that matchup week, starting no earlier than today.
dates = pd.date_range(max(pd.to_datetime(m.week_start).date(), date_now), m.week_end)


# Percentile rank of each player's mean z-score over the prediction columns.
# `preds` already carries `plusmin` and the negated per-icetime `ga` from the cell that
# loads it, so they are not derived again here: repeating the `ga` step would negate and
# divide it a second time.
ranks = preds.drop('icetime', axis=1)
ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
ranks.name = 'rank'

week_teams = teams.loc[(pd.to_datetime(teams.index) >= m.week_start)&(pd.to_datetime(teams.index) <= m.week_end)]

# playerIds on the current team at the start of the matchup week.
current_lineup = teams[(teams.team_id == current_team.team_key)&(teams.index.get_level_values('date') == m.week_start)]
current_lineup = current_lineup.merge(players, how='left', on='player_key').playerId.tolist()

# The first picks are taken one at a time from current_lineup; the remaining picks are
# filled in one step from the wider pool of available players.
# NOTE(review): what 14 and 25 correspond to in the league rules is not visible here.
n_from_current_lineup = 14
n_selected_target = 25

selected_team = []

for date in dates:
    rankings = []
    print('\n\n\n', date.date())
    # Every day starts from an empty selection.
    selected_team = []

    starting_teams = teams.loc[(pd.to_datetime(teams.index) == date)]
    # Players not listed in starting_teams for this date, plus the current team's own players.
    all_available_players = players[(~players.player_key.isin(starting_teams.player_key))|(players.playerId.isin(current_lineup))]
    all_available_players = all_available_players.playerId.tolist()

    week_games = player_games[(player_games.ts >= date)&(player_games.ts <= m.week_end)]

    while len(selected_team) < n_selected_target:
        print(str(len(selected_team)), end='\r')

        if len(selected_team) < n_from_current_lineup:
            available = [p for p in current_lineup if p not in selected_team]
        else:
            available = [p for p in all_available_players if p not in selected_team]

        # Rest-of-season value: predictions scaled by the number of usable game days,
        # z-scored per category, averaged per player and turned into a percentile.
        rest_games = get_rest_of_season_games(date, player_games, selected_team)
        stats_available = rest_games[rest_games.index.isin(preds.index)].index
        ranks = preds.loc[stats_available, cats].apply(lambda x: x * rest_games[stats_available])
        ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)

        # Same computation restricted to the games left in the matchup week. Rows stay
        # those of stats_available, so players without a game this week get NaN.
        week_rest_games = get_rest_of_season_games(date, week_games, selected_team)
        week_stats_available = week_rest_games[week_rest_games.index.isin(preds.index)].index
        week_ranks = preds.loc[stats_available, cats].apply(lambda x: x * week_rest_games[week_stats_available])
        # Standardise with week_ranks' own per-category mean/std. `ranks` is already a
        # percentile Series at this point, so using its scalar mean/std would not z-score
        # the categories at all.
        week_ranks = ((week_ranks - week_ranks.mean())/(week_ranks.std())).mean(1).rank(pct=True)

        if len(selected_team) < n_from_current_lineup:
            selected_player = ranks.loc[[p for p in available if p in ranks]].idxmax()
            selected_team.append(selected_player)
            rankings.append({'playerId':selected_player, 'rank': round(ranks.loc[selected_player], 3), 'week_rank': round(week_ranks.loc[selected_player], 3), 'games':rest_games.loc[selected_player]})

        else:
            rest_of_them = ranks[[p for p in available if p in stats_available]].sort_values().iloc[-(n_selected_target-len(selected_team)):].index.tolist()
            if not rest_of_them:
                # Nobody left to add: stop instead of looping forever.
                break
            for p in rest_of_them:
                rankings.append({'playerId':p, 'rank': round(ranks.loc[p], 3), 'week_rank': round(week_ranks.loc[p], 3), 'games':rest_games.loc[p]})
            selected_team += rest_of_them

    # Also record current players that were not selected, so the DROPS report can show
    # their ranks. Players without predictions have no rank and are skipped here.
    for p in current_lineup:
        if p not in selected_team and p in ranks:
            rankings.append({'playerId':p, 'rank': round(ranks.loc[p], 3), 'week_rank': round(week_ranks.loc[p], 3), 'games':rest_games.loc[p]})
    rankings = pd.DataFrame(rankings).set_index('playerId')

    print(player_info.loc[current_lineup])
    # NOTE(review): `n_games` is not defined anywhere in the visible notebook; it must
    # already exist in the kernel (it is joined on 'team') for the reports below to run.
    print('DROPS')
    for p in current_lineup:
        if p not in selected_team:
            print(player_info.join(n_games, on='team').join(rankings).loc[p].to_dict())
    print('ADDS')
    for p in selected_team:
        if p not in current_lineup:
            print(player_info.join(n_games, on='team').join(rankings).loc[p].to_dict())

    # Only the first date is processed.
    break

1



 2024-10-04
                          name           pos team
playerId                                         
8478420         Mikko Rantanen        ['RW']  COL
8474564         Steven Stamkos   ['C', 'LW']  TBL
8480012       Elias Pettersson   ['C', 'LW']  VAN
8480018            Nick Suzuki         ['C']  MTL
8482116            Tim Stutzle   ['C', 'LW']  OTT
8478445          Mathew Barzal   ['C', 'RW']  NYI
8475166           John Tavares         ['C']  TOR
8481557             Matt Boldy  ['LW', 'RW']  MIN
8477986        Brandon Montour         ['D']  FLA
8476853          Morgan Rielly         ['D']  TOR
8476460         Mark Scheifele         ['C']  WPG
8474590           John Carlson         ['D']  WSH
8478407             Vince Dunn         ['D']  SEA
8478971          Connor Ingram         ['G']  ARI
8478492          Ilya Samsonov         ['G']  TOR
8480045   Ukko-Pekka Luukkonen         ['G']  BUF
8478366          Frank Vatrano   ['C', 'LW']  ANA
8475798        Mikael Granlund   