In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../src')

## Update data

In [None]:
from get_data import load_history, load_current, combine_history, process_y, load_bios, load_team_data

# load_history()

load_current()
combine_history()
load_team_data()
load_bios()
process_y()

## Make predictions

In [689]:
from process_data import get_rest_of_season_player_stats, get_rest_of_season_stats, PRED_COLS
from model_training import get_simple_pipelines, load_player_feature_map


X, y = get_rest_of_season_player_stats('skater')
skater_latest = X.groupby('playerId').last()


preds = {}
skaters_p_feats = load_player_feature_map('skater2', data=(X,y))
sk_p_cols = list(set([c for s in skaters_p_feats.values() for c in s[0]]))


X_g, y_g = get_rest_of_season_player_stats('goalie')
goalie_latest = X_g.groupby('playerId').last()

goalie_preds = {}
goalies_p_feats = load_player_feature_map('goalie2', (X_g, y_g))

pipelines = get_simple_pipelines((X, y), (X_g, y_g))

preds = {}
goalie_preds = {}

for col in PRED_COLS['skater']:    
    preds[col] = pipelines['skater'][col].predict(skater_latest.dropna()[skaters_p_feats[col][0]])
preds_df = pd.DataFrame(preds, index=skater_latest.dropna().index)

gl_p_cols = list(set([c for s in goalies_p_feats.values() for c in s[0]]))
for col in PRED_COLS['goalie']:
    goalie_preds[col] = pipelines['goalie'][col].predict(goalie_latest.dropna()[goalies_p_feats[col][0]])
goalie_preds_df = pd.DataFrame(goalie_preds, index=goalie_latest.dropna().index)

preds_df = pd.concat([preds_df, goalie_preds_df], axis=0)
preds_df.to_csv('data/latest_preds.csv')

Unnamed: 0_level_0,g,a,sog,fow,hit,block,pim,goalsfor,goalsaga,ppp,ga,win,so,save,icetime
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
8465009,0.045987,0.203214,1.069095,0.050264,1.504442,1.357844,0.491997,0.589483,0.697072,0.005660,,,,,
8466138,0.119814,0.211401,0.926876,2.341769,0.670322,0.380770,0.268369,0.347779,0.404593,0.062495,,,,,
8469455,0.121453,0.200454,1.261757,3.772475,0.551920,0.293347,0.340210,0.315787,0.328726,0.038395,,,,,
8470281,0.039385,0.318143,1.143072,0.050264,0.259994,1.832588,0.367906,0.741612,0.609819,0.054710,,,,,
8470595,0.205747,0.249827,1.513214,3.796418,0.458210,0.519623,0.298087,0.522037,0.477738,0.018520,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8483158,,,,,,,,,,,0.317486,0.047830,0.895382,2.586446,0.991773
8483530,,,,,,,,,,,0.531551,0.089471,0.816721,4.635340,1.666460
8483575,,,,,,,,,,,0.317486,0.047830,0.895382,2.586446,0.991773
8484293,,,,,,,,,,,0.317486,0.047830,0.895382,2.586446,0.991773


In [27]:
def load_preds():
    preds = pd.read_csv('data/latest_preds.csv')
    date_now = pd.to_datetime('now').date()
    preds['plusmin'] = preds['goalsfor'] - preds['goalsaga']
    preds['ga'] = -preds['ga'] / preds['icetime']
    return preds.set_index('playerId')

## Load yahoo data

In [16]:
import yahoo_utils
import lineup_utils
from process_data import PRED_COLS

from ast import literal_eval
import pandas as pd

game_id = 453
players = yahoo_utils.get_players(game_id)
players = players[~players.playerId.duplicated()]
player_info = players.set_index('playerId')[['name','pos','team']]
players['team_yh'] = players.team_yh.replace({
    'SJ':'SJS',
    'LA':'LAK',
    'TB':'TBL',
    'NJ':'NJD'
})
current_schedule = yahoo_utils.get_games_by_week(game_id)
teams = yahoo_utils.get_teams(game_id, True)

q = yahoo_utils.get_q(game_id)
info = []
for team in teams.team_id.drop_duplicates():
    info.append(q.get_team_info(team.split('.')[-1]))
    

current_team = [t for t in info if t.name.decode('utf8')=='Kiitos ryhmään pääsystä'][0]
position_lookup = players.set_index(['playerId'], drop=False).pos.apply(literal_eval).to_dict()
metrics = [m for metrics in PRED_COLS.values() for m in metrics]



preds = load_preds()

games_list = []
for k, v in current_schedule.items():
    for r in v:
        a = {'week':k}
        a.update(r)
        games_list.append(a)
games_df = pd.DataFrame(games_list)
games_df['ts'] = pd.to_datetime(games_df['ts'])
player_games = pd.concat([
    games_df.merge(players, how='left', left_on='home', right_on='team_yh'),
    games_df.merge(players, how='left', left_on='away', right_on='team_yh')
])[['week','gameId','ts','pos','name','playerId']]
player_games = player_games.dropna()

## Run selection loop

In [88]:

date_now = pd.Timestamp.now().date()
cats = ['g','a','sog','fow','hit','block','pim','plusmin','ga','win','so','save']
m = [m for m in current_team.matchups if pd.to_datetime(m.week_end).date() >= date_now][0]
print(m.week)
opponent_id = [t.team_key for t in m.teams if t.team_key != current_team.team_key][0]
dates = pd.date_range(max(pd.to_datetime(m.week_start).date(), date_now), m.week_end)


ranks = preds.copy()
ranks['plusmin'] = ranks['goalsfor'] - ranks['goalsaga']
ranks['ga'] = -ranks['ga'] / ranks['icetime']
ranks = ranks.drop('icetime', axis=1)
ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
ranks.name = 'rank'

week_teams = teams.loc[(pd.to_datetime(teams.index) >= m.week_start)&(pd.to_datetime(teams.index) <= m.week_end)]

current_lineup = teams[(teams.team_id == current_team.team_key)&(teams.index.get_level_values('date') == m.week_start)]
current_lineup = current_lineup.merge(players, how='left', on='player_key').playerId.tolist()


selected_team = []
    
for date in dates:
    rankings = []
    print('\n\n\n', date.date())
    if len(selected_team) > 0:
    #     current_lineup = [p for p in selected_team] # this is inactive since we use our existing lineup as starting point for each day of the week
        selected_team = []

    starting_teams = teams.loc[(pd.to_datetime(teams.index) == date)]
    all_available_players = players[(~players.player_key.isin(starting_teams.player_key))|(players.playerId.isin(current_lineup))]
    all_available_players = all_available_players.playerId.tolist()

    week_games = player_games[(player_games.ts >= date)&(player_games.ts <= m.week_end)]

    

    while len(selected_team) < 25:
        print(str(len(selected_team)), end='\r')
        
        if len(selected_team) < 14:
            available = [p for p in current_lineup if p not in selected_team]

        else:
            available = [p for p in all_available_players if p not in selected_team]
            
        rest_games = lineup_utils.get_rest_of_season_games(date, player_games, selected_team, position_lookup)
        stats_available = rest_games[rest_games.index.isin(preds.index)].index
        ranks = preds.loc[stats_available, cats].apply(lambda x: x * rest_games[stats_available])
        ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
        ranks.name = 'rank'
        
        week_rest_games = lineup_utils.get_rest_of_season_games(date, week_games, selected_team, position_lookup)
        week_stats_available = week_rest_games[week_rest_games.index.isin(preds.index)].index
        week_ranks = preds.loc[stats_available, cats].apply(lambda x: x * week_rest_games[week_stats_available])
        week_ranks = ((week_ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
        week_ranks.name = 'week_rank'

        if len(selected_team) < 14:
            selected_player = ranks.loc[[p for p in available if p in ranks]].idxmax()
            selected_team.append(selected_player)
            rankings.append({'playerId':selected_player, 'rank': round(ranks.loc[selected_player], 3), 'week_rank': round(week_ranks.loc[selected_player], 3), 'games':rest_games.loc[selected_player]})

        else:
            rest_of_them = ranks[[p for p in available if p in stats_available]].sort_values(ascending=False).iloc[:(25-len(selected_team))].index.tolist()
            for p in rest_of_them:
                rankings.append({'playerId':p, 'rank': round(ranks.loc[p], 3), 'week_rank': round(week_ranks.loc[p], 3), 'games':rest_games.loc[p]})
            selected_team += rest_of_them
            
    for p in current_lineup:
        if p not in selected_team:
            rankings.append({'playerId':p, 'rank': round(ranks.loc[p], 3), 'week_rank': round(week_ranks.loc[p], 3), 'games':rest_games.loc[p]})
    rankings = pd.DataFrame(rankings).set_index('playerId')
    
    n_games = week_rest_games[week_rest_games.index.isin(preds.index)]
    n_games.name = 'n_games'
    print(player_info.loc[current_lineup])
    print('DROPS')
    for p in current_lineup:
        if p not in selected_team:
            
            print(player_info.join(n_games).join(rankings).loc[p].to_dict())
    print('ADDS')
    for p in selected_team:
        if p not in current_lineup:
            print(player_info.join(n_games).join(rankings).loc[p].to_dict())
            
    break

1



 2024-10-06
                          name           pos team
playerId                                         
8478420         Mikko Rantanen        ['RW']  COL
8474564         Steven Stamkos   ['C', 'LW']  TBL
8480012       Elias Pettersson   ['C', 'LW']  VAN
8480018            Nick Suzuki         ['C']  MTL
8482116            Tim Stutzle   ['C', 'LW']  OTT
8478445          Mathew Barzal   ['C', 'RW']  NYI
8475166           John Tavares         ['C']  TOR
8481557             Matt Boldy  ['LW', 'RW']  MIN
8477986        Brandon Montour         ['D']  FLA
8476853          Morgan Rielly         ['D']  TOR
8476460         Mark Scheifele         ['C']  WPG
8474590           John Carlson         ['D']  WSH
8478407             Vince Dunn         ['D']  SEA
8478971          Connor Ingram         ['G']  ARI
8478492          Ilya Samsonov         ['G']  TOR
8480045   Ukko-Pekka Luukkonen         ['G']  BUF
8478366          Frank Vatrano   ['C', 'LW']  ANA
8475798        Mikael Granlund   

In [129]:
rest_games

8473449    80
8473503    80
8475722    80
8477365    80
8477949    80
           ..
8481178    82
8481546    82
8481553    82
8482142    82
8482159    82
Length: 969, dtype: int64

In [92]:
names = ['Mikko Rantanen', 'Sidney Crosby']
player_info.join(ranks).join(week_ranks)[player_info.name.isin(names)]


Unnamed: 0_level_0,name,pos,team,rank,week_rank
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8471675,Sidney Crosby,['C'],PIT,0.987715,0.95086
8478420,Mikko Rantanen,['RW'],COL,0.984029,0.737101


In [152]:
ranks

playerId
8465009    0.506280
8466138    0.144928
8469455    0.173913
8470281    0.667633
8470595    0.444444
             ...   
8483158    0.685990
8483530    0.516908
8483575    0.685990
8484293    0.685990
8484312    0.620290
Name: rank, Length: 1035, dtype: float64

In [175]:
ranks = preds.copy()
ranks['plusmin'] = ranks['goalsfor'] - ranks['goalsaga']
ranks['ga'] = -ranks['ga'] / ranks['icetime']
ranks = ranks.drop('icetime', axis=1)
ranks = ((ranks - ranks.mean())/(ranks.std())).mean(1).rank(pct=True)
full_n_games = player_games[player_games.ts.dt.date > date_now].groupby('playerId').gameId.count()
full_n_games = pd.DataFrame(ranks).join(full_n_games).fillna(0).gameId
week_n_games = player_games[(player_games.ts.dt.date > date_now)&(player_games.ts <= m.week_end)].groupby('playerId').gameId.count()
week_n_games = pd.DataFrame(ranks).join(week_n_games).fillna(0).gameId

week_ranks = ranks * week_n_games
ranks = ranks * full_n_games
ranks.name = 'rank'
week_ranks.name ='week_rank'

prankings = players.set_index('playerId').join(ranks, how='inner').join(week_ranks, rsuffix='_week').merge(starting_teams, how='left', on='player_key')

ss = []
for t in info:
    a = prankings.loc[prankings.team_id == t.team_key].copy()
    a['team'] = t.name.decode()
    a['manager'] = t.managers[0].nickname
    ss.append(a)

ss = pd.concat(ss)
ss.groupby(['team','manager'])[['rank','week_rank']].mean().sort_values('rank', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank,week_rank
team,manager,Unnamed: 2_level_1,Unnamed: 3_level_1
Kiitos ryhmään pääsystä,Lauru,74.185558,2.278057
Miehen käsi,Riku,71.951637,2.399195
Okkin Lövy,Joel,71.918159,2.131003
Kristinusko,Arvo,71.65081,2.245411
Saakos niitä kaupasta,Juuso,69.982931,2.172947
Haukion Kala,Vesa,69.169673,2.219109
Seksi,Vesa-Ville,68.959152,2.244498
Ruumiinkuljetusluvat,Tuomo,67.062497,1.846123


In [179]:
ss[ss.manager == 'Arvo']

Unnamed: 0,pos,name,player_key,team_yh,status,position,team,birthDate,weight,height,nationality,shootsCatches,primaryNumber,primaryPosition,rank,week_rank,team_id,manager
34,['LW'],Brad Marchand,453.p.4351,BOS,DTD,L,Miehen käsi,1988-05-11,181.0,"5' 9""",CAN,L,63.0,L,67.73913,2.478261,453.l.15482.t.8,Riku
39,['D'],Drew Doughty,453.p.4472,LAK,O,D,Miehen käsi,1989-12-08,202.0,"6' 1""",CAN,R,8.0,D,73.601932,1.795169,453.l.15482.t.8,Riku
55,['C'],Nazem Kadri,453.p.4687,CGY,,C,Miehen käsi,1990-10-06,192.0,"6' 0""",CAN,L,43.0,C,80.573913,2.947826,453.l.15482.t.8,Riku
68,['C'],Ryan O'Reilly,453.p.4786,NSH,,C,Miehen käsi,1991-02-07,216.0,"6' 1""",CAN,L,90.0,C,75.782609,2.73913,453.l.15482.t.8,Riku
75,['D'],Roman Josi,453.p.4930,NSH,,D,Miehen käsi,1990-06-01,201.0,"6' 1""",CHE,L,59.0,D,82.438647,2.97971,453.l.15482.t.8,Riku
129,['D'],Mattias Ekholm,453.p.5329,EDM,,D,Miehen käsi,1990-05-24,215.0,"6' 4""",SWE,L,14.0,D,77.800966,2.846377,453.l.15482.t.8,Riku
225,['G'],Connor Hellebuyck,453.p.5820,WPG,,G,Miehen käsi,1993-05-19,207.0,"6' 4""",USA,L,37.0,G,61.480193,2.249275,453.l.15482.t.8,Riku
230,['G'],Linus Ullmark,453.p.5853,OTT,,G,Miehen käsi,1993-07-31,213.0,"6' 4""",SWE,L,35.0,G,40.801932,0.995169,453.l.15482.t.8,Riku
263,['G'],Tristan Jarry,453.p.6024,PIT,,G,Miehen käsi,1995-04-29,194.0,"6' 2""",CAN,L,35.0,G,38.345894,1.402899,453.l.15482.t.8,Riku
325,"['C', 'RW']",Adrian Kempe,453.p.6395,LAK,,R,Miehen käsi,1996-09-13,201.0,"6' 2""",SWE,L,9.0,C,75.899517,1.851208,453.l.15482.t.8,Riku
