In [1]:
%load_ext autoreload
%autoreload 2
from yfpy.query import YahooFantasySportsQuery
import sys
sys.path.append('../src')

In [47]:
def get_ts(t):
    """Convert a Unix epoch value (seconds) into a US/Pacific timestamp.

    The value is first interpreted as a UTC instant and then converted,
    so the returned ``pd.Timestamp`` is timezone-aware.
    """
    utc_moment = pd.Timestamp(t, unit='s', tz='UTC')
    return utc_moment.tz_convert('US/Pacific')

def iso_get_ts(t):
    """Parse a timezone-aware timestamp (e.g. an ISO string) and convert it to US/Pacific.

    The input must carry timezone information; ``tz_convert`` raises for
    timezone-naive values.
    """
    parsed = pd.Timestamp(t)
    return parsed.tz_convert('US/Pacific')

def get_gameweek(date):
    """Fetch one schedule page from the NHL web API and flatten it into game records.

    Args:
        date: Date string interpolated into the request URL (callers in this
            notebook pass ``'YYYY-MM-DD'``).

    Returns:
        A tuple ``(games, next_start)``. ``games`` is a list of dicts with the
        keys ``gameId``, ``home``, ``away`` and ``ts``, where ``ts`` is the
        US/Pacific calendar date of the game's start time. ``next_start`` is
        the response's ``nextStartDate`` value, or ``None`` when it is absent.

    Raises:
        requests.HTTPError: if the API does not answer with HTTP 200. The
            original code only printed the body and then failed later with an
            unrelated JSON/KeyError.
    """
    import requests

    url = f'https://api-web.nhle.com/v1/schedule/{date}'
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        raise requests.HTTPError(
            f'NHL schedule request for {date} returned HTTP {r.status_code}: {r.text[:500]}',
            response=r,
        )
    data = r.json()

    games = []
    for week in data['gameWeek']:
        for game in week['games']:
            games.append({
                'gameId': game['id'],
                'home': game['homeTeam']['abbrev'],
                'away': game['awayTeam']['abbrev'],
                # Bucket games by their local (Pacific) calendar day.
                'ts': iso_get_ts(game['startTimeUTC']).date()
            })
    return games, data.get('nextStartDate')

def get_schedule(dates):
    """Collect all games between the first and last entry of ``dates``.

    The NHL API returns the schedule in pages; each page reports the date at
    which the next page starts, so only dates at or past that marker trigger
    a new request.

    Args:
        dates: Ordered sequence of ``pd.Timestamp`` values (e.g. a
            ``pd.date_range``). May be empty.

    Returns:
        List of game dicts as produced by ``get_gameweek``, restricted to
        games whose ``ts`` date lies within ``[dates[0], dates[-1]]``.
        An empty ``dates`` yields an empty list instead of an IndexError.
    """
    if len(dates) == 0:
        return []

    first_day = dates[0].date()
    last_day = dates[-1].date()

    games = []
    # Parse the marker once per page rather than once per loop iteration.
    next_start = pd.Timestamp(dates[0].strftime('%Y-%m-%d'))
    for date in dates:
        if pd.isna(next_start):
            # The API reported no further page; nothing more to fetch.
            break
        if date >= next_start:
            page, next_start_raw = get_gameweek(date.strftime('%Y-%m-%d'))
            games += page
            next_start = pd.Timestamp(next_start_raw)

    # Pages can extend beyond the requested window, so trim both ends.
    return [g for g in games if first_day <= g['ts'] <= last_day]


In [3]:
def get_moves(t):
    """Flatten a transaction into per-player roster moves.

    For every player in ``t.players`` a ``{'player', 'to'}`` record is emitted
    when a destination team key is set and a ``{'player', 'from'}`` record when
    a source team key is set (a player can produce both, in that order).
    """
    records = []
    for entry in t.players:
        details = entry.transaction_data
        destination = details.destination_team_key
        if destination:
            records.append({'player': entry.player_key, 'to': destination})
        origin = details.source_team_key
        if origin:
            records.append({'player': entry.player_key, 'from': origin})
    return records


def argsort(seq):
    """Return the indices that would sort ``seq`` in ascending order (stable)."""
    order = list(range(len(seq)))
    order.sort(key=lambda position: seq[position])
    return order

def get_transactions(q):
    """Group the league's roster moves by the day they take effect.

    Transactions are processed in ascending timestamp order. Each one is filed
    under the US/Pacific calendar date one day after its timestamp, and its
    moves (see ``get_moves``) are appended to that date's list.

    Returns:
        dict mapping ``datetime.date`` to a list of move dicts.
    """
    # Stable sort by timestamp keeps the original relative order for ties.
    ordered = sorted(q.get_league_transactions(), key=lambda tr: tr.timestamp)

    moves_by_day = {}
    for tr in ordered:
        effective_day = (get_ts(tr.timestamp) + pd.Timedelta('1d')).date()
        moves_by_day.setdefault(effective_day, []).extend(get_moves(tr))
    return moves_by_day

In [4]:
def get_initial_teams(q):
    """Build the post-draft rosters from the league's draft results.

    Returns:
        dict mapping each team key to the list of player keys it drafted,
        in draft-result order.
    """
    rosters = {}
    for pick in q.get_league_draft_results():
        rosters.setdefault(pick.team_key, []).append(pick.player_key)
    return rosters

def get_teams(q):
    """Reconstruct every team's roster for each day from the draft onwards.

    Starts from the draft results (``get_initial_teams``) and replays the
    dated moves returned by ``get_transactions`` day by day, snapshotting the
    rosters after each day's moves.

    Returns:
        DataFrame indexed by ``date`` with one row per (date, team, player),
        holding the columns ``team_id`` and ``player_key``.
    """
    s = q.get_league_settings()
    # NOTE(review): `weeks` is not a parameter or local; it must already exist
    # as a notebook-level variable (an object list whose last item has `.end`)
    # or this line raises NameError.
    dates = pd.date_range(get_ts(s.draft_time).strftime('%Y-%m-%d'),
              weeks[-1].end, freq='d').date

    from copy import deepcopy

    teams = {}
    transactions = get_transactions(q)
    current_teams = get_initial_teams(q)
    for date in dates:
        trs = transactions.get(date, [])
        for move in trs:
            # A 'to' team that made no draft pick is missing from
            # current_teams and would raise KeyError here.
            if 'to' in move:
                current_teams[move['to']].append(move['player'])
            # list.remove raises ValueError if the player is not on that roster.
            if 'from' in move:
                current_teams[move['from']].remove(move['player'])
        # Snapshot a deep copy so later moves do not alter earlier days.
        teams[date] = deepcopy(current_teams)

    # Rows = dates, columns = team keys, cells = player lists; melt to long form.
    df = pd.DataFrame(teams).T.melt(var_name='team_id', value_name='player', ignore_index=False)
    df = df.reset_index()
    # Explode the per-row player lists and join back on the row index, giving
    # one row per player in a new `player_key` column.
    df = df.join(df.player.explode(), rsuffix='_key', validate='one_to_many')
    df['index'] = pd.to_datetime(df['index'])
    df = df.drop('player', axis=1).set_index('index')
    df.index.name = 'date'
    return df

In [14]:
import pandas as pd
def get_players(q):
    """Load the league's Yahoo players and match them to the local bios table.

    Yahoo player records are reduced to position list, full name, player key,
    team abbreviation and status. Names in ``data/bios.csv`` are first rewritten
    using ``data/name_mapping.csv`` and the two tables are then inner-joined on
    ``name``; clashing Yahoo columns receive the ``_yh`` suffix.
    """
    records = []
    for p in q.get_league_players():
        records.append({
            'pos': p.display_position.split(','),
            'name': p.name.full,
            'player_key': p.player_key,
            'team': p.editorial_team_abbr,
            'status': p.status,
        })
    yahoo_players = pd.DataFrame(records)

    names_map = pd.read_csv('data/name_mapping.csv', index_col='name_match')
    bios = pd.read_csv('data/bios.csv')
    # Nested-dict replace: {column: {old: new}} applied to the `name` column.
    bios['name'] = bios[['name']].replace(names_map.to_dict())['name']

    return yahoo_players.merge(bios, how='inner', on='name', suffixes=('_yh', ''))

In [9]:
from get_data import load_history, load_current, combine_history, process_pca, process_y, load_bios, load_team_data
# load_history()
# load_current()
# combine_history()
# load_team_data()
# load_bios()
# process_pca()
# process_y()


Loaded bios, writing to csv.
Read in game data.
Processed pca, writing to csv
Processed y, writing to csv


In [29]:
# Yahoo game id selecting which fantasy game the query targets.
# presumably 427 and 453 are two different NHL seasons — TODO confirm
game_id = 427
# game_id = 453

league_id = 21834
# NOTE(review): auth_dir is a machine-specific absolute path; the notebook will
# not authenticate elsewhere without changing it.
q = YahooFantasySportsQuery(
    game_id = game_id,
    league_id = league_id,
    game_code = 'nhl',
    auth_dir = '/home/jupyter/creds',
    browser_callback = False,
)

In [17]:
# NOTE(review): get_teams() reads a notebook-level `weeks`; with the assignment
# below commented out, get_teams(q) cannot run on a fresh kernel.
# weeks = q.get_game_weeks_by_game_id(game_id)
# Player table (Yahoo players matched to bios) used by the draft cells further down.
players = get_players(q)
# teams = get_teams(q)
# teams = teams.reset_index().merge(players, on='player_key', how='left')


2024-09-23 13:26:24.042 - ERROR - query.py - yfpy.query:291 - No data found when attempting extraction from fields: ['league', 'players']


In [56]:
from process_data import get_player_stats, get_team_stats, get_bios, run_training, INDEX_COLS, PRED_COLS
# Load the modelling inputs from process_data: player-level features and
# targets, bios aligned to the player frame, and team-level features.
# NOTE(review): index layout of these frames is defined in process_data, not here.
X_p, y = get_player_stats()
X_b = get_bios(X_p)
X_t = get_team_stats()

In [23]:
# Shift each player's rows down by one within the player, so a row carries the
# previous row's values (presumably the previous game — assumes rows are in
# chronological order; TODO confirm), then attach the bio columns.
X = X_p.groupby('playerId').shift(1)
X = X.join(X_b)

# Same one-row shift per team; `home` is restored from the unshifted frame so
# it describes the current row rather than the previous one.
X_pt = X_t.groupby('team').shift(1).copy()
X_pt['home'] = X_t['home'].copy()

# Relabel the team index level so the join matches on the opposing team;
# columns that clash with X get the "_opp" suffix.
X_pt.index.names = ['opposingTeam','gameId','gameDate']
X = X.join(X_pt, rsuffix='_opp')

# Relabel again (in place) to match on the player's own team; clashes get "_team".
X_pt.index.names = ['playerTeam','gameId','gameDate']
X = X.join(X_pt, rsuffix='_team')

X = X.reset_index().set_index(INDEX_COLS)
# Drop rows where all of the columns named '0'..'34' are missing
# (presumably the PCA components written by process_pca — TODO confirm).
X = X[~X[[str(i) for i in range(35)]].isna().all(1)]
# Keep the targets aligned with the surviving feature rows.
# NOTE(review): this overwrites `y`, so re-running the cell uses the already-filtered y.
y = y.loc[X.index]

In [25]:
# Train the models; `pipes` is later indexed by each PRED_COLS entry and used
# via `.predict` / `.feature_names_in_` (see get_preds).
# NOTE(review): the meaning of the third positional argument (True) is defined
# in process_data.run_training — confirm there.
pipes = run_training(X, y, True)

fow -- 0.7414251367418632
fow -- 0.7901185749527758
goals -- 0.07357906986823237
goals -- 0.2720227817788389
assists -- 0.08688721114812958
assists -- 0.39888354828236905
shots -- 0.2257047788509583
shots -- 1.0684372334351537
hits -- 0.16546566719263378
hits -- 1.0118081149267546
blocks -- 0.2236169059349279
blocks -- 0.7654522918586214
pim -- 0.02645390322644614
pim -- 0.7137072948602097
goalsfor -- 0.03874478359033917
goalsfor -- 0.6289361285565018
goalsaga -- 0.025841764003764034
goalsaga -- 0.6291549350778155
ppp -- 0.10559534277704385
ppp -- 0.15195601095192351


## DRAFT

In [31]:
# Re-point the query at game 427 to read that game's weekly matchup results.
# NOTE(review): this rebinds the notebook-level `game_id` and `q`, and relies
# on `league_id` from an earlier cell.
game_id = 427

q = YahooFantasySportsQuery(
    game_id = game_id,
    league_id = league_id,
    game_code = 'nhl',
    auth_dir = '/home/jupyter/creds',
    browser_callback = False,
)

# Collect the matchups of weeks 1 through 26.
# NOTE(review): 27 is a hard-coded upper bound — confirm the number of weeks.
all_matchups = []
for i in range(1, 27):
    all_matchups += q.get_league_matchups_by_week(i)
    
# Yahoo stat ids -> stat names used throughout this notebook.
stats_map = {
    1: 'goals',
    2: 'assists',
    4: 'plusmin',
    5: 'pim',
    8: 'ppp',
    14: 'shots',
    16: 'fow',
    31: 'hits',
    32: 'blocks'
}
# One record per (matchup, team, stat).
# NOTE(review): `stats` is also the name a later cell binds to `scipy.stats`;
# avoid depending on cell execution order for this name.
stats = []
for match in all_matchups:
    for mt in match.teams:
        for s in mt.team_stats.stats:
            stats.append({'id': s.stat_id, 'val': s.value})
stats = pd.DataFrame(stats)
# Ids missing from stats_map map to None and are dropped by the groupby below.
stats['name'] = stats.id.apply(lambda x: stats_map.get(x, None))
# Mean value of each named stat across all collected team results.
s_exp = stats.groupby('name')['val'].mean()

In [35]:
# Switch the query back to game 453 (the player keys shown further down carry
# the "453." prefix). Rebinds the notebook-level `game_id` and `q`.
game_id = 453
q = YahooFantasySportsQuery(
    game_id = game_id,
    league_id = league_id,
    game_code = 'nhl',
    auth_dir = '/home/jupyter/creds',
    browser_callback = False,
)

In [53]:
def get_week_data(dates, games, X_p, X_t, week):
    """Build one feature row per (scheduled game, player) from the latest known stats.

    Args:
        dates: Accepted for interface compatibility; its value is not used.
        games: List of game dicts with ``gameId``, ``home``, ``away`` and ``ts``.
        X_p: Player stats frame whose index includes ``gameDate`` (``%Y%m%d``),
            ``playerTeam`` and ``playerId``.
        X_t: Team stats frame whose index includes ``gameDate`` and ``team``,
            with a ``home`` column.
        week: Object with a ``start`` attribute; only stats dated before it are
            used. ``None`` means use every row.

    Returns:
        DataFrame indexed by (gameId, gameDate, playerId, playerTeam,
        opposingTeam) with player stats, own-team stats (``_team`` suffix on
        clashes), opposing-team stats (``_opp`` suffix on clashes), bios, a
        ``home_team`` flag and ``home`` set to ``1 - home_team``.
    """
    def _rows_before_week(frame):
        # Boolean selector for rows dated strictly before the week start.
        game_dates = pd.to_datetime(frame.index.get_level_values('gameDate'), format='%Y%m%d')
        if week is None:
            return [True for _ in range(len(game_dates))]
        return game_dates < week.start

    # Most recent row per player / per team inside the allowed window.
    player_stats = (
        X_p[_rows_before_week(X_p)]
        .reset_index(level='playerTeam')
        .groupby(['playerId'])
        .last()
    )
    team_stats = X_t[_rows_before_week(X_t)].groupby('team').last()
    team_stats = team_stats.drop('home', axis=1)

    schedule = pd.DataFrame(games)

    def _rows_for_side(own_side, other_side):
        # Players of the `own_side` team, then own-team stats, then opponent stats.
        merged = schedule.merge(player_stats.reset_index(), left_on=own_side, right_on='playerTeam')
        merged = merged.merge(team_stats.reset_index(), left_on=own_side, right_on='team', suffixes=('', '_team'))
        return merged.merge(team_stats.reset_index(), left_on=other_side, right_on='team', suffixes=('', '_opp'))

    home_rows = _rows_for_side('home', 'away')
    away_rows = _rows_for_side('away', 'home')

    gs = pd.concat([home_rows, away_rows]).sort_values('gameId')
    gs['home_team'] = (gs['playerTeam']  == gs['home']).astype(int)
    gs['gameDate'] = pd.to_datetime(gs['ts'])
    gs['opposingTeam'] = gs.apply(lambda row: row['home'] if row['playerTeam'] == row['away'] else row['away'], axis=1)
    gs = gs.set_index(['gameId','gameDate','playerId','playerTeam', 'opposingTeam'])
    gs = gs.join(get_bios(gs))
    # `home` is overwritten with the complement of the player's own home flag.
    gs['home'] = 1 - gs['home_team']
    return gs

def get_opponent(m, team_key=None):
    """Return the key of the team facing ours in matchup ``m``.

    Args:
        m: Matchup object exposing ``teams``, each with a ``team_key``.
        team_key: Key of our own team. When omitted, the notebook-level
            ``own_team`` variable is used, as before.

    Returns:
        The first team key in the matchup that differs from ours, or ``None``
        when our team is not part of the matchup or has no opponent listed.
        (Previously the "not in matchup" case raised UnboundLocalError.)
    """
    if team_key is None:
        team_key = own_team  # notebook-level global, kept for backward compatibility
    matchup_keys = [t.team_key for t in m.teams]
    if team_key not in matchup_keys:
        return None
    return next((key for key in matchup_keys if key != team_key), None)

from scipy import stats
def get_added_val(x, opp_exp, stat_cols):
    """Score each stat category as a Skellam probability against the opponent.

    For every column the value is ``skellam.cdf(0, mu_opp, mu_own)``, i.e. the
    probability that a Poisson(mu_opp) count minus a Poisson(mu_own) count is
    at most zero. For ``'plusmin'`` the rates are built from goals for/against:
    ``mu_opp = opp goalsfor + own goalsaga`` and
    ``mu_own = opp goalsaga + own goalsfor``.

    Args:
        x: Mapping/Series of our expected values per stat.
        opp_exp: Mapping/Series of the opponent's expected values per stat.
        stat_cols: Stat names to score.

    Returns:
        dict mapping each stat name to its probability.
    """
    # Imported locally so the function does not depend on the notebook-level
    # name `stats`, which another cell rebinds to a DataFrame.
    from scipy.stats import skellam

    vals = {}
    for col in stat_cols:
        if col == 'plusmin':
            mu_opp = opp_exp['goalsfor'] + x['goalsaga']
            mu_own = opp_exp['goalsaga'] + x['goalsfor']
        else:
            mu_opp = opp_exp[col]
            mu_own = x[col]
        vals[col] = skellam.cdf(0, mu_opp, mu_own)
    return vals

def get_added_vals(all_preds, opp_exp=None, own_team_length=1):
    """Total category score for each row of ``all_preds``.

    Each row is scored per stat with ``get_added_val`` against ``opp_exp``
    (built by ``get_dummy_team`` when not supplied); missing scores count as 0
    and the per-stat scores are summed into one value per row.
    """
    if opp_exp is None:
        opp_exp = get_dummy_team(all_preds, own_team_length)

    stat_cols = ['goals', 'assists', 'shots', 'hits', 'blocks', 'pim', 'fow', 'plusmin', 'ppp']

    def _score_row(row):
        return pd.Series(get_added_val(row, opp_exp, stat_cols))

    per_stat = all_preds.apply(_score_row, axis=1).fillna(0)
    return per_stat.sum(axis=1)

In [37]:
def can_include_player(new_player, position_lookup, selected_players, position_limits, wildcard=None):
    """Check whether ``new_player`` fits into the lineup alongside ``selected_players``.

    A depth-first search tries to give every player one of their eligible
    positions without exceeding any per-position limit.

    Args:
        new_player: Id of the candidate player.
        position_lookup: Mapping of player id to an iterable of eligible positions;
            players missing from it have no eligible position.
        selected_players: List of player ids already in the lineup.
        position_limits: Mapping of position to the number of available slots;
            positions missing from it have zero slots.
        wildcard: Optional player id allowed to take any position in
            ``position_limits``.

    Returns:
        True if a valid assignment exists for all players, else False.
    """
    from collections import Counter

    roster = selected_players + [new_player]
    slots = {}  # player id -> position currently being tried

    def within_limits():
        # No position may be used more often than its limit allows.
        used = Counter(slots.values())
        return all(count <= position_limits.get(pos, 0) for pos, count in used.items())

    def place(depth):
        # Every player has been handled: accept if the final layout is legal.
        if depth == len(roster):
            return within_limits()

        player = roster[depth]
        options = position_lookup.get(player, [])
        if player == wildcard:
            options = position_limits.keys()

        for pos in options:
            slots[player] = pos
            if within_limits() and place(depth + 1):
                return True
            # Undo this attempt before trying the next position.
            slots.pop(player, None)
        return False

    return place(0)

In [108]:
# NOTE(review): `games_df` is not assigned in any visible cell; on a fresh
# kernel this cell raises NameError (as the recorded output shows).
games_df

NameError: name 'games_df' is not defined

In [38]:

def get_days_games(date, game_df, players, selected_team):
    """Select the rows of one day that belong to, or could be added to, the lineup.

    Args:
        date: Day to select; compared against the ``gameDate`` index level.
        game_df: Frame indexed by a MultiIndex containing ``gameDate`` and
            ``playerId`` levels (one row per player per game).
        players: Frame with ``playerId`` and ``pos`` (list of eligible
            positions) columns.
        selected_team: Player ids already on the team.

    Returns:
        The rows of ``game_df`` for ``date`` whose player is either on the team
        or can still be fitted into the day's lineup next to the team's players
        who play that day. Empty when no game takes place on ``date``.
    """
    position_lookup = players.set_index('playerId')['pos'].to_dict()
    position_limits = {
        'D': 4,
        'C': 2,
        'RW': 2,
        'LW': 2,
        'G': 0
    }
    # Restrict to the requested day (the original line was a syntax error and
    # never used `date`, so every day would have returned the full frame).
    game_dates = game_df.index.get_level_values('gameDate')
    todays_games = game_df[game_dates == pd.Timestamp(date)]
    if todays_games.empty:
        return todays_games

    player_ids = todays_games.index.get_level_values('playerId')
    is_inteam = player_ids.isin(selected_team)
    # Team members who actually play today occupy lineup slots.
    todays_players = player_ids[is_inteam].tolist()

    is_included = pd.Series(
        [
            can_include_player(pid, position_lookup, todays_players, position_limits, wildcard=None)
            for pid in player_ids
        ],
        dtype=bool,
    ).values
    return todays_games[is_included | is_inteam]

def get_week_games(dates, preds, players, selected_team):
    """Stack the per-day selections from ``get_days_games`` for every entry in ``dates``."""
    daily_frames = [
        get_days_games(day, preds, players, selected_team)
        for day in dates
    ]
    return pd.concat(daily_frames)
    

In [105]:
# Display the merged player table (Yahoo fields plus bios) for inspection.
players

Unnamed: 0,pos,name,player_key,team_yh,status,playerId,position,team,birthDate,weight,height,nationality,shootsCatches,primaryNumber,primaryPosition
0,[G],Marc-Andre Fleury,453.p.3341,MIN,,8470594,G,MIN,1984-11-28,180.0,"6' 2""",CAN,L,29.0,G
1,[D],Ryan Suter,453.p.3345,STL,,8470600,D,DAL,1985-01-21,208.0,"6' 2""",USA,L,20.0,D
2,[D],Brent Burns,453.p.3358,CAR,,8470613,D,CAR,1985-03-09,230.0,"6' 5""",CAN,R,88.0,D
3,[RW],Corey Perry,453.p.3365,EDM,,8470621,R,EDM,1985-05-16,206.0,"6' 3""",CAN,R,10.0,R
4,[LW],Alex Ovechkin,453.p.3637,WSH,DTD,8471214,L,WSH,1985-09-17,235.0,"6' 3""",RUS,R,8.0,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
960,[D],Patrik Koch,453.p.31276,UTA,,8484326,D,ARI,,,,,,,
961,[D],Lane Hutson,453.p.31751,MTL,,8483457,D,MTL,,,,,,,
962,[LW],Cutter Gauthier,453.p.31759,ANA,,8483445,L,ANA,,,,,,,
963,[D],Elias Pettersson,453.p.32762,VAN,,8480012,C,VAN,1998-11-12,176.0,"6' 2""",SWE,L,40.0,C


In [41]:
def get_preds(pipes, gs):
    """Predict every target in ``PRED_COLS`` for the rows of ``gs``.

    Each fitted model in ``pipes`` is fed the columns it was trained on
    (``feature_names_in_``); negative predictions are clipped to zero.

    Returns:
        DataFrame with one column per target, sharing ``gs``'s index.
    """
    import numpy as np

    clipped = {}
    for target in PRED_COLS:
        model = pipes[target]
        raw = model.predict(gs[model.feature_names_in_])
        clipped[target] = np.clip(raw, 0, np.inf)

    return pd.DataFrame(clipped, index=gs.index)

def get_results(dates, preds, players, selected_team):
    """Sum the actual Yahoo stats of the selected team over ``dates``.

    The team's game rows are looked up per day (an empty player table is passed
    to ``get_week_games`` so that no positions are known and only rows of
    players already in ``selected_team`` survive), mapped to Yahoo player keys,
    and each player's stats for that date are fetched through the
    notebook-level query object ``q``.

    Returns:
        Series with the column-wise sum of the collected stat records.
    """
    team_rows = get_week_games(dates, preds, players.iloc[:0], selected_team)
    key_by_player = players.groupby('playerId')['player_key'].first()
    keys_by_day = (
        team_rows.join(key_by_player)
        .groupby('gameDate')['player_key']
        .apply(list)
        .to_dict()
    )

    # Yahoo stat ids -> stat names.
    stats_map = {
        1: 'goals',
        2: 'assists',
        4: 'plusmin',
        5: 'pim',
        8: 'ppp',
        14: 'shots',
        16: 'fow',
        31: 'hits',
        32: 'blocks'
    }

    records = []
    for day, player_keys in keys_by_day.items():
        day_label = day.strftime('%Y-%m-%d')
        for player_key in player_keys:
            raw_stats = q.get_player_stats_by_date(player_key, day_label).player_stats.stats
            records.append({stats_map[stat.stat_id]: stat.value for stat in raw_stats})
    return pd.DataFrame(records).sum()

In [None]:
# Daily calendar used to pull the schedule; get_schedule issues HTTP requests
# to the NHL API, so this cell needs network access.
dates = pd.date_range('2024-09-23', '2025-06-01')
games = get_schedule(dates)


In [104]:
# NOTE(review): the loop below reads `preds` and `dates`, but the lines that
# create them here are commented out; the cell only works if they already
# exist in the kernel from an earlier run.
# dates = pd.date_range('2024-10-01', '2025-06-01')
# games = get_schedule(dates)
# gs = get_week_data(dates, games, X_p, X_t, week=None)

# preds = get_preds(pipes, gs)


# Opponent baseline: the mean matchup stats (`s_exp`) as a one-row frame.
# get_added_val needs goalsfor/goalsaga for plus-minus, so plusmin is used as
# goalsfor and goalsaga is set to 0.
opp_exp = pd.DataFrame(s_exp).T
opp_exp['goalsfor'] = opp_exp['plusmin']
opp_exp['goalsaga'] = 0


# Player ids already on our team.
selected_team = [
    8478550,
    8478542,
    8476455
    
]
# Player ids that are no longer available to pick.
taken = [
    8471214,
    8480012,
    8475798,
    8470600         
]

player_info = players.set_index('playerId')[['name', 'pos']]
# Greedy draft: repeatedly add the available player with the highest total
# category score until the team has 14 players.
while len(selected_team) < 14:
    # Rows that are on the team or still fit the daily lineup, summed per player.
    week_ind_preds = get_week_games(dates, preds, players, selected_team)
    week_preds = week_ind_preds.groupby('playerId').sum()

    # Candidates: everyone not taken and not already selected.
    current_exp = week_preds[~week_preds.index.isin(taken+selected_team)].copy()

    # With an empty team the candidates are replaced by a single all-zero row.
    # (Not reached here because selected_team starts with three ids.)
    if len(selected_team) == 0:
        current_exp = week_preds.iloc[[0]] * 0
    
    # NOTE(review): idxmax raises if no candidates remain, which would end the loop with an error.
    exp = get_added_vals(current_exp, opp_exp.sum())
    selected_team.append(exp.idxmax())
    print('TEAM\n', player_info.loc[selected_team])

TEAM
                        name      pos
playerId                            
8478550      Artemi Panarin     [LW]
8478542      Evan Rodrigues  [C, LW]
8476455   Gabriel Landeskog     [LW]
8479318     Auston Matthews      [C]
TEAM
                        name      pos
playerId                            
8478550      Artemi Panarin     [LW]
8478542      Evan Rodrigues  [C, LW]
8476455   Gabriel Landeskog     [LW]
8479318     Auston Matthews      [C]
8476468         J.T. Miller  [C, RW]
TEAM
                        name      pos
playerId                            
8478550      Artemi Panarin     [LW]
8478542      Evan Rodrigues  [C, LW]
8476455   Gabriel Landeskog     [LW]
8479318     Auston Matthews      [C]
8476468         J.T. Miller  [C, RW]
8478493    Joel Eriksson Ek      [C]
TEAM
                        name      pos
playerId                            
8478550      Artemi Panarin     [LW]
8478542      Evan Rodrigues  [C, LW]
8476455   Gabriel Landeskog     [LW]
8479318     Au

In [103]:
# Dump the full playerId -> name mapping (large output; useful for looking up ids by hand).
player_info.name.to_dict()

{8470594: 'Marc-Andre Fleury',
 8470600: 'Ryan Suter',
 8470613: 'Brent Burns',
 8470621: 'Corey Perry',
 8471214: 'Alex Ovechkin',
 8471215: 'Evgeni Malkin',
 8471218: 'Blake Wheeler',
 8471675: 'Sidney Crosby',
 8471677: 'Jack Johnson',
 8471685: 'Anze Kopitar',
 8471698: 'T.J. Oshie',
 8471709: 'Marc-Edouard Vlasic',
 8473446: 'Erik Johnson',
 8473533: 'Jordan Staal',
 8473563: 'Nicklas Backstrom',
 8473449: 'Kyle Okposo',
 8473453: 'Trevor Lewis',
 8473512: 'Claude Giroux',
 8473575: 'Semyon Varlamov',
 8473422: 'Nick Foligno',
 8471724: 'Kris Letang',
 8471734: 'Jonathan Quick',
 8471817: 'Ryan Reaves',
 8473504: 'Cal Clutterbuck',
 8474141: 'Patrick Kane',
 8474037: 'James van Riemsdyk',
 8474053: 'Logan Couture',
 8474151: 'Ryan McDonagh',
 8474189: 'Lars Eller',
 8474013: 'Ian Cole',
 8474157: 'Max Pacioretty',
 8474150: 'Mikael Backlund',
 8474102: 'David Perron',
 8474090: 'Brendan Smith',
 8473419: 'Brad Marchand',
 8474034: 'Pat Maroon',
 8473503: 'James Reimer',
 8474166: 