In [1]:
%load_ext autoreload
%autoreload 2
from yfpy.query import YahooFantasySportsQuery
import sys
sys.path.append('../src')

In [3]:
def get_moves(t):
    """Flatten one transaction into a list of single-direction roster moves.

    For every player in ``t.players`` this emits up to two dicts:

    * ``{'player': <player_key>, 'to': <destination_team_key>}`` when the
      player's transaction data has a truthy destination team key, and
    * ``{'player': <player_key>, 'from': <source_team_key>}`` when it has a
      truthy source team key.

    For a single player the 'to' entry always comes before the 'from' entry.

    Args:
        t: transaction object exposing ``players``; each player exposes
            ``player_key`` and ``transaction_data``.

    Returns:
        list[dict]: the moves, in player order.
    """
    # (label used in the output dict, attribute read from transaction_data)
    directions = (('to', 'destination_team_key'), ('from', 'source_team_key'))

    moves = []
    for entry in t.players:
        details = entry.transaction_data
        for label, attr in directions:
            team_key = getattr(details, attr)
            if team_key:
                moves.append({'player': entry.player_key, label: team_key})
    return moves


def argsort(seq):
    """Return the indices of ``seq`` in the order that sorts its values ascending.

    The sort is stable, so equal values keep their original relative order.
    """
    order = list(range(len(seq)))
    order.sort(key=lambda idx: seq[idx])
    return order

def get_transactions(q):
    """Group the league's roster moves by the day after they were made.

    Transactions returned by ``q.get_league_transactions()`` are processed in
    ascending ``timestamp`` order. Each one is filed under the calendar date
    of ``get_ts(timestamp) + 1 day`` and expanded into single-direction moves
    with ``get_moves``.

    Args:
        q: query object exposing ``get_league_transactions()``.

    Returns:
        dict: maps a ``datetime.date`` to the list of move dicts filed under
        that date, in timestamp order.
    """
    # NOTE(review): get_ts is not defined in the visible cells; it presumably
    # turns the raw timestamp into a pandas Timestamp — confirm.
    ordered = sorted(q.get_league_transactions(), key=lambda tr: tr.timestamp)

    tr_by_date = {}
    for tr in ordered:
        effective_day = (get_ts(tr.timestamp) + pd.Timedelta('1d')).date()
        tr_by_date.setdefault(effective_day, []).extend(get_moves(tr))
    return tr_by_date

In [4]:
def get_initial_teams(game_id):
    """Build each team's starting roster from the league draft results.

    Args:
        game_id: value handed to ``get_q`` (defined outside the visible
            cells) to obtain the query object.

    Returns:
        dict: maps ``team_key`` to the list of ``player_key`` values that team
        drafted, in the order the draft results are returned.
    """
    query = get_q(game_id)
    rosters = {}
    for pick in query.get_league_draft_results():
        rosters.setdefault(pick.team_key, []).append(pick.player_key)
    return rosters

def get_teams(game_id):
    """Reconstruct every team's roster for each day from the draft onwards.

    Starting from the drafted rosters, the moves returned by
    ``get_transactions`` are replayed day by day and a snapshot of all rosters
    is stored for every date between the draft day and ``weeks[-1].end``.

    Args:
        game_id: value handed to ``get_q`` to obtain the query object; the
            same value is forwarded to ``get_initial_teams``.

    Returns:
        pandas.DataFrame: one row per (date, team, player) with a datetime
        index named ``date`` and columns ``team_id`` and ``player_key``.

    Raises:
        ValueError: from ``list.remove`` when a 'from' move names a player who
            is not on that team's reconstructed roster.
    """
    from copy import deepcopy

    q = get_q(game_id)
    s = q.get_league_settings()
    # NOTE(review): `weeks` is a module-level global that must be populated
    # before calling; its last element supplies the final date via `.end`.
    dates = pd.date_range(get_ts(s.draft_time).strftime('%Y-%m-%d'),
                          weeks[-1].end, freq='d').date

    transactions = get_transactions(q)
    # get_initial_teams() declares a game id and builds its own query through
    # get_q, so it gets the same argument this function received rather than
    # the query object.
    current_teams = get_initial_teams(game_id)

    teams = {}
    for date in dates:
        for move in transactions.get(date, []):
            if 'to' in move:
                # A team with no draft picks has no roster entry yet.
                current_teams.setdefault(move['to'], []).append(move['player'])
            if 'from' in move:
                current_teams[move['from']].remove(move['player'])
        # Snapshot: later moves must not mutate earlier days.
        teams[date] = deepcopy(current_teams)

    # dates x teams grid of roster lists -> long format, one row per player.
    df = (
        pd.DataFrame(teams).T
        .melt(var_name='team_id', value_name='player_key', ignore_index=False)
        .rename_axis('date')
        .reset_index()
        .explode('player_key')
    )
    df['date'] = pd.to_datetime(df['date'])
    return df.set_index('date')

In [14]:
import pandas as pd


In [9]:
from get_data import load_history, load_current, combine_history, process_pca, process_y, load_bios, load_team_data
# load_history()
# load_current()
# combine_history()
# load_team_data()
# load_bios()
# process_pca()
# process_y()


Loaded bios, writing to csv.
Read in game data.
Processed pca, writing to csv
Processed y, writing to csv


In [29]:
# Yahoo game id used for this session; an alternative id is kept commented out.
# NOTE(review): 427 and 453 are presumably two different NHL seasons — confirm.
game_id = 427
# game_id = 453

league_id = 21834
# Module-level query object; get_results() below reads this global `q`.
# NOTE(review): auth_dir is an absolute, machine-specific path — consider
# making it configurable.
q = YahooFantasySportsQuery(
    game_id = game_id,
    league_id = league_id,
    game_code = 'nhl',
    auth_dir = '/home/jupyter/creds',
    browser_callback = False,
)

In [17]:
# NOTE(review): get_players is not defined in the visible cells. The captured
# output below shows this call logging "No data found" for ['league', 'players'].
# The commented-out lines are the only visible place where `weeks` (read by
# get_teams) and `teams` (read by the weekly loop) would be created.
# NOTE(review): get_teams declares a `game_id` parameter but would receive the
# query object `q` here — confirm which is intended.
# weeks = q.get_game_weeks_by_game_id(game_id)
players = get_players(q)
# teams = get_teams(q)
# teams = teams.reset_index().merge(players, on='player_key', how='left')


2024-09-23 13:26:24.042 - ERROR - query.py - yfpy.query:291 - No data found when attempting extraction from fields: ['league', 'players']


In [30]:
# Load the modelling inputs from the project-local process_data module.
from process_data import get_player_stats, get_team_stats, get_bios, run_training, INDEX_COLS
# X_p: per-player feature frame, y: matching targets (both consumed by the next cell).
X_p, y = get_player_stats()
# X_b: bio features derived from X_p; joined onto the player features below.
X_b = get_bios(X_p)
# X_t: per-team feature frame; must contain a 'home' column (used below).
X_t = get_team_stats()

In [23]:
# Build the training matrix. Statement order matters: X_pt's index is renamed
# twice so the same team frame can be joined once as the opponent and once as
# the player's own team.

# Shift each player's rows by one so a row carries that player's previous
# row's values (presumably to predict a game from prior games only — confirm).
X = X_p.groupby('playerId').shift(1)
X = X.join(X_b)

# Same one-row shift per team, but 'home' is restored unshifted from X_t.
X_pt = X_t.groupby('team').shift(1).copy()
X_pt['home'] = X_t['home'].copy()

# Join team features keyed on the opposing team; clashing columns get '_opp'.
X_pt.index.names = ['opposingTeam','gameId','gameDate']
X = X.join(X_pt, rsuffix='_opp')

# Join the same frame keyed on the player's team; clashing columns get '_team'.
X_pt.index.names = ['playerTeam','gameId','gameDate']
X = X.join(X_pt, rsuffix='_team')

X = X.reset_index().set_index(INDEX_COLS)
# Drop rows where every one of the columns '0'..'34' is missing.
# NOTE(review): those columns are presumably the PCA components — confirm.
X = X[~X[[str(i) for i in range(35)]].isna().all(1)]
# Keep the targets aligned with the surviving feature rows.
y = y.loc[X.index]

In [25]:
# Fit the models. `pipes` is later indexed by target name and each entry is
# used via `.predict` and `.feature_names_in_` (see get_preds).
# NOTE(review): the meaning of the third positional argument (True) is defined
# in process_data.run_training, which is not visible here.
pipes = run_training(X, y, True)

fow -- 0.7414251367418632
fow -- 0.7901185749527758
goals -- 0.07357906986823237
goals -- 0.2720227817788389
assists -- 0.08688721114812958
assists -- 0.39888354828236905
shots -- 0.2257047788509583
shots -- 1.0684372334351537
hits -- 0.16546566719263378
hits -- 1.0118081149267546
blocks -- 0.2236169059349279
blocks -- 0.7654522918586214
pim -- 0.02645390322644614
pim -- 0.7137072948602097
goalsfor -- 0.03874478359033917
goalsfor -- 0.6289361285565018
goalsaga -- 0.025841764003764034
goalsaga -- 0.6291549350778155
ppp -- 0.10559534277704385
ppp -- 0.15195601095192351


## DRAFT

In [31]:
# Re-create the module-level query object for game id 427.
# This repeats the earlier setup cell; `league_id` comes from that cell.
game_id = 427

# NOTE(review): auth_dir is an absolute, machine-specific path.
q = YahooFantasySportsQuery(
    game_id = game_id,
    league_id = league_id,
    game_code = 'nhl',
    auth_dir = '/home/jupyter/creds',
    browser_callback = False,
)
def get_all_matchups(game_id, num_weeks=26):
    """Collect the league matchups of weeks 1..``num_weeks`` into one list.

    Args:
        game_id: value handed to ``get_q`` (defined outside the visible
            cells) to obtain the query object.
        num_weeks: number of weeks to fetch, starting at week 1. Defaults to
            26, the value that used to be hard-coded.

    Returns:
        list: the concatenated results of ``get_league_matchups_by_week`` for
        each week, in week order.
    """
    q = get_q(game_id)
    all_matchups = []
    for week_number in range(1, num_weeks + 1):
        all_matchups.extend(q.get_league_matchups_by_week(week_number))
    return all_matchups

def get_matchup_results(matchups):
    """Tabulate every team stat found in a list of matchups.

    Args:
        matchups: iterable of matchup objects exposing ``teams``; each team
            exposes ``team_stats.stats``, whose items have ``stat_id`` and
            ``value``.

    Returns:
        pandas.DataFrame: one row per (matchup, team, stat) with columns
        ``id`` (stat id), ``val`` (stat value) and ``name`` (label from the
        mapping below, missing for unmapped ids). The three columns are
        present even when ``matchups`` is empty. Per-stat averages can be
        derived by the caller with ``result.groupby('name')['val'].mean()``.
    """
    stats_map = {
        1: 'goals',
        2: 'assists',
        4: 'plusmin',
        5: 'pim',
        8: 'ppp',
        14: 'shots',
        16: 'fow',
        31: 'hits',
        32: 'blocks'
    }
    records = [
        {'id': s.stat_id, 'val': s.value, 'name': stats_map.get(s.stat_id)}
        for match in matchups
        for mt in match.teams
        for s in mt.team_stats.stats
    ]
    # Explicit columns keep the schema stable when there are no records.
    return pd.DataFrame(records, columns=['id', 'val', 'name'])

In [35]:
# Switch the module-level query object to game id 453.
# Every later cell that reads the global `q` (e.g. get_results) now talks to
# this game; `league_id` is unchanged from the earlier setup cell.
game_id = 453
# NOTE(review): auth_dir is an absolute, machine-specific path.
q = YahooFantasySportsQuery(
    game_id = game_id,
    league_id = league_id,
    game_code = 'nhl',
    auth_dir = '/home/jupyter/creds',
    browser_callback = False,
)

In [36]:
def get_week_data(dates, games, X_p, X_t):
    """Build one feature row per (scheduled game, player) for an upcoming week.

    For every player the most recent stats row dated strictly before the
    first day of ``dates`` is taken, and likewise for every team. Each
    scheduled game is then paired with the players of both sides together
    with their own team's and the opposing team's latest stats.

    Args:
        dates: the days of the week being modelled; the earliest one is the
            cutoff for "stats known so far".
        games: schedule records convertible to a DataFrame with at least the
            columns ``home``, ``away``, ``gameId`` and ``ts``.
        X_p: player feature frame whose index has the levels ``gameDate``
            (parsed as ``%Y%m%d``), ``playerTeam`` and ``playerId``.
        X_t: team feature frame with a ``gameDate`` index level, a ``team``
            key and a ``home`` column.

    Returns:
        pandas.DataFrame: indexed by ``gameId, gameDate, playerId,
        playerTeam, opposingTeam``, with player columns, own-team columns
        (clashes suffixed ``_team``), opponent columns (clashes suffixed
        ``_opp``), the bios from ``get_bios``, ``home_team`` (1 when the
        player's team is the home side) and ``home`` (``1 - home_team``).
    """
    # The cutoff comes from the `dates` argument. It was previously read from
    # a global `week` that the function never received.
    week_start = pd.to_datetime(dates).min()

    player_dates = pd.to_datetime(X_p.index.get_level_values('gameDate'), format='%Y%m%d')
    player_stats = (
        X_p[player_dates < week_start]
        .reset_index(level='playerTeam')
        .groupby(['playerId'])
        .last()
    )

    team_dates = pd.to_datetime(X_t.index.get_level_values('gameDate'), format='%Y%m%d')
    team_stats = X_t[team_dates < week_start].groupby('team').last()
    team_stats = team_stats.drop('home', axis=1)

    schedule = pd.DataFrame(games)
    player_rows = player_stats.reset_index()
    team_rows = team_stats.reset_index()

    def _attach(side, other):
        """Rows for the players on `side`, plus own-team and opposing-team stats."""
        out = schedule.merge(player_rows, left_on=side, right_on='playerTeam')
        out = out.merge(team_rows, left_on=side, right_on='team', suffixes=('', '_team'))
        return out.merge(team_rows, left_on=other, right_on='team', suffixes=('', '_opp'))

    gs = pd.concat([_attach('home', 'away'), _attach('away', 'home')]).sort_values('gameId')
    gs['home_team'] = (gs['playerTeam'] == gs['home']).astype(int)
    gs['gameDate'] = pd.to_datetime(gs['ts'])
    # The opponent is the home side for away players and the away side otherwise.
    gs['opposingTeam'] = gs['home'].where(gs['playerTeam'] == gs['away'], gs['away'])
    gs = gs.set_index(['gameId', 'gameDate', 'playerId', 'playerTeam', 'opposingTeam'])
    gs = gs.join(get_bios(gs))
    # Overwrites the schedule's 'home' team column with the complement of
    # home_team, i.e. 1 when the opposing team is the home side.
    gs['home'] = 1 - gs['home_team']
    return gs

def get_opponent(m, team_key=None):
    """Return the key of the team that ``team_key`` faces in matchup ``m``.

    Args:
        m: matchup object exposing ``teams``, each with a ``team_key``.
        team_key: the team whose opponent is wanted. When omitted, the
            module-level global ``own_team`` is used, as before.

    Returns:
        The first team key in the matchup that differs from ``team_key``.

    Raises:
        ValueError: if ``team_key`` is not part of the matchup, or the matchup
            contains no other team. The first case used to surface as an
            ``UnboundLocalError`` and the second as an ``IndexError``.
    """
    if team_key is None:
        team_key = own_team  # global expected to be set elsewhere in the notebook

    keys = [t.team_key for t in m.teams]
    if team_key not in keys:
        raise ValueError(f'team {team_key!r} does not take part in this matchup: {keys!r}')

    others = [k for k in keys if k != team_key]
    if not others:
        raise ValueError(f'matchup has no opponent for team {team_key!r}')
    return others[0]

from scipy import stats
def get_added_val(x, opp_exp, stat_cols):
    """Probability, per stat, that the opponent's total does not exceed ours.

    Each value is ``skellam.cdf(0, mu1, mu2)``, i.e. ``P(N1 - N2 <= 0)`` for
    independent Poisson counts with means ``mu1`` (opponent side) and ``mu2``
    (own side); ties are included.

    For ``'plusmin'`` the two means are built from goals for and against:
    ``mu1 = opp goalsfor + own goalsaga`` and
    ``mu2 = opp goalsaga + own goalsfor``.

    Args:
        x: own expected totals, indexable by stat name (and by ``'goalsfor'``
            and ``'goalsaga'`` when ``'plusmin'`` is requested).
        opp_exp: the opponent's expected totals, indexable the same way.
        stat_cols: iterable of stat names to evaluate.

    Returns:
        dict: stat name -> probability, in ``stat_cols`` order.
    """
    def _rates(col):
        """(opponent-side mean, own-side mean) for one stat."""
        if col == 'plusmin':
            return (opp_exp['goalsfor'] + x['goalsaga'],
                    opp_exp['goalsaga'] + x['goalsfor'])
        return opp_exp[col], x[col]

    return {col: stats.skellam.cdf(0, *_rates(col)) for col in stat_cols}

In [1]:
def can_include_player(new_player, position_lookup, selected_players, position_limits, wildcard=None):
    """Check whether a lineup can still be filled after adding one more player.

    The answer is True when every player in ``selected_players`` plus
    ``new_player`` can be given one of their eligible positions without any
    position exceeding its limit.

    The check is a capacity-constrained bipartite matching solved with
    augmenting paths. It gives the same answers as an exhaustive search over
    assignments but runs in polynomial time, which matters because this
    function is called once per candidate player per day.

    Args:
        new_player: id of the candidate player.
        position_lookup: dict mapping player id -> iterable of eligible
            positions. Players missing from it have no eligible position.
        position_limits: dict mapping position -> number of slots. Positions
            missing from it have zero slots.
        selected_players: ids already in the lineup. A player listed more
            than once, or equal to ``new_player``, only needs one slot.
        wildcard: optional player id that may fill any position named in
            ``position_limits``.

    Returns:
        bool: True if a valid assignment exists, otherwise False.
    """
    # Deduplicate while keeping order: one player occupies one slot.
    roster = list(dict.fromkeys(list(selected_players) + [new_player]))

    def eligible(player):
        """Positions the player may fill."""
        if player == wildcard:
            return list(position_limits)
        return list(position_lookup.get(player, []))

    # position -> players currently seated there
    seated = {}

    def try_seat(player, visited):
        """Seat `player`, relocating already-seated players if needed."""
        for pos in eligible(player):
            capacity = position_limits.get(pos, 0)
            if capacity <= 0 or pos in visited:
                continue
            visited.add(pos)
            occupants = seated.setdefault(pos, [])
            if len(occupants) < capacity:
                occupants.append(player)
                return True
            # Position full: try to move one occupant elsewhere. `pos` is in
            # `visited`, so the recursion never touches this occupant list.
            for slot, other in enumerate(occupants):
                if try_seat(other, visited):
                    occupants[slot] = player
                    return True
        return False

    return all(try_seat(player, set()) for player in roster)

def get_days_games(todays_games, players, selected_team, *, position_limits=None):
    """Keep the rows of one day's games whose player could be in the lineup.

    A row is kept when its player is already in ``selected_team``, or when
    ``can_include_player`` says the player fits next to the selected players
    who play that day.

    Args:
        todays_games: frame for a single day whose index has a ``playerId``
            level.
        players: frame with ``playerId`` and ``pos`` columns; ``pos`` holds
            each player's eligible positions. An empty frame means no
            unselected player is eligible.
        selected_team: ids of the players already chosen.
        position_limits: optional dict position -> slots per day. Defaults to
            4 D, 2 C, 2 RW, 2 LW and 0 G.

    Returns:
        pandas.DataFrame: the kept rows of ``todays_games``, with columns and
        order unchanged.
    """
    if position_limits is None:
        position_limits = {
            'D': 4,
            'C': 2,
            'RW': 2,
            'LW': 2,
            'G': 0
        }
    position_lookup = players.set_index('playerId')['pos'].to_dict()

    player_ids = todays_games.index.get_level_values('playerId')
    in_team = player_ids.isin(selected_team)
    todays_players = player_ids[in_team].tolist()

    # Rostered players are always kept, so the lineup check only runs for
    # the remaining candidates.
    keep = [
        bool(on_team) or can_include_player(pid, position_lookup, todays_players,
                                            position_limits, wildcard=None)
        for pid, on_team in zip(player_ids, in_team)
    ]
    # An explicit bool dtype keeps this a row mask even when the day is empty.
    return todays_games[pd.Series(keep, dtype=bool).to_numpy()]

In [38]:



def get_week_games(dates, preds, players, selected_team):
    """Apply the daily lineup filter to every day of a week and stack the results.

    ``get_days_games`` takes the games of a single day, so ``preds`` is
    sliced by calendar day before each call. Passing the date itself does not
    match its three-argument signature.

    Args:
        dates: iterable of days to cover.
        preds: frame whose index has ``gameDate`` and ``playerId`` levels.
        players: frame with ``playerId`` and ``pos`` columns, forwarded to
            ``get_days_games``.
        selected_team: ids of the players already chosen.

    Returns:
        pandas.DataFrame: the kept rows of ``preds`` for all days, in date
        order; an empty slice of ``preds`` when ``dates`` is empty.
    """
    game_days = pd.DatetimeIndex(preds.index.get_level_values('gameDate'))
    # NOTE(review): assumes dropping the timezone and time of day gives the
    # calendar day the schedule means — confirm against get_schedule's `ts`.
    if game_days.tz is not None:
        game_days = game_days.tz_localize(None)
    game_days = game_days.normalize()

    daily = []
    for date in dates:
        day = pd.Timestamp(date).normalize()
        daily.append(get_days_games(preds[game_days == day], players, selected_team))

    if not daily:
        # pd.concat raises on an empty list.
        return preds.iloc[0:0]
    return pd.concat(daily)
    

In [39]:
# own_team = '427.l.21834.t.3'
# matchups = q.get_team_matchups(own_team.split('.')[-1])

In [41]:
def get_preds(pipes, gs):
    """Predict every target in ``PRED_COLS`` for the rows of ``gs``.

    Each fitted model receives only the columns listed in its own
    ``feature_names_in_``, and negative predictions are clipped to zero.

    Args:
        pipes: mapping target name -> fitted model exposing ``predict`` and
            ``feature_names_in_``.
        gs: feature frame; its index is reused for the result.

    Returns:
        pandas.DataFrame: one column per target, indexed like ``gs``.
    """
    import numpy as np

    clipped = {}
    for target in PRED_COLS:
        model = pipes[target]
        raw = model.predict(gs[model.feature_names_in_])
        clipped[target] = np.clip(raw, 0, np.inf)
    return pd.DataFrame(clipped, index=gs.index)

def get_results(dates, preds, players, selected_team):
    """Sum the actual stats the selected team recorded on its game days.

    Args:
        dates: days of the week to evaluate.
        preds: per-game prediction frame, used only to know who played on
            which ``gameDate``.
        players: frame with ``playerId`` and ``player_key`` columns.
        selected_team: ids of the players in the lineup.

    Returns:
        pandas.Series: total per stat name over all fetched player-days.

    Notes:
        Reads the module-level query object ``q`` and issues one
        ``get_player_stats_by_date`` request per player per game day.
    """
    stats_map = {
        1: 'goals',
        2: 'assists',
        4: 'plusmin',
        5: 'pim',
        8: 'ppp',
        14: 'shots',
        16: 'fow',
        31: 'hits',
        32: 'blocks'
    }

    # An empty `players` frame gives the daily filter an empty position
    # lookup, so only rows of already-selected players survive.
    lineup = get_week_games(dates, preds, players.iloc[:0], selected_team)
    key_by_id = players.groupby('playerId')['player_key'].first()
    keys_by_day = (
        lineup.join(key_by_id)
        .groupby('gameDate')['player_key']
        .apply(list)
        .to_dict()
    )

    results = []
    for day, keys in keys_by_day.items():
        for key in keys:
            pstats = q.get_player_stats_by_date(key, day.strftime('%Y-%m-%d')).player_stats.stats
            # Stat ids outside the mapping are skipped rather than raising
            # KeyError, matching how get_matchup_results tolerates them.
            results.append({
                stats_map[s.stat_id]: s.value
                for s in pstats
                if s.stat_id in stats_map
            })
    return pd.DataFrame(results).sum()

In [None]:
# Scratch cell: schedule for the week of matchup `m`.
# The start bound was missing (a syntax error); it now matches the weekly
# loop, which uses m.week_start .. m.week_end.
# NOTE(review): `m` and get_schedule are not defined in the visible cells.
dates = pd.date_range(m.week_start, m.week_end)
games = get_schedule(dates)


In [1164]:
# Weekly simulation: for each matchup, greedily pick a 14-player team that
# maximises the score returned by get_added_vals against the opponent, then
# print the resulting drops/adds and the stats from get_results.
# NOTE(review): `matchups`, `teams`, `own_team`, `get_schedule`,
# `get_added_vals` and `PRED_COLS` are not defined in the visible cells
# (`own_team` and `matchups` appear only in commented-out lines), so this
# cell fails on a fresh kernel.
i = 0
for m in matchups:
    print('WEEK', m.week)
    dates = pd.date_range(m.week_start, m.week_end)
    games = get_schedule(dates)
    opponent = get_opponent(m)
    gs = get_week_data(dates, games, X_p, X_t)
    
    preds = get_preds(pipes, gs)


    # Predictions for the opponent's roster as of the first day of the week.
    opp_exp = teams[(teams['team_id'] == opponent)&(teams['date'] == dates[0])].merge(preds, on='playerId').set_index(['playerId', 'name'])[PRED_COLS]
    
    # Only the first iteration seeds the roster from `teams`; later weeks
    # carry over the previous week's selection (see the last line of the loop).
    if i == 0:
        own_current = teams[(teams['team_id'] == own_team)&(teams['date'] == dates[0])].playerId.tolist()
    i+=1
    
    # Players on any other team on the first day of the week.
    taken = teams[(teams['date'] == dates[0])&(teams['team_id'] != own_team)]['playerId'].tolist()
    
    
    selected_team = []

    player_info = players.set_index('playerId')[['name', 'pos']]
    while len(selected_team) < 14:
        week_ind_preds = get_week_games(dates, preds, players, selected_team)
        week_preds = week_ind_preds.groupby('playerId').sum()
        # The first 12 picks come from the current roster; the remaining
        # picks come from players who are neither taken nor already selected.
        if len(selected_team) < 12:
            team_left = [p for p in own_current if p not in selected_team]
            current_exp = week_preds.loc[week_preds.index.isin(team_left)].copy()
        else:
            current_exp = week_preds[~week_preds.index.isin(taken+selected_team)].copy()

        # Each candidate row becomes "already selected totals + this candidate".
        current_exp += week_preds.loc[selected_team].sum()

        # NOTE(review): get_added_vals (plural, two arguments) is not the
        # get_added_val (singular, three arguments) defined in this notebook.
        exp = get_added_vals(current_exp, opp_exp.sum())
        selected_team.append(exp.idxmax())
    
    res = get_results(dates, preds, players, selected_team)
    print('DROPS\n', player_info.loc[[t for t in own_current if t not in selected_team]])
    print('ADDS\n', player_info.loc[[t for t in selected_team if t not in own_current]])
    print('TEAM\n', player_info.loc[selected_team])
    print('RESULT\n', res)
    own_current = [n for n in selected_team]

WEEK 13
DROPS
                      name      pos
playerId                          
8470794      Joe Pavelski  [C, RW]
8474593   Jacob Markstrom      [G]
8477465     Tristan Jarry      [G]
8475753      Justin Faulk      [D]
8478444      Brock Boeser     [RW]
8479394       Carter Hart      [G]
ADDS
                    name      pos
playerId                        
8478366   Frank Vatrano  [C, LW]
8474590    John Carlson      [D]
TEAM
                        name       pos
playerId                             
8478483     Mitchell Marner      [RW]
8475184       Chris Kreider      [LW]
8474564      Steven Stamkos   [C, LW]
8471685        Anze Kopitar       [C]
8476468         J.T. Miller   [C, RW]
8477934      Leon Draisaitl   [C, LW]
8477346    MacKenzie Weegar       [D]
8475167       Victor Hedman       [D]
8476372         Nick Seeler       [D]
8474565    Alex Pietrangelo       [D]
8477501   Valeri Nichushkin  [LW, RW]
8478445       Mathew Barzal   [C, RW]
8478366       Frank Vatrano  

KeyboardInterrupt: 