In [14]:
import numpy as np
import pandas as pd

In [15]:
# Weekly player stats published by nflverse, read straight from GitHub.
url = (
    'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet'
)
df = pd.read_parquet(path=url)

In [16]:
# Downcast every float64 column to float32 to shrink the frame.
# The cast goes through ``astype`` with a column -> dtype mapping, which
# returns a frame carrying the new dtypes. Assigning the converted values back
# with ``df.loc[:, cols] = ...`` writes them into the existing float64 columns
# on pandas 2.x, which leaves the dtypes unchanged.
cols = df.select_dtypes(include=[np.float64]).columns
df = df.astype({col: np.float32 for col in cols})

In [17]:
# Half-PPR score = midpoint of the standard and full-PPR scores.
df = df.assign(
    fantasy_points_hppr=lambda x: x['fantasy_points'].add(x['fantasy_points_ppr']).div(2)
)

In [18]:
# Roster file from nflverse; the next cell reads gsis_id, season, full_name
# and position from it to label each player.
pdf = pd.read_csv(
    'https://github.com/nflverse/nflfastR-roster/raw/master/data/nflfastR-roster.csv.gz',
    compression='gzip',
    low_memory=False,
)

In [19]:
# Attach full_name and position by matching the stats' (player_id, season)
# against the roster's (gsis_id, season); stat rows without a roster match are
# kept (left join) and get missing values.
df = df.join(
    pdf.set_index(['gsis_id', 'season'])[['full_name', 'position']],
    on=['player_id', 'season'],
    how='left',
)

In [20]:
# Keep only the 2020 season and the identifying / scoring columns.
wanted = [
    'season', 'week', 'player_id', 'full_name', 'position',
    'fantasy_points', 'fantasy_points_ppr', 'fantasy_points_hppr',
]
df2 = df.loc[df['season'].eq(2020), wanted]

In [21]:
# Season summary per player: totals (fptot*) and per-row means (fppg*) for each
# scoring format, plus posrk = rank within position by half-PPR total
# (1 = highest total; ties are broken by row order).
# Example: seas.loc[seas.position == 'QB', :].sort_values('posrk')
seas = (
    df2
    .groupby(['player_id', 'full_name', 'position'], as_index=False)
    .agg(**{
        'fptot': ('fantasy_points', 'sum'),
        'fptot_ppr': ('fantasy_points_ppr', 'sum'),
        'fptot_hppr': ('fantasy_points_hppr', 'sum'),
        'fppg': ('fantasy_points', 'mean'),
        'fppg_ppr': ('fantasy_points_ppr', 'mean'),
        'fppg_hppr': ('fantasy_points_hppr', 'mean'),
    })
    .assign(
        posrk=lambda totals: (
            totals
            .groupby('position')['fptot_hppr']
            .rank(method='first', ascending=False)
        )
    )
)

In [22]:
# Player ids of the QBs ranked 1-20 by half-PPR total, then their weekly rows.
qbids = seas.loc[seas['position'].eq('QB') & seas['posrk'].le(20), 'player_id']
qbs = df2[df2['player_id'].isin(qbids)]

In [23]:
# Prepare the data for simulating bye weeks.
# Every QB needs an equal-length array of weekly scores, so build a complete
# grid of weeks 1-16 for each player:
#   * one row per (week, player) for weeks 1-16 of 2020
#   * a week with no score for the player is filled with that player's mean
#   * full_name / position are filled from the player's own rows
# The bye is not added here: the simulation cells insert a 0 into each weekly
# array (at the beginning or at the end) and compare the resulting totals.
#
# The cross merge yields a week-major frame (all players for week 1, then all
# players for week 2, ...), so neighbouring rows belong to different players.
# Both fill directions therefore run inside the per-player transform; a plain
# ``.ffill()`` chained after the grouped ``.bfill()`` would copy the previous
# row's (another player's) name and position.
qbs = (
  pd.DataFrame({'season': 2020, 'week': range(1, 17)})
  .merge(qbs.loc[qbs.week < 17, ['player_id']].drop_duplicates(), how='cross')
  .join(qbs.set_index(['season', 'week', 'player_id']), how='left', on=['season', 'week', 'player_id'])
  .assign(full_name=lambda x: x.groupby('player_id')['full_name'].transform(lambda y: y.bfill().ffill()),
          position=lambda x: x.groupby('player_id')['position'].transform(lambda y: y.bfill().ffill()),
          fantasy_points=lambda x: x.groupby('player_id')['fantasy_points'].transform(lambda y: y.fillna(y.mean())),
          fantasy_points_ppr=lambda x: x.groupby('player_id')['fantasy_points_ppr'].transform(lambda y: y.fillna(y.mean())),
          fantasy_points_hppr=lambda x: x.groupby('player_id')['fantasy_points_hppr'].transform(lambda y: y.fillna(y.mean()))
         )
)

In [24]:
# Two QBs, vectorized: how much does staggering the bye weeks help?
# For each random pair the better of the two weekly scores counts, over a
# season of weeks + 1 slots (16 scored weeks plus one bye per player):
#   'same': both byes in the first slot
#   'diff': first QB's bye in the first slot, second QB's bye in the last slot
vals = []
iterations = 100  # random week orderings evaluated per pair
weeks = 16
n_pairs = 1000
rng = np.random.default_rng()  # pass a seed here for a reproducible run

# Each row is one random ordering of the week positions 0..weeks-1. Sorting
# continuous uniform draws (no ties) makes every ordering equally likely;
# sorting small random integers would favour orderings close to sorted.
shuffled_indices = rng.random((iterations, weeks)).argsort(axis=1)
bye = np.zeros((iterations, 1))
qb_pool = qbids.to_numpy()

for _ in range(n_pairs):
    # draw two distinct QBs from the same generator as the week orderings
    first_id, second_id = rng.choice(qb_pool, size=2, replace=False)
    # (iterations, weeks): each row is the QB's weekly scores in one ordering
    first = qbs.loc[qbs.player_id == first_id, 'fantasy_points_hppr'].to_numpy()[shuffled_indices]
    second = qbs.loc[qbs.player_id == second_id, 'fantasy_points_hppr'].to_numpy()[shuffled_indices]

    # the first QB's bye is in the first slot in both scenarios
    first_bye_start = np.hstack((bye, first))
    score = np.maximum(first_bye_start, np.hstack((bye, second)))
    scored = np.maximum(first_bye_start, np.hstack((second, bye)))

    vals.append({'same': score.sum(axis=1).mean(), 'diff': scored.sum(axis=1).mean()})

In [25]:
# Summary of the pair results; delta is 'diff' minus 'same' for each pair.
(
    pd.DataFrame(vals)
    .assign(delta=lambda res: res['diff'].sub(res['same']))
    .describe()
)

Unnamed: 0,same,diff,delta
count,1000.0,1000.0,1000.0
mean,389.681822,404.751389,15.069567
std,43.261319,44.314858,9.448264
min,296.982668,314.531255,-8.938615
25%,358.273333,376.251625,8.284986
50%,395.575997,409.417826,15.513984
75%,422.083998,437.455267,21.458839
max,469.326668,495.456181,44.826347


In [26]:
# 3 QBs: compare three bye-week layouts. The best of the three weekly scores
# counts, over a season of weeks + 1 slots (16 scored weeks plus one bye each):
#   'same' : all three byes in the first slot
#   '1diff': second QB's bye in the last slot, the other two share the first
#   'adiff': byes in the first slot, the last slot and the third slot
vals = []
iterations = 100  # random week orderings evaluated per trio
weeks = 16
n_trios = 1000
rng = np.random.default_rng()  # pass a seed here for a reproducible run

# Each row is one random ordering of the week positions 0..weeks-1. Sorting
# continuous uniform draws (no ties) makes every ordering equally likely;
# sorting small random integers would favour orderings close to sorted.
shuffled_indices = rng.random((iterations, weeks)).argsort(axis=1)
bye = np.zeros((iterations, 1))
qb_pool = qbids.to_numpy()

for _ in range(n_trios):
    # draw three distinct QBs from the same generator as the week orderings
    choices = rng.choice(qb_pool, size=3, replace=False)
    # (iterations, weeks) per QB: each row is the weekly scores in one ordering
    first, second, third = (
        qbs.loc[qbs.player_id == pid, 'fantasy_points_hppr'].to_numpy()[shuffled_indices]
        for pid in choices
    )

    # every layout is assembled from these five bye placements
    first_start = np.hstack((bye, first))
    second_start = np.hstack((bye, second))
    second_end = np.hstack((second, bye))
    third_start = np.hstack((bye, third))
    third_mid = np.hstack((third[:, :2], bye, third[:, 2:]))

    # all same bye
    score = np.maximum.reduce([first_start, second_start, third_start])
    # two share same bye
    scored = np.maximum.reduce([first_start, second_end, third_start])
    # no shared byes
    scorea = np.maximum.reduce([first_start, second_end, third_mid])

    vals.append({'same': score.sum(axis=1).mean(), '1diff': scored.sum(axis=1).mean(), 'adiff': scorea.sum(axis=1).mean()})


In [27]:
# Summary statistics for the three bye layouts.
pd.DataFrame.from_records(vals).describe()

Unnamed: 0,same,1diff,adiff
count,1000.0,1000.0,1000.0
mean,427.217784,444.848492,446.837528
std,35.772252,37.95507,37.892826
min,331.21867,345.700834,347.06472
25%,404.252667,419.839324,422.277345
50%,429.377331,446.970086,449.637826
75%,456.021336,474.735408,477.058783
max,503.326664,524.200982,526.687089


In [None]:
# parameterized version of the bye-week simulations
def byesim(df: pd.DataFrame,
           n_players: int,
           fpts_col: str,
           weeks: int = 16,
           combinations: int = 1000,
           shuffles: int = 100,
           seed=None) -> pd.DataFrame:
    """Simulates the effect of shared / staggered bye weeks

    For every sampled group of players the best weekly score in the group
    counts. The simulated season has ``weeks + 1`` slots: each player's
    ``weeks`` scores plus one bye slot (score 0). The same random week
    orderings are applied to every player, and the season total is averaged
    over those orderings.

    Args:
        df (DataFrame): the weekly rows for each eligible player. Must have a
            'player_id' column and exactly `weeks` rows per player.
        n_players (int): the number of players in each sampled group
        fpts_col (str): the column with fantasy points
        weeks (int): the number of weekly scores per player, default 16.
            The bye adds one more slot to the simulated season.
        combinations (int): the number of player combinations to test
        shuffles (int): the number of random shuffles of weekly scores for each combination of players.
        seed (int, optional): seed for the random generator; None (default)
            gives a different result on every call.

    Returns:
        DataFrame: one row per combination and one column per bye layout.
            'same' has every bye in slot 0. '<k>diff' (k = 1 .. n_players - 1)
            gives k players a bye slot of their own (slots 1 .. k) while the
            remaining players share slot 0, so '<n_players - 1>diff' has no
            shared byes at all. Values are mean season totals.

    Raises:
        ValueError: if n_players < 2, if there are more players than bye
            slots, if df has fewer than n_players players, or if any player
            does not have exactly `weeks` rows.

    """
    if n_players < 2:
        raise ValueError('Must have at least 2 players')
    if n_players > weeks + 1:
        raise ValueError('Cannot give more than weeks + 1 players their own bye slot')

    # one array of weekly scores per player, in the row order of df
    per_player = [scores.to_numpy(dtype=float)
                  for _, scores in df.groupby('player_id', sort=False)[fpts_col]]
    if len(per_player) < n_players:
        raise ValueError('df has fewer players than n_players')
    if any(len(scores) != weeks for scores in per_player):
        raise ValueError('Every player must have exactly `weeks` rows')
    score_table = np.vstack(per_player)  # shape (players, weeks)

    # get 2D array of shuffled indices
    # shape is (shuffles, weeks), so default is (100, 16)
    # sorting continuous uniform draws has no ties, so each ordering of the
    # weeks is equally likely
    rng = np.random.default_rng(seed)
    shuffled_indices = rng.random((shuffles, weeks)).argsort(axis=1)

    labels = ['same'] + [f'{k}diff' for k in range(1, n_players)]
    vals = []

    for _ in range(combinations):
        picks = rng.choice(len(score_table), size=n_players, replace=False)
        # shape (n_players, shuffles, weeks)
        shuffled = score_table[picks][:, shuffled_indices]

        row = {}
        for n_own_bye, label in enumerate(labels):
            # the sharing players take slot 0; the rest take slots 1, 2, ...
            bye_slots = [0] * (n_players - n_own_bye) + list(range(1, n_own_bye + 1))
            # insert each player's bye as a column of zeros -> (shuffles, weeks + 1)
            seasons = np.stack([np.insert(shuffled[idx], slot, 0.0, axis=1)
                                for idx, slot in enumerate(bye_slots)])
            # best score per slot, summed over the season, averaged over shuffles
            row[label] = seasons.max(axis=0).sum(axis=1).mean()
        vals.append(row)

    return pd.DataFrame(vals, columns=labels)


# Simulate 4QBs

In [None]:
# get some trash QBs: position rank above 10 and at most 35
# NOTE(review): qbs is built in an earlier cell from the QBs with posrk <= 20
# only, so ids ranked 21-35 have no rows in qbs and tqbs effectively holds
# ranks 11-20 -- confirm that this is the intended pool.
trash_qbids = seas.loc[
    seas['position'].eq('QB') & seas['posrk'].gt(10) & seas['posrk'].le(35),
    'player_id',
].unique()
tqbs = qbs[qbs['player_id'].isin(trash_qbids)]

In [None]:
# Four QBs from the tqbs pool: mean season total when the best of the four
# weekly scores counts each week (no bye slot is inserted here).
vals = []
weeks = 16
n_players = 4
iterations = 500
rng = np.random.default_rng()  # pass a seed here for a reproducible run

# (players in pool, weeks): one row of weekly scores per player, built once
score_table = np.vstack([scores.to_numpy()
                         for _, scores in tqbs.groupby('player_id')['fantasy_points_hppr']])

for _ in range(10000):
    picks = rng.choice(len(score_table), size=n_players, replace=False)
    # (iterations, n_players, weeks): the reps tuple stacks whole copies of the
    # score table. An integer reps would repeat along the week axis instead and
    # a reshape would then leave most blocks holding one player several times.
    players = np.tile(score_table[picks], (iterations, 1, 1))
    # reorder the weeks of every player independently in every iteration
    # (Generator.shuffle would only reorder the iteration axis)
    players = rng.permuted(players, axis=2)
    vals.append(players.max(axis=1).sum(axis=1).mean())

In [None]:
# Distribution of the simulated mean season totals.
pd.DataFrame({'scores': vals}).describe()

In [None]:
# Three QBs from the tqbs pool: mean season total when the best of the three
# weekly scores counts each week (no bye slot is inserted here).
vals = []
weeks = 16
n_players = 3
iterations = 500
rng = np.random.default_rng()  # pass a seed here for a reproducible run

# (players in pool, weeks): one row of weekly scores per player, built once
score_table = np.vstack([scores.to_numpy()
                         for _, scores in tqbs.groupby('player_id')['fantasy_points_hppr']])

for _ in range(10000):
    picks = rng.choice(len(score_table), size=n_players, replace=False)
    # (iterations, n_players, weeks): the reps tuple stacks whole copies of the
    # score table. An integer reps would repeat along the week axis instead and
    # a reshape would then leave most blocks holding one player several times.
    players = np.tile(score_table[picks], (iterations, 1, 1))
    # reorder the weeks of every player independently in every iteration
    # (Generator.shuffle would only reorder the iteration axis)
    players = rng.permuted(players, axis=2)
    vals.append(players.max(axis=1).sum(axis=1).mean())

In [None]:
# Distribution of the simulated mean season totals.
pd.DataFrame({'scores': vals}).describe()

In [None]:
# get some good QBs: position rank 10 or better, and their weekly rows
good_qbids = seas.loc[
    seas['position'].eq('QB') & seas['posrk'].le(10),
    'player_id',
].unique()
gqbs = qbs[qbs['player_id'].isin(good_qbids)]

In [None]:
# Two QBs from the gqbs pool: mean season total when the better of the two
# weekly scores counts each week (no bye slot is inserted here).
vals = []
weeks = 16
n_players = 2
iterations = 500
rng = np.random.default_rng()  # pass a seed here for a reproducible run

# (players in pool, weeks): one row of weekly scores per player, built once
score_table = np.vstack([scores.to_numpy()
                         for _, scores in gqbs.groupby('player_id')['fantasy_points_hppr']])

for _ in range(10000):
    picks = rng.choice(len(score_table), size=n_players, replace=False)
    # (iterations, n_players, weeks): the reps tuple stacks whole copies of the
    # score table. An integer reps would repeat along the week axis instead and
    # a reshape would then leave most blocks holding one player several times.
    players = np.tile(score_table[picks], (iterations, 1, 1))
    # reorder the weeks of every player independently in every iteration
    # (Generator.shuffle would only reorder the iteration axis)
    players = rng.permuted(players, axis=2)
    vals.append(players.max(axis=1).sum(axis=1).mean())

In [None]:
# Distribution of the simulated mean season totals.
pd.DataFrame({'scores': vals}).describe()