In [24]:
import pandas as pd
# Let pandas render up to 55 columns before truncating wide frames in cell output.
pd.set_option('display.max_columns', 55)

In [25]:
# Load the weekly stats table and discard the row whose index label is 3821.
# NOTE(review): the reason label 3821 is excluded is not recorded in this
# notebook — confirm and document it.
weekly = pd.read_parquet('data/weekly.parquet').loc[lambda frame: frame.index != 3821]

In [26]:
# Read the depth-chart table and order its rows by the `depth_team` column.
depth = pd.read_csv('data/depth.csv', index_col=0)
depth = depth.sort_values(by='depth_team')

# 0-based running counter of rows inside each
# club / season / week / position / game type / formation group,
# counted in the sorted order established above.
depth['dense_depth'] = depth.groupby(
    ['club_code', 'season', 'week', 'position', 'game_type', 'formation']
).cumcount()

# Rows whose formation is 'Offense'.
off = depth.loc[depth.formation.eq('Offense')]

In [27]:
# Columns identifying a player; also the grouping key for the rolling features.
context_cols = ['player_id', 'player_display_name', 'position', 'position_group']

# Game-level columns (week, season, teams).
game_context = ['week', 'season_type', 'season', 'opponent_team', 'recent_team']

# Every per-game stat column.
stat_cols = [
    # passing-related
    'completions', 'attempts', 'passing_yards', 'passing_tds', 'interceptions',
    'sacks', 'sack_yards', 'sack_fumbles', 'sack_fumbles_lost',
    'passing_air_yards', 'passing_yards_after_catch', 'passing_first_downs',
    'passing_epa', 'passing_2pt_conversions', 'pacr', 'dakota',
    # rushing
    'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
    'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
    'rushing_2pt_conversions',
    # receiving
    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
    'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
    'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
    'wopr',
    # special teams and fantasy totals
    'special_teams_tds', 'fantasy_points', 'fantasy_points_ppr',
]

# Rushing stats plus the receiving and fantasy columns kept alongside them.
rush_stat_cols = [
    'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
    'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
    'rushing_2pt_conversions',
    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'fantasy_points', 'fantasy_points_ppr',
]

# Depth-chart columns; used as the grouping key when player-identity gaps are filled.
depth_cols = [
    'season', 'last_name', 'first_name', 'football_name', 'formation',
    'gsis_id', 'jersey_number', 'position', 'elias_id', 'depth_position',
    'full_name',
]

In [28]:
# Stats that receive a rolling ("_roll") feature, listed per position group.

# Quarterbacks.
qb_stats_eng = [
    "completions", "attempts", "passing_yards", "passing_tds",
    "interceptions", "sacks", "sack_yards", "passing_air_yards",
    "passing_yards_after_catch", "passing_first_downs",
    "passing_2pt_conversions", "fantasy_points_ppr",
]

# Rushers (includes their basic receiving line and both fantasy totals).
rush_stats_eng = [
    "carries", "rushing_yards", "rushing_tds", "rushing_first_downs",
    "receptions", "targets", "receiving_yards", "receiving_tds",
    "fantasy_points", "fantasy_points_ppr",
]

# Pass catchers.
receiver_stats_eng = [
    "receptions", "targets", "receiving_yards", "receiving_tds",
    "receiving_air_yards", "receiving_yards_after_catch",
    "receiving_first_downs", "target_share", "air_yards_share",
    "fantasy_points_ppr",
]


In [29]:
# Union of every stat that gets a rolling feature, without duplicates.
# dict.fromkeys keeps first-seen order, so the resulting column order is the
# same on every run; iterating a set of strings is not (hash randomization).
stats_to_eng = list(dict.fromkeys(qb_stats_eng + rush_stats_eng + receiver_stats_eng))

In [30]:
# Outer-join the offensive depth-chart rows with the weekly stat rows on
# player id, team, season, week and position, so rows present in only one
# of the two tables are kept as well.
temp = pd.merge(
    off,
    weekly,
    how="outer",
    left_on=["gsis_id", "club_code", "season", "week", "position"],
    right_on=["player_id", "recent_team", "season", "week", "position"],
)
# Order by week and start from a fresh 0..n-1 index.
temp = temp.sort_values(by="week").reset_index(drop=True)


In [31]:
# Fill missing player-identity columns from other rows of the same depth-chart
# entry: take the next non-null value in the group first, then the previous one.
#
# groupby() drops rows whose key contains NaN by default, and the grouped
# bfill()/ffill() return NaN for those rows. Assigning that result straight
# back would blank out identity values a row already had (for example stat rows
# that found no depth-chart match in the outer merge), so the grouped result is
# used only to fill cells that are actually missing.
context_by_depth_entry = temp.groupby(depth_cols)[context_cols]
temp[context_cols] = (
    temp[context_cols]
    .fillna(context_by_depth_entry.bfill())
    .fillna(context_by_depth_entry.ffill())
)

# A missing stat value is treated as zero.
temp[stat_cols] = temp[stat_cols].fillna(0)

In [32]:
# Work on an independent copy of the merged table, ordered by season and then
# week so the rolling features below are computed in chronological order.
player_fantasy = temp.copy(deep=True)
player_fantasy = player_fantasy.sort_values(by=['season', 'week'])

In [33]:
# One "<stat>_roll" column per engineered stat.
roll_columns = [f'{stat}_roll' for stat in stats_to_eng]

# Exponentially weighted mean (span 6) of each stat within a player's rows,
# taken in the current row order. min_periods=0 yields a value from the first
# row on; each row's average includes that row's own stat.
player_fantasy[roll_columns] = (
    player_fantasy
    .groupby(context_cols)[stats_to_eng]
    .transform(lambda series: series.ewm(span=6, min_periods=0).mean())
)

In [95]:
# From here on the team column is called `team_name`; persist the player-week table.
player_fantasy = player_fantasy.rename(columns={'club_code': 'team_name'})
player_fantasy.to_parquet('data/agg/player_weekly_agg.parquet')

In [None]:
# Independent copy of the player-week table, ordered by team, season, week and
# player name. The team column is `team_name` at this point (renamed from
# `club_code` before the parquet export), so sorting by `club_code` would raise
# a KeyError on a top-to-bottom run.
agg = player_fantasy.copy()
agg = agg.sort_values(by=['team_name', 'season', 'week', 'full_name'])

In [70]:
# For RB/WR/TE rows with at least one target: count the rows per
# week / season / team / position, then tabulate, per position, how often each
# count occurs.
a = (
    weekly
    .loc[
        lambda w: (w.targets > 0) & (w.position.isin(["RB", "WR", "TE"])),
        ["player_display_name", "targets", "week", "season", "position", "recent_team"],
    ]
    .groupby(["week", "season", "recent_team", "position"])
    .count()
    .reset_index()
    .groupby(["position"])["targets"]
    .value_counts()
    .reset_index()
)


In [71]:
def stack_players(df, num_players, index_cols, value_cols):
    """Spread the first `num_players` depth slots of each position into columns.

    Parameters
    ----------
    df : DataFrame
        Rows to pivot; must contain `position`, `dense_depth`, the
        `index_cols` and the `value_cols`. Expected to be filtered to the
        position(s) of interest already.
    num_players : int
        Keep only rows whose `dense_depth` is strictly below this number.
    index_cols : list of str
        Columns identifying one output row.
    value_cols : list of str
        Columns whose values are spread out per position and depth slot.

    Returns
    -------
    DataFrame
        One row per `index_cols` combination. Index columns keep their names;
        value columns are named `<value>_<position>_<depth>`.
    """
    kept_rows = df.loc[df.dense_depth < num_players]
    wide = kept_rows.pivot(
        index=index_cols,
        columns=['position', 'dense_depth'],
        values=value_cols,
    ).reset_index()

    # Flatten the (value, position, depth) column tuples. Index columns come
    # back from reset_index() with empty position/depth levels.
    flat_names = []
    for col in wide.columns:
        if col[2] != '':
            flat_names.append(col[0] + '_' + col[1] + '_' + str(int(col[2])))
        else:
            flat_names.append(col[0] + col[1])
    wide.columns = flat_names
    return wide

In [73]:
# Player identifiers plus every rolling feature.
cols_to_agg = ['gsis_id', 'full_name'] + roll_columns

# Widen the first three depth slots of RB and WR rows (rows without a week are
# skipped) into one row per team / season / week / game type.
rb_wr_rows = agg.loc[agg.position.isin(['RB', 'WR']) & agg.week.notna()]
team_player_stats = stack_players(
    rb_wr_rows,
    3,
    ['team_name', 'season', 'week', 'game_type'],
    cols_to_agg,
)

# Rolling features of depth slots nobody fills become 0.0.
agg_roll_cols = [name for name in team_player_stats.columns if '_roll' in name]
team_player_stats[agg_roll_cols] = team_player_stats[agg_roll_cols].fillna(0).astype(float)

  team_player_stats[agg_roll_cols] = team_player_stats[agg_roll_cols].fillna(0).astype(float)


In [74]:
# Inspect the team-week keys and depth slot of every QB row that has a week.
agg.loc[
    agg.position.isin(['QB']) & agg.week.notna(),
    ['team_name', 'season', 'week', 'game_type', 'dense_depth'],
]

Unnamed: 0,team_name,season,week,game_type,dense_depth
4027,ARI,2010,1.0,REG,0.0
2573,ARI,2010,1.0,REG,2.0
381,ARI,2010,1.0,REG,1.0
24005,ARI,2010,2.0,REG,0.0
14632,ARI,2010,2.0,REG,2.0
...,...,...,...,...,...
133529,WAS,2024,11.0,REG,2.0
135111,WAS,2024,11.0,REG,1.0
137968,WAS,2024,12.0,REG,0.0
141537,WAS,2024,12.0,REG,2.0


In [89]:
# Per-position column sets: player identifiers plus that group's "_roll" features.
# The rushing set is named `rush_roll_stat_cols`, matching its QB and receiver
# siblings and staying distinct from the raw-stat `rush_stat_cols` list defined
# with the other column constants.
player_id_cols = ['gsis_id', 'full_name']
qb_roll_stat_cols = player_id_cols + [x + '_roll' for x in qb_stats_eng]
receiver_roll_stat_cols = player_id_cols + [x + '_roll' for x in receiver_stats_eng]
rush_roll_stat_cols = player_id_cols + [x + '_roll' for x in rush_stats_eng]

# Every team table has one row per team / season / week / game type.
team_week_index = ['team_name', 'season', 'week', 'game_type']


def _rows_for_position(position):
    """Return the rows of `agg` for one position, skipping rows without a week."""
    return agg.loc[(agg.position.isin([position])) & (~agg.week.isna())]


# Quarterbacks: first two depth slots.
team_qb_stats = stack_players(_rows_for_position('QB'), 2, team_week_index, qb_roll_stat_cols)

# Receiving features: first five WR, three TE and two RB depth slots, joined on
# the team-week key. merge() defaults to an inner join, so only team-weeks
# present in all three tables are kept.
wr_temp = stack_players(_rows_for_position('WR'), 5, team_week_index, receiver_roll_stat_cols)
te_temp = stack_players(_rows_for_position('TE'), 3, team_week_index, receiver_roll_stat_cols)
rb_temp = stack_players(_rows_for_position('RB'), 2, team_week_index, receiver_roll_stat_cols)
team_receiver_stats = wr_temp.merge(te_temp, on=team_week_index)
team_receiver_stats = team_receiver_stats.merge(rb_temp, on=team_week_index)

# Rushing features: first three RB depth slots.
team_rb_stats = stack_players(_rows_for_position('RB'), 3, team_week_index, rush_roll_stat_cols)

In [90]:
# Inspect the flattened column names of the combined WR / TE / RB receiving table.
team_receiver_stats.columns

Index(['team_name', 'season', 'week', 'game_type', 'gsis_id_WR_2',
       'gsis_id_WR_1', 'gsis_id_WR_4', 'gsis_id_WR_3', 'gsis_id_WR_0',
       'full_name_WR_2',
       ...
       'receiving_yards_after_catch_roll_RB_1',
       'receiving_yards_after_catch_roll_RB_0',
       'receiving_first_downs_roll_RB_1', 'receiving_first_downs_roll_RB_0',
       'target_share_roll_RB_1', 'target_share_roll_RB_0',
       'air_yards_share_roll_RB_1', 'air_yards_share_roll_RB_0',
       'fantasy_points_ppr_roll_RB_1', 'fantasy_points_ppr_roll_RB_0'],
      dtype='object', length=124)

In [91]:
# Write each team-week feature table to its own CSV file.
for _frame, _csv_path in (
    (team_qb_stats, 'data/team_qb_stats.csv'),
    (team_receiver_stats, 'data/team_receiver_stats.csv'),
    (team_rb_stats, 'data/team_rb_stats.csv'),
):
    _frame.to_csv(_csv_path)