In [1]:
import pandas as pd
import sqlite3 as sq
import duckdb

In [2]:
# Numeric columns that get summed per game in the aggregation cell.
stat_cols = [
    # passing
    'completions',
    'attempts',
    'passing_yards',
    'passing_tds',
    'interceptions',
    'sacks',
    'sack_yards',
    'sack_fumbles',
    'sack_fumbles_lost',
    'passing_air_yards',
    'passing_yards_after_catch',
    'passing_first_downs',
    'passing_epa',
    'passing_2pt_conversions',
    'pacr',
    'dakota',
    # rushing
    'carries',
    'rushing_yards',
    'rushing_tds',
    'rushing_fumbles',
    'rushing_fumbles_lost',
    'rushing_first_downs',
    'rushing_epa',
    'rushing_2pt_conversions',
    # receiving
    'receptions',
    'targets',
    'receiving_yards',
    'receiving_tds',
    'receiving_fumbles',
    'receiving_fumbles_lost',
    'receiving_air_yards',
    'receiving_yards_after_catch',
    'receiving_first_downs',
    'receiving_epa',
    'receiving_2pt_conversions',
    'racr',
    'target_share',
    'air_yards_share',
    'wopr',
    # special teams / fantasy
    'special_teams_tds',
    'fantasy_points',
    'fantasy_points_ppr',
]

# Columns that receive a rolling average. 'pass_pct' is not one of stat_cols:
# it is derived after the per-game aggregation.
cols2roll = [
    "completions",
    "attempts",
    "carries",
    "passing_yards",
    "rushing_yards",
    "pass_pct",
    "receiving_yards",
]

In [3]:
# Load the weekly stats table.
# NOTE(review): presumably one row per player per game — confirm against the source data.
weekly = pd.read_parquet('data/weekly.parquet')

# Collapse to one row per (season, week, team, opponent) by summing every stat
# column, order the rows chronologically, then derive the pass share of plays
# as attempts / (attempts + carries).
per_game = (
    weekly
    .groupby(['season', 'week', 'recent_team', 'opponent_team'])[stat_cols]
    .sum()
    .reset_index()
    .sort_values(by=['season', 'week'])
    .assign(
        pass_pct=lambda games: games['attempts'] / (games['attempts'] + games['carries'])
    )
)

In [8]:
# Trailing window length, in games, used for the rolling team averages.
ROLL_WINDOW = 7

# One output column per rolled stat, e.g. 'completions' -> 'completions_team_roll'.
roll_stats = [f'{col}_team_roll' for col in cols2roll]
print(roll_stats)

team = per_game.copy()

# Rolling mean of each stat over every team's most recent ROLL_WINDOW games.
# per_game is ordered by (season, week), so each group is rolled chronologically.
# transform() returns a frame indexed like `team`, and column assignment aligns
# on that index, so no re-sorting of the result is needed (or has any effect).
# NOTE(review): the window includes the current game and runs across season
# boundaries — confirm that is intended if these feed a pre-game model.
team[roll_stats] = team.groupby('recent_team')[cols2roll].transform(
    lambda games: games.rolling(ROLL_WINDOW, min_periods=1).mean()
)

# Express every rolled stat relative to its mean over all rows (1.0 == average).
team[roll_stats] = team[roll_stats] / team[roll_stats].mean()

team[['recent_team', 'season', 'week'] + roll_stats]

['completions_team_roll', 'attempts_team_roll', 'carries_team_roll', 'passing_yards_team_roll', 'rushing_yards_team_roll', 'pass_pct_team_roll', 'receiving_yards_team_roll']


Unnamed: 0,recent_team,season,week,completions_team_roll,attempts_team_roll,carries_team_roll,passing_yards_team_roll,rushing_yards_team_roll,pass_pct_team_roll,receiving_yards_team_roll
0,ARI,2010,1,1.029333,1.208935,0.799169,1.225638,0.998875,1.174367,1.225659
1,ATL,2010,1,1.263272,1.297394,0.951392,1.039936,0.517274,1.132440,1.039953
2,BAL,2010,1,0.935757,1.120476,1.331949,1.023429,0.437008,0.924427,1.023446
3,BUF,2010,1,0.842182,1.002532,0.646947,0.573615,0.445926,1.183915,0.573625
4,CAR,2010,1,0.655030,1.032018,0.913336,0.751065,0.793749,1.053484,0.751077
...,...,...,...,...,...,...,...,...,...,...
8284,MIN,2024,19,1.102857,1.086778,0.864408,1.088867,0.857452,1.100277,1.088885
8285,PHI,2024,19,0.788710,0.762429,1.337385,0.746938,1.388742,0.744364,0.746950
8286,PIT,2024,19,0.935757,0.914073,0.907900,0.907880,0.902045,1.018064,0.907895
8287,TB,2024,19,0.895653,0.762429,1.000321,0.973318,1.262608,0.875761,0.973335


In [7]:
# Trailing window length, in games; same value the team-side rolling cell uses.
ROLL_WINDOW = 7

# Rolled columns, e.g. 'completions' -> 'completions_opp_roll'.
opp_roll_cols = [f'{col}_opp_roll' for col in cols2roll]

opp = per_game.copy()

# Rolling mean of each stat over the most recent ROLL_WINDOW rows that share the
# same opponent_team. Each per_game row holds recent_team's totals, so grouping
# by opponent_team averages what was put up against that opponent.
# transform() returns a frame indexed like `opp`, and column assignment aligns
# on that index, so no re-sorting of the result is needed (or has any effect).
opp[opp_roll_cols] = opp.groupby('opponent_team')[cols2roll].transform(
    lambda games: games.rolling(ROLL_WINDOW, min_periods=1).mean()
)

# Yards per carry from the two rolled columns (a ratio of rolling means, not a
# rolling mean of per-game ratios).
# NOTE(review): a window with zero carries would give inf/NaN here and distort
# the mean used for normalisation below — confirm that cannot occur in the data.
opp['yards_per_carry_opp_roll'] = opp['rushing_yards_opp_roll'] / opp['carries_opp_roll']

# Full list of opponent feature columns; the export cell reads this name.
opp_stats = opp_roll_cols + ['yards_per_carry_opp_roll']

# Express every column relative to its mean over all rows (1.0 == average).
opp[opp_stats] = opp[opp_stats] / opp[opp_stats].mean()

opp[['opponent_team', 'season', 'week'] + opp_stats]

Unnamed: 0,opponent_team,season,week,completions_opp_roll,attempts_opp_roll,carries_opp_roll,passing_yards_opp_roll,rushing_yards_opp_roll,pass_pct_opp_roll,receiving_yards_opp_roll,yards_per_carry_opp_roll
0,LA,2010,1,1.029233,1.208783,0.799324,1.225643,0.999226,1.174263,1.225665,1.254933
1,PIT,2010,1,1.263149,1.297231,0.951576,1.039939,0.517456,1.132340,1.039959,0.545896
2,NYJ,2010,1,0.935666,1.120336,1.332206,1.023432,0.437161,0.924344,1.023451,0.329420
3,MIA,2010,1,0.842100,1.002406,0.647072,0.573617,0.446083,1.183809,0.573628,0.692059
4,NYG,2010,1,0.654966,1.031888,0.913513,0.751067,0.794027,1.053390,0.751081,0.872571
...,...,...,...,...,...,...,...,...,...,...,...
8284,LA,2024,19,1.109433,1.103489,0.842824,1.009873,0.929127,1.112868,0.991026,1.106669
8285,GB,2024,19,1.142849,1.027676,0.924388,0.960942,0.787655,1.047043,0.960959,0.855385
8286,BAL,2024,19,0.882200,0.930805,0.783011,0.817685,0.685693,1.081306,0.817700,0.879107
8287,WAS,2024,19,0.842100,0.909746,0.984201,0.866616,0.997951,0.983378,0.866632,1.017900


In [6]:
# Persist the normalised rolling features: one CSV for the team side and one
# for the opponent side. to_csv is called with its defaults, so the frame index
# is written as the first, unnamed column.
team_export = team[['recent_team', 'season', 'week'] + roll_stats]
team_export.to_csv('data/agg/team_stats.csv')

opp_export = opp[['opponent_team', 'season', 'week'] + opp_stats]
opp_export.to_csv('data/agg/opp_stats.csv')