## Expected Features

References: 
- https://www.samford.edu/sports-analytics/fans/2023/How-I-Built-a-Competitive-NFL-Prediction-Model-with-Only-Five-Statistics
- https://github.com/theedgepredictor/nfl-feature-store
- https://github.com/theedgepredictor/nfl-model-store


### Raw Features

From our feature store. All features are rolled up for the current season up to the given week.

Features that are only available for offense OR defense OR classifying or target metrics
- Targets: Classification or Regression metrics for the given week (only available for completed games)
- Meta: Classifying stats for grouping team based stats
- Vegas: Latest Vegas lines (Historically mid - close week lines)
- ELO: ELO ratings 

Features that are available for both offense and defense:
- EWMA: Expected weighted moving averages
- Point: Point avgs and point differential avgs (whole game and quarters)
- Cover: Cover avgs (Last 10 games)
- Down: Down avgs 
- Fantasy: Fantasy points avg for whole team (Collected from offensive player stats and averaged)
- Common: Common features that are available across positions 
- Common Passing: Common passing features
- Common Rushing: Common rushing features
- Ranking: Ranking features (These might not be important for expected features but are available for both offense and defense)
- Kicking: Kickoff metrics


In [2]:
TARGETS = [
    'home_score',
    'away_score',
    'away_team_win',
    'away_team_spread',
    'total_target',
    'away_team_covered',
    'home_team_covered',
    'under_covered',
    'away_team_covered_spread',   
]

META = [
    'season',
    'week',
    'home_team',
    'away_team',
]

VEGAS = [
    'spread_line',
    'total_line',
]

ELO = [
    'elo_pre',
    #'elo_prob',
]

EWMA_FEATURES = [
    'ewma_rushing_offense',
    'ewma_rushing_defense',
    'ewma_passing_offense',
    'ewma_passing_defense',
    'ewma_score_offense',
    'ewma_score_defense',
]

POINT_FEATURES = [
    'avg_points_offense',
    'avg_points_defense',
    'avg_point_differential_offense',
    'avg_point_differential_defense',
    'avg_q1_point_diff_offense',
    'avg_q2_point_diff_offense',
    'avg_q3_point_diff_offense',
    'avg_q4_point_diff_offense',
    'avg_q5_point_diff_offense',
    'avg_q1_points_offense',
    'avg_q2_points_offense',
    'avg_q3_points_offense',
    'avg_q4_points_offense',
    'avg_q5_points_offense',
    'avg_q1_point_diff_defense',
    'avg_q2_point_diff_defense',
    'avg_q3_point_diff_defense',
    'avg_q4_point_diff_defense',
    'avg_q5_point_diff_defense',
    'avg_q1_points_defense',
    'avg_q2_points_defense',
    'avg_q3_points_defense',
    'avg_q4_points_defense',
    'avg_q5_points_defense',
]

ROLLING_COVER_FEATURES = [    
    'rolling_spread_cover',
    'rolling_under_cover'
]

DOWN_FEATURES = [
    'avg_first_down_offense',
    'avg_first_down_defense',
    'avg_third_down_converted_offense',
    'avg_third_down_converted_defense',
    'avg_third_down_failed_offense',
    'avg_third_down_failed_defense',
    'avg_fourth_down_converted_offense',
    'avg_fourth_down_converted_defense',
    'avg_fourth_down_failed_offense',
    'avg_fourth_down_failed_defense',
    'avg_third_down_percentage_offense',
    'avg_third_down_percentage_defense',
    'avg_fourth_down_percentage_offense',
    'avg_fourth_down_percentage_defense',
    'avg_first_down_penalty_offense',
    'avg_first_down_penalty_defense',
]

FANTASY_FEATURES = [
    #'avg_fantasy_points_offense',
    #'avg_fantasy_points_defense',
    #'avg_fantasy_points_half_ppr_offense',
    #'avg_fantasy_points_half_ppr_defense',
    'avg_fantasy_points_ppr_offense',
    'avg_fantasy_points_ppr_defense',
]

COMMON_FEATURES = [
    'avg_total_plays_offense',
    'avg_total_plays_defense',
    'avg_total_yards_offense',
    'avg_total_yards_defense',
    'avg_total_fumbles_offense',
    'avg_total_fumbles_defense',
    'avg_total_fumbles_lost_offense',
    'avg_total_fumbles_lost_defense',
    'avg_total_turnovers_offense',
    'avg_total_turnovers_defense',
    'avg_total_touchdowns_offense',
    'avg_total_touchdowns_defense',
    'avg_total_first_downs_offense',
    'avg_total_first_downs_defense',
    'avg_touchdown_per_play_offense',
    'avg_touchdown_per_play_defense',
    'avg_yards_per_play_offense',
    'avg_yards_per_play_defense',
    'avg_fantasy_point_per_play_offense',
    'avg_fantasy_point_per_play_defense',
    'avg_pass_to_rush_ratio_offense',
    'avg_pass_to_rush_ratio_defense',
    'avg_pass_to_rush_first_down_ratio_offense',
    'avg_pass_to_rush_first_down_ratio_defense',
    'avg_shotgun_offense',
    'avg_shotgun_defense',
    'avg_no_huddle_offense',
    'avg_no_huddle_defense',
    'avg_qb_dropback_offense',
    'avg_qb_dropback_defense',
    'avg_qb_scramble_offense',
    'avg_qb_scramble_defense',
    'avg_goal_to_go_offense',
    'avg_goal_to_go_defense',
    'avg_is_redzone_offense',
    'avg_is_redzone_defense',
    'avg_epa_offense',
    'avg_epa_defense',
    'avg_wpa_offense',
    'avg_wpa_defense',
    'avg_time_of_possession_offense',
    'avg_time_of_possession_defense',
    'avg_turnover_offense',
    'avg_turnover_defense',
]

PENALTY_FEATURES = [
    'avg_offensive_penalty_yards',
    'avg_defensive_penalty_yards',
    'avg_offensive_penalty',
    'avg_defensive_penalty',
]

COMMON_PASSING_FEATURES = [
    'avg_completions_offense',
    'avg_completions_defense',
    'avg_attempts_offense',
    'avg_attempts_defense',
    'avg_passing_yards_offense',
    'avg_passing_yards_defense',
    'avg_passing_tds_offense',
    'avg_passing_tds_defense',
    'avg_interceptions_offense',
    'avg_interceptions_defense',
    'avg_sacks_offense',
    'avg_sacks_defense',
    'avg_sack_yards_offense',
    'avg_sack_yards_defense',
    'avg_sack_fumbles_lost_offense',
    'avg_sack_fumbles_lost_defense',
    'avg_passing_air_yards_offense',
    'avg_passing_air_yards_defense',
    'avg_passing_yards_after_catch_offense',
    'avg_passing_yards_after_catch_defense',
    'avg_passing_first_downs_offense',
    'avg_passing_first_downs_defense',
    'avg_passing_epa_offense',
    'avg_passing_epa_defense',
    'avg_pacr_offense',
    'avg_pacr_defense',
    'avg_dakota_offense',
    'avg_dakota_defense',
    'avg_completion_percentage_offense',
    'avg_completion_percentage_defense',
    'avg_qbr_offense',
    'avg_qbr_defense',
    'avg_yards_per_pass_attempt_offense',
    'avg_yards_per_pass_attempt_defense',
    'avg_sack_rate_offense',
    'avg_sack_rate_defense',
    
]

COMMON_RUSHING_FEATURES = [
    'avg_carries_offense',
    'avg_carries_defense',
    'avg_rushing_yards_offense',
    'avg_rushing_yards_defense',
    'avg_rushing_tds_offense',
    'avg_rushing_tds_defense',
    'avg_rushing_fumbles_lost_offense',
    'avg_rushing_fumbles_lost_defense',
    'avg_rushing_first_downs_offense',
    'avg_rushing_first_downs_defense',
    'avg_rushing_epa_offense',
    'avg_rushing_epa_defense',
]

RANKING_FEATURES = [
    'elo_pre_rank',
    'avg_points_offense_rank',
    'avg_rushing_yards_offense_rank',
    'avg_passing_yards_offense_rank',
    'avg_total_yards_offense_rank',
    'avg_yards_per_play_offense_rank',
    'avg_total_turnovers_offense_rank',
    'avg_points_defense_rank',
    'avg_rushing_yards_defense_rank',
    'avg_passing_yards_defense_rank',
    'avg_total_yards_defense_rank',
    'avg_yards_per_play_defense_rank',
    'avg_total_turnovers_defense_rank',
    'offensive_rank',
    'defensive_rank',
    'net_rank',
]

KICKING_FEATURES = [
    'avg_field_goal_made_offense',
    'avg_field_goal_made_defense',
    'avg_field_goal_attempt_offense',
    'avg_field_goal_attempt_defense',
    'avg_field_goal_distance_offense',
    'avg_field_goal_distance_defense',
    'avg_extra_point_made_offense',
    'avg_extra_point_made_defense',
    'avg_extra_point_attempt_offense',
    'avg_extra_point_attempt_defense',
    'avg_field_goal_percentage_offense',
    'avg_field_goal_percentage_defense',
    'avg_extra_point_percentage_offense',
    'avg_extra_point_percentage_defense',
]

JUST_SIMPLE_FEATURES = [
    'avg_carries_offense',
    'avg_carries_defense',
    'avg_rushing_yards_offense',
    'avg_rushing_yards_defense',
    'avg_rushing_tds_offense',
    'avg_rushing_tds_defense',
    'avg_completions_offense',
    'avg_completions_defense',
    'avg_attempts_offense',
    'avg_attempts_defense',
    'avg_passing_yards_offense',
    'avg_passing_yards_defense',
    'avg_passing_tds_offense',
    'avg_passing_tds_defense',
    'avg_time_of_possession_offense',
    'avg_time_of_possession_defense',
    'avg_turnover_offense',
    'avg_turnover_defense',
    'avg_field_goal_made_offense',
    'avg_field_goal_made_defense',
]

### 1. Config

In [3]:
import pandas as pd
import numpy as np
import datetime
from pandas.core.dtypes.common import is_numeric_dtype

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

def get_event_feature_store(season):
    return pd.read_parquet(f'https://github.com/theedgepredictor/nfl-feature-store/raw/main/data/feature_store/event/regular_season_game/{season}.parquet')


def df_rename_shift(df, drop_cols=None):
    """
    Shifts df from away and home to team offense and opponent defense based stats. 
    Ex: away_team = away_avg_turnovers_offense AND home_avg_turnovers_defense stats  
    Ex: home_team = home_avg_turnovers_offense AND away_avg_turnovers_defense stats
    """
    if drop_cols is not None:
        df = df.drop(columns=drop_cols)

    root_cols = [col for col in df.columns if '_offense' not in col and '_defense' not in col and 'away_' not in col and 'home_' not in col]

    away_cols = [col for col in df.columns if '_offense' not in col and '_defense' not in col and 'away_' in col and 'home_' not in col]
    away_rename_dict = {col: col.replace('away_', '') for col in away_cols}
    home_cols = [col for col in df.columns if '_offense' not in col and '_defense' not in col and 'away_' not in col and 'home_' in col]
    home_rename_dict = {col: col.replace('home_', '') for col in home_cols}

    off_away_cols = [col for col in df.columns if '_offense' in col and 'away_' in col]
    off_away_rename_dict = {col: col.replace('away_', '') for col in off_away_cols}
    def_away_cols = [col for col in df.columns if '_defense' in col and 'away_' in col]
    def_away_rename_dict = {col: col.replace('away_', '') for col in def_away_cols}

    off_home_cols = [col for col in df.columns if '_offense' in col and 'home_' in col]
    off_home_rename_dict = {col: col.replace('home_', '') for col in off_home_cols}
    def_home_cols = [col for col in df.columns if '_defense' in col and 'home_' in col]
    def_home_rename_dict = {col: col.replace('home_', '') for col in def_home_cols}

    away_df = df[root_cols + away_cols + off_away_cols + def_home_cols].rename(columns={**away_rename_dict, **off_away_rename_dict, **def_home_rename_dict})
    away_df['is_home'] = 0
    home_df = df[root_cols + home_cols + off_home_cols + def_away_cols].rename(columns={**home_rename_dict, **off_home_rename_dict, **def_away_rename_dict})
    home_df['is_home'] = 1
    del df
    out_df = pd.concat([away_df, home_df])
    return out_df

def df_rename_fold(df, t1_prefix, t2_prefix):
    '''
    The reverse of a df_rename_pivot
    Fold two prefixed column types into one generic type
    Ex: away_team_id and home_team_id -> team_id
    '''
    try:
        t1_all_cols = [i for i in df.columns if t2_prefix not in i]
        t2_all_cols = [i for i in df.columns if t1_prefix not in i]

        t1_cols = [i for i in df.columns if t1_prefix in i]
        t2_cols = [i for i in df.columns if t2_prefix in i]
        t1_new_cols = [i.replace(t1_prefix, '') for i in df.columns if t1_prefix in i]
        t2_new_cols = [i.replace(t2_prefix, '') for i in df.columns if t2_prefix in i]

        t1_df = df[t1_all_cols].rename(columns=dict(zip(t1_cols, t1_new_cols)))
        t2_df = df[t2_all_cols].rename(columns=dict(zip(t2_cols, t2_new_cols)))

        df_out = pd.concat([t1_df, t2_df]).reset_index().drop(columns='index')
        return df_out
    except Exception as e:
        print("--df_rename_fold-- " + str(e))
        print(f"columns in: {df.columns}")
        print(f"shape: {df.shape}")
        return df

def df_rename_dif(df, t1_prefix=None, t2_prefix=None, t1_cols=None, t2_cols=None, sub_prefix=''):
    '''
    An extension of the df_rename_pivot
    Take the difference of two prefixed column types
    Ex: away_team_turnovers - home_team_turnovers -> team_turnovers_dif
    Note: This method applies the difference to the columns and removes the two prefixed column types
    '''
    if t1_cols is None and t2_cols is None:
        if t1_prefix is None or t2_prefix is None:
            raise Exception('You must specify either prefix or cols')
        t1_cols = [i for i in df.columns if t1_prefix in i]
        t2_cols = [i for i in df.columns if t2_prefix in i]
    for t1_col, t2_col in zip(t1_cols, t2_cols):
        if is_numeric_dtype(df[t1_col]) and is_numeric_dtype(df[t2_col]):
            df[f"dif_{t1_col.replace(t1_prefix, sub_prefix)}"] = df[t1_col] - df[t2_col]
    df_out = df.drop(columns=t1_cols + t2_cols)
    return df_out

def df_rename_exavg(df, t1_prefix=None, t2_prefix=None, t1_cols=None, t2_cols=None, sub_prefix=''):
    '''
    An extension of the df_rename_pivot
    Take the average of two prefixed column types to get the exavg (expected average)
    Ex: (away_team_turnovers + home_team_turnovers)/2 -> team_turnovers_df_rename_exavg
    Note: This method applies the exavg to the columns and removes the two prefixed column types
    '''
    if t1_cols is None and t2_cols is None:
        if t1_prefix is None or t2_prefix is None:
            raise Exception('You must specify either prefix or cols')
        t1_cols = [i for i in df.columns if t1_prefix in i]
        t2_cols = [i for i in df.columns if t2_prefix in i]
    for t1_col, t2_col in zip(t1_cols, t2_cols):
        if is_numeric_dtype(df[t1_col]) and is_numeric_dtype(df[t2_col]):
            df[f"exavg_{t1_col.replace(t1_prefix, sub_prefix)}"] = (df[t1_col] + df[t2_col]) / 2
    df_out = df.drop(columns=t1_cols + t2_cols)
    return df_out

### 2. PoC

Prove it out with Points Feature Group and then add in more features

In [4]:
#### Load data and split features into shifted and base
seasons = list(range(2023, 2025))
event_fs = pd.concat([get_event_feature_store(season) for season in seasons], ignore_index=True)
columns_for_base = META + ['home_elo_pre', 'away_elo_pre'] + VEGAS + TARGETS
columns_for_shift = ['team', 'season', 'week','is_home'] + POINT_FEATURES + JUST_SIMPLE_FEATURES
shifted_df = event_fs.copy()
base_dataset_df = event_fs[columns_for_base].copy()
del event_fs

#### Shift Features
shifted_df = df_rename_shift(shifted_df)[columns_for_shift]

#### Rename for Expected Average
t1_cols = [i for i in shifted_df.columns if '_offense' in i and (i not in TARGETS + META) and i.replace('home_', '') in columns_for_shift]
t2_cols = [i for i in shifted_df.columns if '_defense' in i and (i not in TARGETS + META) and i.replace('away_', '') in columns_for_shift]

#### Apply Expected Average
expected_features_df = df_rename_exavg(shifted_df, '_offense', '_defense', t1_cols=t1_cols, t2_cols=t2_cols)

#### Rename back into home and away features
home_exavg_features_df = expected_features_df[expected_features_df['is_home'] == 1].copy().drop(columns='is_home')
away_exavg_features_df = expected_features_df[expected_features_df['is_home'] == 0].copy().drop(columns='is_home')
home_exavg_features_df.columns = ["home_"+col if 'exavg_' in col or col == 'team' else col for col in home_exavg_features_df.columns]
away_exavg_features_df.columns = ["away_"+col if 'exavg_' in col or col == 'team' else col for col in away_exavg_features_df.columns]

#### Merge home and away Expected Average features into base as dataset_df
dataset_df = pd.merge(base_dataset_df, home_exavg_features_df, on=['home_team', 'season', 'week'], how='left')
dataset_df = pd.merge(dataset_df, away_exavg_features_df, on=['away_team', 'season', 'week'], how='left')

dataset_df['game_id'] = dataset_df.apply(lambda x: f"{x['season']}_{x['week']}_{x['home_team']}_{x['away_team']}", axis=1)

#### Fold base from away and home into team
folded_dataset_df = base_dataset_df.copy()
folded_dataset_df['game_id'] = folded_dataset_df.apply(lambda x: f"{x['season']}_{x['week']}_{x['home_team']}_{x['away_team']}", axis=1)
folded_dataset_df = folded_dataset_df.rename(columns={'spread_line': 'away_spread_line'})
folded_dataset_df['home_spread_line'] = - folded_dataset_df['away_spread_line']
folded_dataset_df['home_team_spread'] = -folded_dataset_df['away_team_spread']
folded_dataset_df['home_team_win'] = folded_dataset_df['away_team_win'] == 0
folded_dataset_df['home_team_covered_spread'] = folded_dataset_df['away_team_covered_spread'] == 0
folded_dataset_df = df_rename_fold(folded_dataset_df, 'away_', 'home_')
folded_dataset_df = pd.merge(folded_dataset_df, expected_features_df, on=['team', 'season', 'week'], how='left')
folded_dataset_df

Unnamed: 0,season,week,team,elo_pre,spread_line,total_line,score,team_win,team_spread,total_target,team_covered,under_covered,team_covered_spread,game_id,is_home,exavg_avg_points,exavg_avg_point_differential,exavg_avg_q1_point_diff,exavg_avg_q2_point_diff,exavg_avg_q3_point_diff,exavg_avg_q4_point_diff,exavg_avg_q5_point_diff,exavg_avg_q1_points,exavg_avg_q2_points,exavg_avg_q3_points,exavg_avg_q4_points,exavg_avg_q5_points,exavg_avg_carries,exavg_avg_rushing_yards,exavg_avg_rushing_tds,exavg_avg_completions,exavg_avg_attempts,exavg_avg_passing_yards,exavg_avg_passing_tds,exavg_avg_time_of_possession,exavg_avg_turnover,exavg_avg_field_goal_made
0,2023,1,DET,1486.873686,4.0,53.0,21.0,1,-1.0,41.0,1,1,1,2023_1_KC_DET,0,24.527778,-2.722222,-0.055556,-0.305556,-3.000000,0.888889,-0.250000,4.944444,7.583333,4.361111,7.638889,0.000000,26.472222,119.222222,1.027778,22.944444,35.111111,252.194444,1.805556,1799.111111,1.138889,1.416667
1,2023,1,CAR,1453.448635,3.5,40.5,10.0,0,14.0,34.0,0,1,0,2023_1_ATL_CAR,0,21.666667,-0.305556,1.055556,1.916667,-1.888889,-1.388889,0.000000,4.138889,7.972222,3.472222,6.083333,0.000000,29.305556,131.361111,0.861111,19.000000,30.138889,214.805556,1.305556,1795.611111,1.111111,1.944444
2,2023,1,HOU,1306.752814,9.5,43.5,9.0,0,16.0,34.0,0,1,0,2023_1_BAL_HOU,0,17.916667,-4.444444,-1.416667,-2.472222,-0.833333,0.361111,-0.083333,2.861111,4.750000,4.055556,6.166667,0.083333,23.750000,88.472222,0.500000,22.194444,34.694444,234.416667,1.222222,1739.388889,1.500000,1.777778
3,2023,1,CIN,1698.649026,-1.0,46.5,3.0,0,21.0,27.0,0,1,0,2023_1_CLE_CIN,0,24.027778,3.361111,-0.222222,1.444444,0.861111,1.361111,-0.083333,3.888889,8.055556,5.333333,6.750000,0.000000,26.166667,112.250000,1.000000,22.027778,34.083333,244.027778,1.694444,1810.972222,1.027778,1.611111
4,2023,1,JAX,1485.032903,-4.0,45.5,31.0,1,-10.0,52.0,1,0,1,2023_1_IND_JAX,0,24.527778,6.166667,0.194444,2.694444,1.666667,1.444444,0.166667,3.972222,8.083333,5.444444,6.777778,0.250000,28.361111,122.972222,1.000000,22.361111,33.250000,234.166667,1.500000,1833.305556,1.166667,1.777778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,2024,5,SF,1703.432979,-7.0,47.5,,1,,,0,0,1,2024_5_SF_ARI,1,23.250000,-4.500000,-7.000000,1.500000,1.250000,-0.250000,0.000000,0.750000,8.250000,7.750000,6.500000,0.000000,29.000000,116.250000,1.500000,21.000000,28.750000,249.500000,0.750000,1906.000000,1.000000,2.500000
696,2024,5,LA,1444.414598,3.0,46.0,,1,,,0,0,1,2024_5_LA_GB,1,18.500000,-8.000000,-6.750000,-3.250000,1.000000,1.000000,0.000000,0.750000,5.000000,7.750000,5.000000,0.000000,24.750000,105.000000,1.000000,22.500000,36.000000,253.750000,1.000000,1659.500000,2.000000,1.500000
697,2024,5,SEA,1536.472544,-6.0,42.0,,1,,,0,0,1,2024_5_SEA_NYG,1,24.500000,8.500000,0.250000,1.750000,6.750000,-1.000000,0.750000,5.000000,7.250000,7.500000,4.000000,0.750000,28.250000,129.500000,1.000000,23.250000,30.500000,233.000000,1.000000,1853.250000,0.750000,3.000000
698,2024,5,PIT,1598.412292,1.0,42.0,,1,,,0,0,1,2024_5_PIT_DAL,1,23.000000,6.000000,3.500000,0.000000,1.500000,1.000000,0.000000,6.750000,7.500000,5.500000,3.250000,0.000000,33.750000,140.250000,1.250000,16.250000,26.000000,171.250000,1.000000,1940.000000,0.750000,2.500000


### 3. Example Event

In [5]:
home_team = 'NYJ'
away_team = 'BUF'
season = 2023
week = 1

buf_nyj = dataset_df[(
    (dataset_df['home_team'] == home_team) & (dataset_df['away_team'] == away_team) & (dataset_df['season'] == season) & (dataset_df['week'] == week)
)].copy()

nyj = folded_dataset_df[(
    (folded_dataset_df['team'] == home_team) & (folded_dataset_df['season'] == season) & (folded_dataset_df['week'] == week)
)]

buf = folded_dataset_df[(
    (folded_dataset_df['team'] == away_team) & (folded_dataset_df['season'] == season) & (folded_dataset_df['week'] == week)
)]

## Note: looks like all point diffs need to be flipped to be correct
folded_buf_nyj = pd.concat([nyj.T, buf.T], axis=1)

print(f"Vegas lines for {home_team} vs. {away_team} in Week {week} of Season {season}:")
print(f"-- Spread: {buf['spread_line'].values[0].round(2)} (towards away team)")
print(f"-- Total: {buf['total_line'].values[0].round(2)}")
print()

print(f"Home Team Stats: {home_team}")
print(f"-- Rating: {nyj['elo_pre'].values[0].round(2)}")
print(f"-- Expected Score Q1 ")
print(f"-- Expected Score: {nyj['exavg_avg_points'].values[0].round(2)}")
print()

print(f"Away Team Stats: {away_team}")
print(f"-- Rating: {buf['elo_pre'].values[0].round(2)}")
print(f"-- Expected Score: {buf['exavg_avg_points'].values[0].round(2)}")
print()

print(f"Result")
print(f"-- Spread: Expected {nyj['exavg_avg_point_differential'].values[0].round(2)} (Actual {buf['team_spread'].values[0].round(2)})")
print(f"-- Total: Expected {nyj['exavg_avg_points'].values[0].round(2) + buf['exavg_avg_points'].values[0].round(2)} (Actual {buf['total_target'].values[0].round(2)})")
folded_buf_nyj

Vegas lines for NYJ vs. BUF in Week 1 of Season 2023:
-- Spread: -2.5 (towards away team)
-- Total: 44.5

Home Team Stats: NYJ
-- Rating: 1389.28
-- Expected Score Q1 
-- Expected Score: 17.14

Away Team Stats: BUF
-- Rating: 1702.57
-- Expected Score: 23.72

Result
-- Spread: Expected -6.5 (Actual 6.0)
-- Total: Expected 40.86 (Actual 38.0)


Unnamed: 0,365,15
season,2023,2023
week,1,1
team,NYJ,BUF
elo_pre,1389.280521,1702.570592
spread_line,2.5,-2.5
total_line,44.5,44.5
score,22.0,16.0
team_win,1,0
team_spread,-6.0,6.0
total_target,38.0,38.0


In [6]:
buf_nyj.T

Unnamed: 0,15
season,2023
week,1
home_team,NYJ
away_team,BUF
home_elo_pre,1389.280521
away_elo_pre,1702.570592
spread_line,-2.5
total_line,44.5
home_score,22.0
away_score,16.0


### 4. Evaluation

In [21]:
from sklearn.metrics import accuracy_score, mean_absolute_error

eval_season = 2023
eval_df = dataset_df[((dataset_df['season'] == eval_season)) & (dataset_df['away_score'].notnull())].copy()
eval_df['expected_spread'] = eval_df['home_exavg_avg_points'] - eval_df['away_exavg_avg_points']
eval_df['expected_total'] = eval_df['home_exavg_avg_points'] + eval_df['away_exavg_avg_points']

actual_wp = eval_df['away_team_win'].values
actual_spread = eval_df['away_team_spread'].values
actual_total = eval_df['total_target'].values

print(f'Evaluation Report for the {eval_season} Season')
print('-- The away team won: ', round(sum(actual_wp) / len(actual_wp), 4) * 100, '% of the time')
print(f"-- The average score differential was {round(actual_spread.mean(), 2)} (abs: {round(np.abs(actual_spread).mean(), 2)})")
print(f"-- The average total was {round(actual_total.mean(), 2)} with a low of {round(actual_total.min(), 2)} and a high of {round(actual_total.max(), 2)}")

print()
print('Vegas Baseline Scores')
vegas_wp = eval_df['spread_line'].apply(lambda x: 1 if x < 0 else 0)
vegas_spread = eval_df['spread_line'].values
vegas_total = eval_df['total_line'].values

print(f"-- Vegas WP: {accuracy_score(actual_wp, vegas_wp)}")
print(f"-- Vegas Spread: {mean_absolute_error(actual_spread, vegas_spread)}")
print(f"-- Vegas Total: {mean_absolute_error(actual_total, vegas_total)}")
print()

print('Expected Points Averages Scores')
exp_avg_wp = eval_df['home_exavg_avg_point_differential'].apply(lambda x: 1 if x < 0 else 0)
exp_avg_spread = eval_df['home_exavg_avg_point_differential'].values
exp_avg_total = eval_df['home_exavg_avg_points'].values + eval_df['away_exavg_avg_points'].values

print(f"-- Expected WP: {accuracy_score(actual_wp, exp_avg_wp)}")
print(f"-- Expected Spread: {mean_absolute_error(actual_spread, exp_avg_spread)}")
print(f"-- Expected Total: {mean_absolute_error(actual_total, exp_avg_total)}")

eval_df['expected_system_covered_spread'] = (eval_df['away_exavg_avg_points'] + eval_df['spread_line'] >= eval_df['home_exavg_avg_points'])
eval_df['expected_system_covered_spread'] = eval_df['expected_system_covered_spread'] == eval_df['away_team_covered']

# Calculate if the game covered the under
#eval_df['expected_system_under_covered_total'] = (eval_df['home_exavg_avg_points'] + eval_df['away_exavg_avg_points'] <= eval_df['total_line'])

eval_df.expected_system_covered_spread.sum() / len(eval_df)

Evaluation Report for the 2022 Season


ZeroDivisionError: division by zero

In [16]:
eval_df[
    [
        'season', 
        'week', 
        'away_team', 
        'home_team', 
        'away_team_spread', 
        'spread_line', 
        'expected_spread',
        'away_team_covered', 
        'expected_system_covered_spread',
        'total_line', 
        'home_score', 
        'away_score',
        'away_team_win', 
        
        'total_target',
        'home_team_covered', 
        'under_covered',
        
        'expected_total', 
        
        'expected_system_under_covered_total'
       ]]

Unnamed: 0,season,week,away_team,home_team,away_team_spread,spread_line,expected_spread,away_team_covered,expected_system_covered_spread,total_line,home_score,away_score,away_team_win,total_target,home_team_covered,under_covered,expected_total,expected_system_under_covered_total
272,2024,1,BAL,KC,7.0,3.0,-2.694444,0,True,46.0,27.0,20.0,0,47.0,1,0,42.083333,True
273,2024,1,GB,PHI,5.0,2.0,-0.5,0,True,49.5,34.0,29.0,0,63.0,1,0,46.333333,True
274,2024,1,PIT,ATL,-8.0,4.0,-0.916667,1,True,43.0,10.0,18.0,1,28.0,0,1,39.194444,True
275,2024,1,ARI,BUF,6.0,6.5,8.388889,1,False,46.0,34.0,28.0,0,62.0,0,0,46.0,True
276,2024,1,TEN,CHI,7.0,4.0,0.861111,0,True,43.0,24.0,17.0,0,41.0,1,1,41.361111,True
277,2024,1,NE,CIN,-6.0,8.0,3.75,1,True,40.5,10.0,16.0,1,26.0,0,1,40.361111,True
278,2024,1,HOU,IND,-2.0,-3.0,-1.25,0,False,49.0,27.0,29.0,1,56.0,1,0,46.305556,True
279,2024,1,JAX,MIA,3.0,3.5,2.777778,1,True,49.5,20.0,17.0,0,37.0,0,1,46.888889,True
280,2024,1,CAR,NO,37.0,3.5,6.916667,0,False,41.5,47.0,10.0,0,57.0,1,0,39.305556,True
281,2024,1,MIN,NYG,-22.0,-1.0,-4.027778,1,True,42.5,6.0,28.0,1,34.0,0,1,40.527778,True


In [10]:
eval_df.head()

Unnamed: 0,season,week,home_team,away_team,home_elo_pre,away_elo_pre,spread_line,total_line,home_score,away_score,away_team_win,away_team_spread,total_target,away_team_covered,home_team_covered,under_covered,away_team_covered_spread,home_exavg_avg_points,home_exavg_avg_point_differential,home_exavg_avg_q1_point_diff,home_exavg_avg_q2_point_diff,home_exavg_avg_q3_point_diff,home_exavg_avg_q4_point_diff,home_exavg_avg_q5_point_diff,home_exavg_avg_q1_points,home_exavg_avg_q2_points,home_exavg_avg_q3_points,home_exavg_avg_q4_points,home_exavg_avg_q5_points,home_exavg_avg_carries,home_exavg_avg_rushing_yards,home_exavg_avg_rushing_tds,home_exavg_avg_completions,home_exavg_avg_attempts,home_exavg_avg_passing_yards,home_exavg_avg_passing_tds,home_exavg_avg_time_of_possession,home_exavg_avg_turnover,home_exavg_avg_field_goal_made,away_exavg_avg_points,away_exavg_avg_point_differential,away_exavg_avg_q1_point_diff,away_exavg_avg_q2_point_diff,away_exavg_avg_q3_point_diff,away_exavg_avg_q4_point_diff,away_exavg_avg_q5_point_diff,away_exavg_avg_q1_points,away_exavg_avg_q2_points,away_exavg_avg_q3_points,away_exavg_avg_q4_points,away_exavg_avg_q5_points,away_exavg_avg_carries,away_exavg_avg_rushing_yards,away_exavg_avg_rushing_tds,away_exavg_avg_completions,away_exavg_avg_attempts,away_exavg_avg_passing_yards,away_exavg_avg_passing_tds,away_exavg_avg_time_of_possession,away_exavg_avg_turnover,away_exavg_avg_field_goal_made,game_id,expected_spread,expected_total,expected_covered_spread,vegas_covered_spread,expected_covered_total,vegas_covered_total
272,2024,1,KC,BAL,1706.761563,1681.822751,3.0,46.0,27.0,20.0,0,7.0,47.0,0,1,0,0,19.694444,-2.694444,-0.583333,0.194444,-1.805556,-0.416667,-0.083333,3.944444,7.444444,3.138889,5.083333,0.083333,24.444444,107.416667,0.527778,23.555556,37.138889,236.583333,1.333333,1792.694444,1.527778,1.888889,22.388889,2.694444,0.583333,0.805556,0.805556,0.416667,0.083333,4.527778,7.666667,4.527778,5.5,0.166667,28.472222,133.194444,1.027778,19.694444,31.388889,214.944444,1.305556,1825.888889,1.305556,1.638889,2024_1_KC_BAL,-2.694444,42.083333,True,False,False,True
273,2024,1,PHI,GB,1544.887369,1579.881723,2.0,49.5,34.0,29.0,0,5.0,63.0,0,1,0,0,22.916667,-0.5,0.944444,-0.666667,-0.444444,-0.305556,-0.027778,4.861111,6.027778,5.527778,6.25,0.25,29.5,128.055556,1.111111,20.944444,32.083333,231.194444,1.25,1838.666667,1.361111,1.888889,23.416667,0.833333,-0.944444,1.611111,-0.5,0.583333,0.083333,3.916667,7.027778,5.638889,6.75,0.083333,25.0,108.194444,0.666667,23.305556,36.111111,253.722222,1.916667,1783.583333,1.0,1.5,2024_1_PHI_GB,-0.5,46.333333,True,False,False,True
274,2024,1,ATL,PIT,1406.881419,1536.797127,4.0,43.0,10.0,18.0,1,-8.0,28.0,1,0,1,1,19.138889,-0.75,0.222222,-0.305556,-1.166667,0.5,0.0,3.916667,6.166667,3.583333,5.472222,0.0,28.694444,122.861111,0.666667,19.805556,32.277778,233.138889,1.222222,1792.583333,1.638889,1.861111,20.055556,1.0,-0.222222,0.722222,0.75,-0.25,0.0,3.694444,6.555556,4.666667,5.138889,0.0,28.972222,118.027778,0.805556,19.388889,31.055556,207.194444,1.111111,1807.416667,0.916667,1.972222,2024_1_ATL_PIT,-0.916667,39.194444,True,True,False,False
275,2024,1,BUF,ARI,1679.473922,1377.288327,6.5,46.0,34.0,28.0,0,6.0,62.0,1,0,0,1,27.194444,8.638889,0.888889,3.166667,2.055556,2.611111,-0.083333,5.083333,9.0,5.166667,7.861111,0.083333,30.416667,136.25,1.194444,21.277778,31.555556,238.444444,1.861111,1880.333333,1.25,1.416667,18.805556,-8.388889,-0.888889,-2.75,-2.472222,-2.527778,0.25,4.194444,6.0,2.944444,5.333333,0.333333,25.638889,122.583333,0.888889,21.222222,32.611111,209.944444,1.111111,1733.583333,1.611111,1.527778,2024_1_BUF_ARI,8.388889,46.0,False,True,False,True
276,2024,1,CHI,TEN,1439.27718,1413.184182,4.0,43.0,24.0,17.0,0,7.0,41.0,0,1,1,0,21.111111,0.944444,0.305556,-0.277778,-0.083333,0.833333,0.166667,3.694444,6.416667,4.75,6.0,0.25,29.166667,121.305556,0.75,20.583333,31.75,221.138889,1.138889,1875.416667,1.194444,2.305556,20.25,-0.861111,-0.305556,1.444444,-1.083333,-0.75,-0.166667,3.388889,7.527778,4.0,5.166667,0.166667,25.25,100.055556,0.722222,21.166667,32.583333,229.916667,1.305556,1761.972222,1.388889,1.638889,2024_1_CHI_TEN,0.861111,41.361111,True,False,False,False


In [37]:
folded_dataset_df

Unnamed: 0,season,week,team,elo_pre,spread_line,total_line,score,team_win,team_spread,total_target,team_covered,under_covered,team_covered_spread,game_id,is_home,exavg_avg_points,exavg_avg_point_differential,exavg_avg_q1_point_diff,exavg_avg_q2_point_diff,exavg_avg_q3_point_diff,exavg_avg_q4_point_diff,exavg_avg_q5_point_diff,exavg_avg_q1_points,exavg_avg_q2_points,exavg_avg_q3_points,exavg_avg_q4_points,exavg_avg_q5_points
0,2023,1,DET,1486.873686,4.0,53.0,21.0,1,-1.0,41.0,1,1,1,2023_1_KC_DET,0,24.527778,-2.722222,-0.055556,-0.305556,-3.000000,0.888889,-0.250000,4.944444,7.583333,4.361111,7.638889,0.000000
1,2023,1,CAR,1453.448635,3.5,40.5,10.0,0,14.0,34.0,0,1,0,2023_1_ATL_CAR,0,21.666667,-0.305556,1.055556,1.916667,-1.888889,-1.388889,0.000000,4.138889,7.972222,3.472222,6.083333,0.000000
2,2023,1,HOU,1306.752814,9.5,43.5,9.0,0,16.0,34.0,0,1,0,2023_1_BAL_HOU,0,17.916667,-4.444444,-1.416667,-2.472222,-0.833333,0.361111,-0.083333,2.861111,4.750000,4.055556,6.166667,0.083333
3,2023,1,CIN,1698.649026,-1.0,46.5,3.0,0,21.0,27.0,0,1,0,2023_1_CLE_CIN,0,24.027778,3.361111,-0.222222,1.444444,0.861111,1.361111,-0.083333,3.888889,8.055556,5.333333,6.750000,0.000000
4,2023,1,JAX,1485.032903,-4.0,45.5,31.0,1,-10.0,52.0,1,0,1,2023_1_IND_JAX,0,24.527778,6.166667,0.194444,2.694444,1.666667,1.444444,0.166667,3.972222,8.083333,5.444444,6.777778,0.250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,2024,5,SF,1703.432979,-7.0,47.5,,1,,,0,0,1,2024_5_SF_ARI,1,23.250000,-4.500000,-7.000000,1.500000,1.250000,-0.250000,0.000000,0.750000,8.250000,7.750000,6.500000,0.000000
696,2024,5,LA,1444.414598,3.0,46.0,,1,,,0,0,1,2024_5_LA_GB,1,18.500000,-8.000000,-6.750000,-3.250000,1.000000,1.000000,0.000000,0.750000,5.000000,7.750000,5.000000,0.000000
697,2024,5,SEA,1536.472544,-6.0,42.0,,1,,,0,0,1,2024_5_SEA_NYG,1,24.500000,8.500000,0.250000,1.750000,6.750000,-1.000000,0.750000,5.000000,7.250000,7.500000,4.000000,0.750000
698,2024,5,PIT,1598.412292,1.0,42.0,,1,,,0,0,1,2024_5_PIT_DAL,1,23.000000,6.000000,3.500000,0.000000,1.500000,1.000000,0.000000,6.750000,7.500000,5.500000,3.250000,0.000000


In [39]:
s = folded_dataset_df[folded_dataset_df['season'] == 2024].copy()
s['points_over_expected'] = s['score'] - s['exavg_avg_points']
s.groupby(['team'])['points_over_expected'].mean().sort_values(ascending=False).reset_index()

Unnamed: 0,team,points_over_expected
0,BUF,11.185185
1,MIN,5.490741
2,WAS,4.842593
3,NO,4.12963
4,KC,3.601852
5,NYJ,3.416667
6,ARI,2.981481
7,CIN,2.814815
8,CAR,2.768519
9,SEA,2.62037
