In [131]:
import pandas as pd
import plotly.graph_objects as go

# 2024 Fantasy Football Analysis!

This year we are starting off with some lessons learned from 2023.

I will be focusing specifically this year on offensive positions.

Things we want to implement this year:
1. A UI to make the picking process more seamless
2. Dynamic picking process that matches the positions available each week.

Attributing Points:
- Touchdown = 7*(yds for play/net yards)
- Field Goal = 3*(yds for play/net yards)
- First Down = 7/10*(yds for play/net yards) = 0.7*(yds for play/net yards)

Things not considering:
1. laterals

### EDA!

Columns we Will Entertain:

1. ID Columns
    - game_id
    - play_id
    - home_team
    - away_team
    - season_type
    - posteam
    - defteam
    - touchdown
    - play_type
    - passer_player_id
    - passer_player_name
    - receiver_player_id
    - receiver_player_name
    - rusher_player_id
    - rusher_player_name
    - season
    - div_game
    - home_coach
    - away_coach
    

2. Analysis Columns
    - yards_gained
    - yds_net
    - incomplete_pass
    - interception
    - sack
    - series_result
    - sack_player_id
    - sack_player_name
    - fumbled_1_player_id
    - fumbled_1_player_name
    

In [132]:
pbp_data = (pd.read_csv('../pbp_data.csv')
            .drop(columns=['Unnamed: 0']))[['game_id',
                                            'play_id',
                                            'home_team',
                                            'away_team',
                                            'season_type',
                                            'posteam',
                                            'defteam',
                                            'touchdown',
                                            'play_type',
                                            'passer_player_id',
                                            'passer_player_name',
                                            'receiver_player_id',
                                            'receiver_player_name',
                                            'rusher_player_id',
                                            'rusher_player_name',
                                            'season',
                                            'home_coach',
                                            'away_coach',
                                            'yards_gained',
                                            'ydsnet',
                                            'incomplete_pass',
                                            'interception',
                                            'sack',
                                            'series_result',
                                            'sack_player_id',
                                            'sack_player_name',
                                            'fumbled_1_player_id',
                                            'fumbled_1_player_name']]

points_attrib = {'First Down': 0.7,
                 'Touchdown': 7,
                 'Field Goal': 0.3}

pbp_data.loc[:, 'series_points'] = pbp_data.series_result.map(points_attrib)
pbp_data.loc[pbp_data.ydsnet != 0, 'net_points_gained'] = pbp_data.series_points * (pbp_data.yards_gained/pbp_data.ydsnet)
pbp_data.loc[pbp_data.net_points_gained.isna(), 'net_points_gained'] = 0



Columns (46,180,181,183,184,190,191,194,195,196,197,198,199,204,205,206,207,210,211,214,215,219,220,221,223,225,227,234,235,236,237,238,239,244,245,246,249,250,254,255,256,261,263,264,267,268,269,270,278,284,285,303,333) have mixed types. Specify dtype option on import or set low_memory=False.



In [133]:
pbp_data.head(5)

Unnamed: 0,game_id,play_id,home_team,away_team,season_type,posteam,defteam,touchdown,play_type,passer_player_id,...,incomplete_pass,interception,sack,series_result,sack_player_id,sack_player_name,fumbled_1_player_id,fumbled_1_player_name,series_points,net_points_gained
0,2018_01_ATL_PHI,1,PHI,ATL,REG,,,,,,...,,,,First down,,,,,,0.0
1,2018_01_ATL_PHI,37,PHI,ATL,REG,ATL,PHI,0.0,kickoff,,...,0.0,0.0,0.0,First down,,,,,,0.0
2,2018_01_ATL_PHI,52,PHI,ATL,REG,ATL,PHI,0.0,no_play,,...,0.0,0.0,0.0,First down,,,,,,0.0
3,2018_01_ATL_PHI,75,PHI,ATL,REG,ATL,PHI,0.0,pass,00-0026143,...,0.0,0.0,0.0,First down,,,,,,0.0
4,2018_01_ATL_PHI,104,PHI,ATL,REG,ATL,PHI,0.0,run,,...,0.0,0.0,0.0,First down,,,,,,0.0


### Some Notes
Most of the football season is the regular season.

Let's filter out all POST season games so that we can capture stats outside of high pressure and variable games.

Manipulations:
1. season_type == 'REG'
2. Filter out passing, rushing, and receiving player ids with our rosters dataset. We will merge these positions.
    - We only want QBs, RBs, WRs, and TEs.

In [134]:
who_cares_the_others = ['QB','RB','WR','TE']

In [135]:
# 1.
reg_season = pbp_data.loc[pbp_data.loc[:, 'season_type'] == 'REG']

In [136]:
# 2.
rosters = pd.read_csv('../Data/2018-2023_rosters.csv')[['season',
                                                        'position',
                                                        'player_id']]

# Passing
passing_df = reg_season.merge(rosters.rename(columns={'player_id': 'passer_player_id',
                                                            'position': 'passing_position'}), on=['season', 'passer_player_id'], how='left')

passing_df = passing_df.loc[passing_df.passing_position.isin(who_cares_the_others), :]


# Receiving
receiving_df = reg_season.merge(rosters.rename(columns={'player_id': 'receiver_player_id',
                                                             'position': 'receiving_position'}), on=['season', 'receiver_player_id'], how='left')

receiving_df = receiving_df.loc[receiving_df.receiving_position.isin(who_cares_the_others), :]

# Rushing
rushing_df = reg_season.merge(rosters.rename(columns={'player_id': 'rusher_player_id',
                                                      'position': 'rusher_position'}), on=['season', 'rusher_player_id'], how='left')

rushing_df = rushing_df.loc[rushing_df.rusher_position.isin(who_cares_the_others), :]

In [137]:
passing_df.columns

Index(['game_id', 'play_id', 'home_team', 'away_team', 'season_type',
       'posteam', 'defteam', 'touchdown', 'play_type', 'passer_player_id',
       'passer_player_name', 'receiver_player_id', 'receiver_player_name',
       'rusher_player_id', 'rusher_player_name', 'season', 'home_coach',
       'away_coach', 'yards_gained', 'ydsnet', 'incomplete_pass',
       'interception', 'sack', 'series_result', 'sack_player_id',
       'sack_player_name', 'fumbled_1_player_id', 'fumbled_1_player_name',
       'series_points', 'net_points_gained', 'passing_position'],
      dtype='object')

### Who's Doing What

In [138]:
# Passing
category_counts = passing_df.passing_position.value_counts()

categories = category_counts.index
counts = category_counts.values

fig = go.Figure(data=[go.Bar(x=categories, y=counts)])

fig.update_layout(
    title='Passing Plays by Position',
    xaxis_title='Position',
    yaxis_title='Count'
)

display(fig.show())

# Receiving
category_counts = receiving_df.receiving_position.value_counts()

categories = category_counts.index
counts = category_counts.values

fig = go.Figure(data=[go.Bar(x=categories, y=counts)])

fig.update_layout(
    title='Receiving Plays by Position',
    xaxis_title='Position',
    yaxis_title='Count'
)

display(fig.show())

# Rushing
category_counts = rushing_df.rusher_position.value_counts()

categories = category_counts.index
counts = category_counts.values

fig = go.Figure(data=[go.Bar(x=categories, y=counts)])

fig.update_layout(
    title='Rushing Plays by Position',
    xaxis_title='Position',
    yaxis_title='Count'
)

display(fig.show())



None

None

None

### Let's Focus On Rushing!

Rushing is very easy. There is no change in possession therefore all yards are attribute to these beasts!

Features Considering:
   - game_id
   - play_id
   - touchdown
   - rusher_player_id
   - rusher_player_name
   - season
   - yards_gained
   - yards_gained
   - net_points_gained

In [139]:
rushing_df_necessary_cols = rushing_df.loc[:, ['game_id','play_id','rusher_player_id','rusher_player_name', 'touchdown',
                                               'season','yards_gained','series_result','net_points_gained']]

In [140]:
rushing_game_agg = rushing_df_necessary_cols.groupby(by=['rusher_player_id','game_id','season']).agg({'yards_gained': 'sum',
                                                                                                      'net_points_gained': 'sum',
                                                                                                      'touchdown': 'sum'}).reset_index().rename(columns={'yards_gained': 'rushings_yards_gained',
                                                                                                                                                         'net_points_gained': 'rushings_net_points_gained',
                                                                                                                                                         'touchdown': 'rushings_touchdown'})

### Now Passing

In [141]:
passing_df_necessary_cols = passing_df.loc[:, ['game_id','play_id','passer_player_id','passer_player_name', 'touchdown',
                                               'season','yards_gained','series_result','net_points_gained']]

In [143]:
passing_game_agg = passing_df_necessary_cols.groupby(by=['passer_player_id','game_id','season']).agg({'yards_gained': 'sum',
                                                                                                      'net_points_gained': 'sum',
                                                                                                      'touchdown': 'sum'}).reset_index().rename(columns={'yards_gained': 'passing_yards_gained',
                                                                                                                                                         'net_points_gained': 'passing_net_points_gained',
                                                                                                                                                         'touchdown': 'passing_touchdown'})

### Now Receiving

In [152]:
receiving_necessary_cols = receiving_df.loc[:, ['game_id','play_id','receiver_player_id','receiver_player_name', 'touchdown',
                                               'season','yards_gained','series_result','net_points_gained']]

In [154]:
receiving_game_agg = receiving_necessary_cols.groupby(by=['receiver_player_id','game_id','season']).agg({'yards_gained': 'sum',
                                                                                                      'net_points_gained': 'sum',
                                                                                                      'touchdown': 'sum'}).reset_index().rename(columns={'yards_gained': 'receiving_yards_gained',
                                                                                                                                                         'net_points_gained': 'receiving_net_points_gained',
                                                                                                                                                         'touchdown': 'receiving_touchdown'})

### Combine them all

In [159]:
merge_one = rushing_game_agg.rename(columns={'rusher_player_id':'player_id'}).merge(passing_game_agg.rename(columns={'passer_player_id':'player_id'}), on=['player_id','game_id','season'], how='outer')

In [162]:
merge_two = merge_one.merge(receiving_game_agg.rename(columns={'receiver_player_id':'player_id'}), on=['player_id','game_id','season'], how='outer').fillna(0)

In [164]:
merge_two.to_csv('../Data/andrews_agg.csv')