In [22]:
import pandas as pd
import numpy as np
import sys
# NOTE(review): hard-coded, machine-specific absolute path. The `config` import
# below presumably resolves through this entry, so the notebook will only run
# where this exact directory exists — consider a relative or
# environment-configured project root.
sys.path.append('/Users/joeday/Documents/Fantasy/fantasy_football_season_projections')

# Project-level settings (data locations, season bounds, week limit).
from config import INPUT_DATA_LOC, OUTPUT_DATA_LOC, MIN_YR, MAX_YR, NEEDED_COLUMNS, MAX_WEEK

In [23]:
# Load the intermediate tables stored as CSV files under OUTPUT_DATA_LOC.
# OUTPUT_DATA_LOC is concatenated directly with each file name, so it must
# already end with a path separator.
game_week_map = pd.read_csv(OUTPUT_DATA_LOC + 'game_week_map.csv')
# Play-level data; later cells filter it on play_type == 'run'.
fantasy_plays = pd.read_csv(OUTPUT_DATA_LOC + 'fantasy_plays.csv')
# Joined later on (player_id, season); supplies the games_played column.
weeks_played = pd.read_csv(OUTPUT_DATA_LOC + 'weeks_played.csv')
player_id_map = pd.read_csv(OUTPUT_DATA_LOC + 'player_id_map.csv')
position_map = pd.read_csv(OUTPUT_DATA_LOC + 'position_map.csv')
# Joined later on (player_id, season); supplies the team column.
roster_map = pd.read_csv(OUTPUT_DATA_LOC + 'roster_map.csv')

# Team Rushing Stats

### Rushes per Team by Season

In [24]:
# Restrict the play-level data to rushing plays.
rushing_plays = fantasy_plays.loc[fantasy_plays['play_type'].eq('run')]

# Count rushing plays per (team, season). The per-game rate divides by a fixed
# MAX_WEEK - 1 for every team-season rather than by a per-team game count.
total_rushes_by_team = (
    rushing_plays
    .groupby(['posteam', 'season'])['play_id']
    .count()
    .rename('team_carries')
    .reset_index()
    .assign(team_carries_per_game=lambda t: t['team_carries'] / (MAX_WEEK - 1))
)

In [25]:
# Display the per-team, per-season rushing totals built in the previous cell.
total_rushes_by_team

Unnamed: 0,posteam,season,team_carries,team_carries_per_game
0,ARI,2009,331,22.066667
1,ARI,2010,300,20.000000
2,ARI,2011,349,23.266667
3,ARI,2012,332,22.133333
4,ARI,2013,382,25.466667
...,...,...,...,...
348,WAS,2015,382,25.466667
349,WAS,2016,353,23.533333
350,WAS,2017,376,25.066667
351,WAS,2018,395,26.333333


### Carries per Game per Player by Season

In [26]:
# Yardage credited for a single run is capped at this value when building the
# "capped" yardage features, which limits the influence of long runs.
MAX_CAPPED_RUSH_YDS = 15

# Take an explicit copy of the rushing plays: a column is added below, and
# assigning onto a boolean-filtered slice of fantasy_plays raises pandas'
# SettingWithCopyWarning (ambiguous view-vs-copy write).
rushing_plays = fantasy_plays[fantasy_plays['play_type'] == 'run'].copy()

# Number of rushing plays per (rusher, season).
carries_by_player = (
    rushing_plays
    .groupby(['rusher_player_id', 'season'])['play_id']
    .count()
    .reset_index()
)
carries_by_player.columns = ['rusher_player_id', 'season', 'carries']
carries_by_player = carries_by_player.sort_values(by='rusher_player_id')

# Per-play yardage with anything above the cap replaced by the cap itself.
rushing_plays['capped_rushing_yds'] = np.where(
    rushing_plays['yards_gained'] > MAX_CAPPED_RUSH_YDS,
    MAX_CAPPED_RUSH_YDS,
    rushing_plays['yards_gained'],
)

# Season totals per (rusher, season): raw yards, capped yards, TDs, fumbles.
rushing_yds_by_player = (
    rushing_plays
    .groupby(['rusher_player_id', 'season'])
    [['yards_gained', 'capped_rushing_yds', 'rush_touchdown', 'fumble']]
    .sum()
    .reset_index()
)

# One row per (rusher, season) holding both the carry count and the totals.
rushing_df = carries_by_player.merge(
    rushing_yds_by_player,
    how='inner',
    on=['rusher_player_id', 'season'],
)

In [27]:
# Attach the weeks_played columns to each (rusher, season) row, discard the
# redundant right-hand key, and keep only the first row for any
# (rusher, season) pair that the join duplicated.
rushing_df = (
    rushing_df
    .merge(
        weeks_played,
        how='inner',
        left_on=['rusher_player_id', 'season'],
        right_on=['player_id', 'season'],
    )
    .drop(columns='player_id')
    .drop_duplicates(subset=['rusher_player_id', 'season'], keep='first')
)

In [28]:
# Turn the season totals into per-game and per-carry rates, then keep only the
# columns used downstream (the raw capped-yardage total is dropped here).
rushing_df = rushing_df.assign(
    # Volume
    carries_per_game=rushing_df['carries'].div(rushing_df['games_played']),
    # Yardage
    yards_per_game=rushing_df['yards_gained'].div(rushing_df['games_played']),
    yards_per_carry=rushing_df['yards_gained'].div(rushing_df['carries']),
    capped_yards_per_game=rushing_df['capped_rushing_yds'].div(rushing_df['games_played']),
    capped_yards_per_carry=rushing_df['capped_rushing_yds'].div(rushing_df['carries']),
    # Touchdowns
    tds_per_carry=rushing_df['rush_touchdown'].div(rushing_df['carries']),
    tds_per_game=rushing_df['rush_touchdown'].div(rushing_df['games_played']),
    # Fumbles
    fumbles_per_carry=rushing_df['fumble'].div(rushing_df['carries']),
    fumbles_per_game=rushing_df['fumble'].div(rushing_df['games_played']),
)[[
    'rusher_player_id', 'season',
    'carries', 'games_played', 'carries_per_game',
    'yards_gained', 'yards_per_game', 'yards_per_carry',
    'capped_yards_per_game', 'capped_yards_per_carry',
    'rush_touchdown', 'tds_per_carry', 'tds_per_game',
    'fumble', 'fumbles_per_carry', 'fumbles_per_game',
]]

### RB Workload PCT

In [29]:
# Attach each rusher's roster row for the season; rusher-seasons without a
# roster_map entry are dropped by the inner join.
rushing_df = pd.merge(
    rushing_df,
    roster_map,
    how='inner',
    left_on=['rusher_player_id', 'season'],
    right_on=['player_id', 'season'],
)

In [30]:
# Bring the team-level carry totals onto each player row, matching the
# player's team to posteam within the same season.
rushing_df = pd.merge(
    rushing_df,
    total_rushes_by_team,
    how='left',
    left_on=['season', 'team'],
    right_on=['season', 'posteam'],
)

In [31]:
# Player share of the team's rushing workload, on a season-total basis and on
# a per-game basis.
rushing_df = rushing_df.assign(
    season_workload_pct=rushing_df['carries'].div(rushing_df['team_carries']),
    by_game_workload_pct=rushing_df['carries_per_game'].div(rushing_df['team_carries_per_game']),
)

## Can we account for roster turnover?

In [32]:
# Self-join roster_map so every (player, season) row also carries the team the
# same player is listed with one season later (NaN when there is no such row).
temp_roster_df = (
    roster_map
    .assign(next_season=lambda r: r['season'] + 1)
    # Left join: this season's row -> the same player's row for season + 1.
    .pipe(lambda r: r.merge(r,
                            how='left',
                            left_on=['player_id', 'next_season'],
                            right_on=['player_id', 'season'],
                            suffixes=('', '_x')))
    # Keep rows with no following-season match or a correctly aligned one,
    # and only the columns needed downstream.
    .loc[lambda m: m['season_x'].isna() | m['next_season'].eq(m['season_x']),
         ['season', 'player_id', 'team', 'team_x']]
    .rename(columns={'team_x': 'next_team'})
)

# From here on roster_map refers to the version that includes next_team.
roster_map = temp_roster_df

In [33]:
# Add next_team to each player-season row. Columns that clash with ones
# already in rushing_df come through with an '_x' suffix.
rushing_df = pd.merge(
    rushing_df,
    roster_map,
    how='left',
    left_on=['rusher_player_id', 'season'],
    right_on=['player_id', 'season'],
    suffixes=('', '_x'),
)

#### NOTE: Players who do not play in the following season are kept during the feature engineering stage; they must be removed before training.

In [34]:
# Carries per (team, season) from players whose next-season team is the same
# team. Rows with a missing next_team never compare equal, so they are excluded.
returning_carries_df = (
    rushing_df
    .loc[rushing_df['team'].eq(rushing_df['next_team'])]
    .groupby(['team', 'season'])['carries']
    .sum()
    .rename('returning_carries')
    .reset_index()
)

In [35]:
# Carries per season grouped by the team each player is on the following
# season. The grouping key is relabelled 'team' so that it lines up with the
# returning-carries table.
incoming_carries_df = (
    rushing_df
    .groupby(['next_team', 'season'])['carries']
    .sum()
    .rename('incoming_carries')
    .reset_index()
    .rename(columns={'next_team': 'team'})
)

#### NOTE: Both returning_carries and incoming_carries are attempts to evaluate the carryover / changes in talent on a roster from a past season to a future one. This isn't forward leakage, under the assumption that you know a team's roster for the upcoming season prior to drafting and that you know the previous-season carries of each player on that roster.

In [36]:
# Attach returning and incoming carries to the team-season table. Each merge
# contributes its own 'team' key column; the second merge suffixes the pair to
# team_x / team_y, and both are dropped because posteam already identifies
# the team.
total_rushes_by_team = (
    total_rushes_by_team
    .merge(
        returning_carries_df,
        how='left',
        left_on=['posteam', 'season'],
        right_on=['team', 'season'],
    )
    .merge(
        incoming_carries_df,
        how='left',
        left_on=['posteam', 'season'],
        right_on=['team', 'season'],
    )
    .drop(columns=['team_x', 'team_y'])
)

In [37]:
# Express returning and incoming carries as a fraction of the team's total
# carries for that season.
total_rushes_by_team = total_rushes_by_team.assign(
    returning_carry_pct=lambda t: t['returning_carries'] / t['team_carries'],
    incoming_carry_pct=lambda t: t['incoming_carries'] / t['team_carries'],
)

In [38]:
# Look up the team-level carryover figures for the team each player is on the
# following season (next_team), keyed by the current season. Team columns that
# already exist in rushing_df keep their names; the clashing right-hand
# versions receive an '_x' suffix.
rushing_df = pd.merge(
    rushing_df,
    total_rushes_by_team,
    how='left',
    left_on=['next_team', 'season'],
    right_on=['posteam', 'season'],
    suffixes=('', '_x'),
)

In [39]:
# Flag players whose next-season team matches their current team, and compute
# each player's carries relative to the incoming carries of the next team.
rushing_df = rushing_df.assign(
    same_team=rushing_df['team'].eq(rushing_df['next_team']),
    carries_per_incoming_carry=rushing_df['carries'].div(rushing_df['incoming_carries']),
)

In [40]:
# Reduce rushing_df to the final feature set, dropping the helper and
# suffixed columns accumulated by the merges above.
rushing_df = rushing_df.loc[:, [
    # identifiers
    'rusher_player_id', 'season', 'team',
    # season totals and yardage rates
    'games_played', 'yards_gained', 'carries',
    'yards_per_game', 'yards_per_carry',
    'capped_yards_per_game', 'capped_yards_per_carry',
    # touchdowns and fumbles
    'rush_touchdown', 'tds_per_carry', 'tds_per_game',
    'fumble', 'fumbles_per_carry', 'fumbles_per_game',
    # workload relative to the current team
    'carries_per_game', 'team_carries', 'team_carries_per_game',
    'season_workload_pct', 'by_game_workload_pct',
    # next-season roster context
    'next_team', 'same_team',
    'returning_carry_pct', 'incoming_carry_pct',
    'carries_per_incoming_carry',
]]

In [43]:
# Display the final feature table ordered from most to fewest carries.
# Display only: the sorted result is not assigned back to rushing_df.
rushing_df.sort_values(by='carries', ascending=False)

Unnamed: 0,rusher_player_id,season,team,games_played,yards_gained,carries,yards_per_game,yards_per_carry,capped_yards_per_game,capped_yards_per_carry,...,carries_per_game,team_carries,team_carries_per_game,season_workload_pct,by_game_workload_pct,next_team,same_team,returning_carry_pct,incoming_carry_pct,carries_per_incoming_carry
1566,00-0028009,2014,DAL,15,1745.0,373,116.333333,4.678284,103.200000,4.150134,...,24.866667,468.0,31.200000,0.797009,0.797009,PHI,False,0.177156,1.221445,0.711832
1070,00-0026796,2012,HOU,15,1315.0,335,87.666667,3.925373,80.666667,3.611940,...,22.333333,473.0,31.533333,0.708245,0.708245,HOU,True,0.862579,0.873150,0.811138
853,00-0026164,2009,TEN,15,1874.0,323,124.933333,5.801858,87.266667,4.052632,...,21.533333,453.0,30.200000,0.713024,0.713024,TEN,True,0.860927,0.860927,0.828205
2769,00-0033045,2016,DAL,15,1631.0,322,108.733333,5.065217,94.066667,4.381988,...,21.466667,463.0,30.866667,0.695464,0.695464,DAL,True,0.958963,0.958963,0.725225
2176,00-0030496,2017,PIT,15,1291.0,322,86.066667,4.009317,83.800000,3.903727,...,21.466667,390.0,26.000000,0.825641,0.825641,,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324,00-0022999,2017,NO,1,2.0,1,2.000000,2.000000,2.000000,2.000000,...,1.000000,399.0,26.600000,0.002506,0.037594,,False,,,
1307,00-0027583,2013,CAR,1,1.0,1,1.000000,1.000000,1.000000,1.000000,...,1.000000,438.0,29.200000,0.002283,0.034247,,False,,,
1297,00-0027518,2011,BUF,1,2.0,1,2.000000,2.000000,2.000000,2.000000,...,1.000000,366.0,24.400000,0.002732,0.040984,BUF,True,1.060109,1.060109,0.002577
1723,00-0028434,2011,SEA,1,-2.0,1,-2.000000,-2.000000,-2.000000,-2.000000,...,1.000000,404.0,26.933333,0.002475,0.037129,SEA,True,0.801980,0.804455,0.003077
