This notebook follows the market-share example at https://github.com/nathanbraun/market-share-example/blob/master/market-share.py

In [1]:
import pandas as pd
import numpy as np

In [2]:
# All three 2019 regular-season CSVs live under the same nflscrapR-data root.
_NFLSCRAPR_ROOT = 'https://raw.githubusercontent.com/ryurko/nflscrapR-data/master'

PBP_PATH = f'{_NFLSCRAPR_ROOT}/play_by_play_data/regular_season/reg_pbp_2019.csv'
PLAYER_PATH = f'{_NFLSCRAPR_ROOT}/roster_data/regular_season/reg_roster_2019.csv'
GAME_PATH = f'{_NFLSCRAPR_ROOT}/games_data/regular_season/reg_games_2019.csv'

In [63]:
# Load the three tables. Only the listed play-by-play columns are parsed;
# the roster and games files are read in full.
pbp_columns = [
    'play_id', 'game_id', 'game_date',
    'posteam', 'defteam', 'play_type',
    'complete_pass',
    'receiver_player_id', 'receiver_player_name',
    'rusher_player_id', 'rusher_player_name',
]
pbp = pd.read_csv(PBP_PATH, usecols=pbp_columns)
player = pd.read_csv(PLAYER_PATH)
games = pd.read_csv(GAME_PATH)

In [5]:
pbp.head()

Unnamed: 0,play_id,game_id,posteam,defteam,game_date,play_type,complete_pass,receiver_player_id,receiver_player_name,rusher_player_id,rusher_player_name
0,35,2019090500,GB,CHI,2019-09-05,kickoff,0.0,,,,
1,50,2019090500,GB,CHI,2019-09-05,run,0.0,,,00-0033293,A.Jones
2,71,2019090500,GB,CHI,2019-09-05,pass,1.0,00-0033293,A.Jones,,
3,95,2019090500,GB,CHI,2019-09-05,pass,0.0,,,,
4,125,2019090500,GB,CHI,2019-09-05,punt,0.0,,,,


In [6]:
player.head()

Unnamed: 0,season,season_type,full_player_name,abbr_player_name,team,position,gsis_id
0,2019,reg,A.J. Brown,A.Brown,TEN,WR,00-0035676
1,2019,reg,Aaron Jones,A.Jones,GB,RB,00-0033293
2,2019,reg,Aaron Rodgers,A.Rodgers,GB,QB,00-0023459
3,2019,reg,Adam Humphries,A.Humphries,TEN,WR,00-0032009
4,2019,reg,Adam Shaheen,A.Shaheen,CHI,TE,00-0033896


In [7]:
games.head()

Unnamed: 0,type,game_id,home_team,away_team,week,season,state_of_game,game_url,home_score,away_score
0,reg,2019090500,CHI,GB,1,2019,POST,http://www.nfl.com/liveupdate/game-center/2019...,3,10
1,reg,2019090800,CAR,LA,1,2019,POST,http://www.nfl.com/liveupdate/game-center/2019...,27,30
2,reg,2019090806,PHI,WAS,1,2019,POST,http://www.nfl.com/liveupdate/game-center/2019...,32,27
3,reg,2019090805,NYJ,BUF,1,2019,POST,http://www.nfl.com/liveupdate/game-center/2019...,16,17
4,reg,2019090804,MIN,ATL,1,2019,POST,http://www.nfl.com/liveupdate/game-center/2019...,28,12


Getting weekly stats

In [42]:
# Carries per rusher per game: count the plays whose play_type is 'run',
# grouped by game and rusher, then switch to the generic player_* column names.
weekly_rush_stats = (
    pbp
    .loc[lambda plays: plays['play_type'] == 'run']
    .groupby(['game_id', 'rusher_player_id', 'rusher_player_name'])['play_id']
    .count()
    .reset_index(name='carries')
    .rename(columns={'rusher_player_id': 'player_id',
                     'rusher_player_name': 'player_name'})
)

In [43]:
weekly_rush_stats.head()

Unnamed: 0,game_id,player_id,player_name,carries
0,2019090500,00-0023459,A.Rodgers,1
1,2019090500,00-0030578,C.Patterson,1
2,2019090500,00-0032063,M.Davis,5
3,2019090500,00-0033293,A.Jones,13
4,2019090500,00-0033869,M.Trubisky,3


In [48]:
# Targets and catches per receiver per game, taken from plays whose play_type
# is 'pass': targets counts the plays, catches sums the complete_pass flag.
weekly_rec_stats = (
    pbp
    .loc[lambda plays: plays['play_type'] == 'pass']
    .groupby(['game_id', 'receiver_player_id', 'receiver_player_name'])
    .agg(targets=pd.NamedAgg(column='play_id', aggfunc='count'),
         catches=pd.NamedAgg(column='complete_pass', aggfunc='sum'))
    .reset_index()
    .rename(columns={'receiver_player_id': 'player_id',
                     'receiver_player_name': 'player_name'})
)

In [49]:
weekly_rec_stats.head()

Unnamed: 0,game_id,player_id,player_name,targets,catches
0,2019090500,00-0024243,M.Lewis,3,2.0
1,2019090500,00-0027696,J.Graham,5,3.0
2,2019090500,00-0030578,C.Patterson,3,1.0
3,2019090500,00-0031228,T.Gabriel,5,2.0
4,2019090500,00-0031381,D.Adams,8,4.0


In [34]:
# One row per (game, player) combining receiving and rushing lines.
# The join keys are spelled out so the merge cannot silently start keying on
# some other column the two frames happen to share, and validate= asserts that
# each (game, player) appears at most once on either side.
# Players present on only one side get 0 for the other side's stats.
weekly_stats_player = pd.merge(
    weekly_rec_stats,
    weekly_rush_stats,
    how='outer',
    on=['game_id', 'player_id', 'player_name'],
    validate='one_to_one',
).fillna(0)

In [35]:
weekly_stats_player.head()

Unnamed: 0,game_id,player_id,player_name,targets,catches,carries
0,2019090500,00-0024243,M.Lewis,3.0,2.0,0.0
1,2019090500,00-0027696,J.Graham,5.0,3.0,0.0
2,2019090500,00-0030578,C.Patterson,3.0,1.0,1.0
3,2019090500,00-0031228,T.Gabriel,5.0,2.0,0.0
4,2019090500,00-0031381,D.Adams,8.0,4.0,0.0


Season-long stats by player

In [44]:
# Season carries per rusher: count of plays whose play_type is 'run',
# grouped by rusher and relabelled with the generic player_* column names.
season_rush_stats = (
    pbp
    .loc[lambda plays: plays['play_type'] == 'run']
    .groupby(['rusher_player_id', 'rusher_player_name'])['play_id']
    .count()
    .reset_index(name='carries')
    .rename(columns={'rusher_player_id': 'player_id',
                     'rusher_player_name': 'player_name'})
)

In [45]:
season_rush_stats.head()

Unnamed: 0,player_id,player_name,carries
0,00-0019596,T.Brady,11
1,00-0020531,D.Brees,2
2,00-0022803,E.Manning,3
3,00-0022924,B.Roethlisberger,1
4,00-0022942,P.Rivers,10


In [46]:
season_rush_stats.query("player_name == 'E.Elliott'")

Unnamed: 0,player_id,player_name,carries
169,00-0033045,E.Elliott,301


In [50]:
# Season targets and catches per receiver from plays whose play_type is
# 'pass': targets counts the plays, catches sums the complete_pass flag.
season_rec_stats = (
    pbp
    .loc[lambda plays: plays['play_type'] == 'pass']
    .groupby(['receiver_player_id', 'receiver_player_name'])
    .agg(targets=pd.NamedAgg(column='play_id', aggfunc='count'),
         catches=pd.NamedAgg(column='complete_pass', aggfunc='sum'))
    .reset_index()
    .rename(columns={'receiver_player_id': 'player_id',
                     'receiver_player_name': 'player_name'})
)

In [51]:
season_rec_stats.query("player_name == 'J.Landry'")

Unnamed: 0,player_id,player_name,targets,catches
128,00-0031382,J.Landry,138,83.0


In [52]:
# One row per player combining season rushing and receiving lines.
# The join keys are explicit so the merge cannot silently key on any other
# column the two frames share; validate= asserts each player appears at most
# once on either side. Players missing from one side get 0 for its stats.
season_stats_player = pd.merge(
    season_rush_stats,
    season_rec_stats,
    how='outer',
    on=['player_id', 'player_name'],
    validate='one_to_one',
).fillna(0)

In [53]:
season_stats_player.head()

Unnamed: 0,player_id,player_name,carries,targets,catches
0,00-0019596,T.Brady,11.0,0.0,0.0
1,00-0020531,D.Brees,2.0,0.0,0.0
2,00-0022803,E.Manning,3.0,0.0,0.0
3,00-0022924,B.Roethlisberger,1.0,0.0,0.0
4,00-0022942,P.Rivers,10.0,0.0,0.0


In [56]:
season_stats_player.query("player_name == 'C.McCaffrey'")

Unnamed: 0,player_id,player_name,carries,targets,catches
178,00-0033280,C.McCaffrey,288.0,142.0,116.0


In [57]:
season_stats_player.query("player_name == 'E.Elliott'")

Unnamed: 0,player_id,player_name,carries,targets,catches
169,00-0033045,E.Elliott,301.0,71.0,54.0


In [58]:
season_stats_player.query("player_name == 'J.Landry'")

Unnamed: 0,player_id,player_name,carries,targets,catches
100,00-0031382,J.Landry,1.0,138.0,83.0


In [60]:
pbp.head()

Unnamed: 0,play_id,game_id,posteam,defteam,game_date,play_type,complete_pass,receiver_player_id,receiver_player_name,rusher_player_id,rusher_player_name
0,35,2019090500,GB,CHI,2019-09-05,kickoff,0.0,,,,
1,50,2019090500,GB,CHI,2019-09-05,run,0.0,,,00-0033293,A.Jones
2,71,2019090500,GB,CHI,2019-09-05,pass,1.0,00-0033293,A.Jones,,
3,95,2019090500,GB,CHI,2019-09-05,pass,0.0,,,,
4,125,2019090500,GB,CHI,2019-09-05,punt,0.0,,,,


In [64]:
# Read the same play-by-play CSV again, this time with every column
# (the earlier `pbp` frame was loaded with a `usecols` subset).
pbp_full = pd.read_csv(PBP_PATH)

In [142]:
# Fix a mistake in the play-by-play data: J.Landry did not score a TD in week 1.
# Mutates pbp_full in place by zeroing the touchdown flag on one row.
# NOTE(review): the row label 1407 is hard-coded — presumably it is the
# J.Landry play in the CSV as currently published; confirm (e.g. by checking
# game_id / play_id of that row) since a changed upstream file would shift it.
pbp_full.at[1407,'touchdown'] = 0.0

In [71]:
# Print every column name of the full play-by-play frame, one per line.
for column_name in pbp_full.columns.tolist():
    print(column_name)

play_id
game_id
home_team
away_team
posteam
posteam_type
defteam
side_of_field
yardline_100
game_date
quarter_seconds_remaining
half_seconds_remaining
game_seconds_remaining
game_half
quarter_end
drive
sp
qtr
down
goal_to_go
time
yrdln
ydstogo
ydsnet
desc
play_type
yards_gained
shotgun
no_huddle
qb_dropback
qb_kneel
qb_spike
qb_scramble
pass_length
pass_location
air_yards
yards_after_catch
run_location
run_gap
field_goal_result
kick_distance
extra_point_result
two_point_conv_result
home_timeouts_remaining
away_timeouts_remaining
timeout
timeout_team
td_team
posteam_timeouts_remaining
defteam_timeouts_remaining
total_home_score
total_away_score
posteam_score
defteam_score
score_differential
posteam_score_post
defteam_score_post
score_differential_post
no_score_prob
opp_fg_prob
opp_safety_prob
opp_td_prob
fg_prob
safety_prob
td_prob
extra_point_prob
two_point_conversion_prob
ep
epa
total_home_epa
total_away_epa
total_home_rush_epa
total_away_rush_epa
total_home_pass_epa
total_away_pass_e

In [183]:
# Plays flagged as a fumble on which S.Barkley is the recorded receiver.
pbp_full[(pbp_full['fumble'] == 1)
         & (pbp_full['receiver_player_name'] == 'S.Barkley')]

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv
1941,209,2019090810,DAL,NYG,NYG,away,DAL,NYG,91.0,2019-09-08,...,,,,0,,,0.0,0.0,0.0,0.0


In [159]:
pbp_full.iloc[179]

play_id                                 171
game_id                          2019090800
home_team                               CAR
away_team                                LA
posteam                                 CAR
                                    ...    
penalty_type                            NaN
defensive_two_point_attempt               0
defensive_two_point_conv                  0
defensive_extra_point_attempt             0
defensive_extra_point_conv                0
Name: 179, Length: 256, dtype: object

In [92]:
pbp_full.query("touchdown == 1")

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv
42,1159,2019090500,CHI,GB,GB,away,CHI,CHI,8.0,2019-09-05,...,,,,0,,,0.0,0.0,0.0,0.0
221,1160,2019090800,CAR,LA,LA,away,CAR,CAR,5.0,2019-09-08,...,,,,0,,,0.0,0.0,0.0,0.0
283,2702,2019090800,CAR,LA,CAR,home,LA,LA,8.0,2019-09-08,...,,,,0,,,0.0,0.0,0.0,0.0
301,3144,2019090800,CAR,LA,LA,away,CAR,CAR,1.0,2019-09-08,...,,,,0,,,0.0,0.0,0.0,0.0
322,3675,2019090800,CAR,LA,CAR,home,LA,LA,2.0,2019-09-08,...,,,,0,,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45471,2344,2019122915,SEA,SF,SEA,home,SF,SF,14.0,2019-12-29,...,,,,0,,,0.0,0.0,0.0,0.0
45478,2499,2019122915,SEA,SF,SF,away,SEA,SEA,2.0,2019-12-29,...,,,,0,,,0.0,0.0,0.0,0.0
45496,2928,2019122915,SEA,SF,SEA,home,SF,SF,1.0,2019-12-29,...,,,,0,,,0.0,0.0,0.0,0.0
45506,3159,2019122915,SEA,SF,SF,away,SEA,SEA,13.0,2019-12-29,...,,,,0,,,0.0,0.0,0.0,0.0


In [197]:
# Season rushing line per player from the full play-by-play: carries, rushing
# yards and rushing touchdowns over plays typed 'run' or 'qb_kneel'.
# Rebinds season_rush_stats, replacing the carries-only frame built from `pbp`.
season_rush_stats = (
    pbp_full
    .loc[lambda plays: plays['play_type'].isin(['run', 'qb_kneel'])]
    .groupby(['rusher_player_id', 'rusher_player_name'])
    .agg(**{
        'carries': ('play_id', 'count'),
        'rush_yards': ('yards_gained', 'sum'),
        'rush_tds': ('touchdown', 'sum'),
    })
    .reset_index()
    .rename(columns={'rusher_player_id': 'player_id',
                     'rusher_player_name': 'player_name'})
)

In [198]:
season_rush_stats.head()

Unnamed: 0,player_id,player_name,carries,rush_yards,rush_tds
0,00-0019596,T.Brady,26,34,3.0
1,00-0020531,D.Brees,9,-4,1.0
2,00-0021206,J.McCown,2,-2,0.0
3,00-0022787,M.Schaub,3,-3,0.0
4,00-0022803,E.Manning,4,7,0.0


In [199]:
season_rush_stats.query("player_name == 'S.Barkley'")

Unnamed: 0,player_id,player_name,carries,rush_yards,rush_tds
279,00-0034844,S.Barkley,217,1003,6.0


In [201]:
# Season receiving line per player, rebuilt from the full play-by-play so that
# rec_yards and rec_tds exist. Without this definition the preview here, the
# season_stats merge and the fantasy_points calculation only work off leftover
# kernel state: the earlier season_rec_stats built from `pbp` carries just
# targets and catches, so a fresh Restart & Run All would fail downstream.
season_rec_stats = (pbp_full
                    .query("play_type == 'pass'")
                    .groupby(['receiver_player_id', 'receiver_player_name'])
                    .agg(targets = ('play_id', 'count'),
                         catches = ('complete_pass', 'sum'),
                         rec_yards = ('yards_gained', 'sum'),
                         rec_tds = ('touchdown', 'sum'))
                    .reset_index()
                    .rename(columns={
                        'receiver_player_id': 'player_id',
                        'receiver_player_name': 'player_name'}))
season_rec_stats.head()

Unnamed: 0,player_id,player_name,targets,catches,rec_yards,rec_tds
0,00-0022127,J.Witten,85,63.0,531,4.0
1,00-0022921,L.Fitzgerald,108,75.0,804,6.0
2,00-0022943,B.Watson,24,17.0,173,0.0
3,00-0023500,F.Gore,16,13.0,100,0.0
4,00-0023564,D.Sproles,10,6.0,24,0.0


In [214]:
# One row per player combining the season rushing and receiving lines.
# The join keys are explicit so the merge cannot silently key on any other
# column the two frames share; validate= asserts each player appears at most
# once on either side. Players missing from one side get 0 for its stats.
season_stats = pd.merge(
    season_rush_stats,
    season_rec_stats,
    how='outer',
    on=['player_id', 'player_name'],
    validate='one_to_one',
).fillna(0)

In [215]:
season_stats.head()

Unnamed: 0,player_id,player_name,carries,rush_yards,rush_tds,targets,catches,rec_yards,rec_tds
0,00-0019596,T.Brady,26.0,34.0,3.0,0.0,0.0,0.0,0.0
1,00-0020531,D.Brees,9.0,-4.0,1.0,0.0,0.0,0.0,0.0
2,00-0021206,J.McCown,2.0,-2.0,0.0,0.0,0.0,0.0,0.0
3,00-0022787,M.Schaub,3.0,-3.0,0.0,0.0,0.0,0.0,0.0
4,00-0022803,E.Manning,4.0,7.0,0.0,0.0,0.0,0.0,0.0


In [216]:
season_stats.query("player_name == 'S.Barkley'")

Unnamed: 0,player_id,player_name,carries,rush_yards,rush_tds,targets,catches,rec_yards,rec_tds
279,00-0034844,S.Barkley,217.0,1003.0,6.0,73.0,52.0,438.0,2.0


In [None]:
# Season fumble totals per receiver, from pass plays in the full play-by-play.
# The two fumble aggregations used to sit after the closing parenthesis of the
# chain, which made this cell a syntax error; they now live inside .agg().
# The receiving aggregations (targets, catches, rec_yards, rec_tds) are not
# repeated here, so this frame holds fumble columns only.
# NOTE(review): grouping by receiver on 'pass' plays means fumbles on rushing
# plays are not counted — confirm that is the intent.
# NOTE(review): 'fumble_lost' is assumed to be a pbp_full column (only 'fumble'
# is exercised elsewhere in this notebook) — confirm.
season_fum_stats = (pbp_full
                    .query("play_type == 'pass'")
                    .groupby(['receiver_player_id', 'receiver_player_name'])
                    .agg(fumbles = ('fumble', 'sum'),
                         fumbles_lost = ('fumble_lost', 'sum'))
                    .reset_index()
                    .rename(columns={
                        'receiver_player_id': 'player_id',
                        'receiver_player_name': 'player_name'}))

In [227]:
# Scoring weights applied in the fantasy_points calculation.
ppr, td_pt = 0.5, 6.0   # points per catch, points per touchdown
ppc, yd_pt = 0.0, 0.1   # points per carry, points per yard gained

In [228]:
# Fantasy points per player: weighted catches, touchdowns (rushing plus
# receiving), carries, and yards (rushing plus receiving).
# Adds / overwrites the fantasy_points column on season_stats in place.
season_stats["fantasy_points"] = (
    ppr * season_stats["catches"]
    + td_pt * (season_stats["rush_tds"] + season_stats["rec_tds"])
    + ppc * season_stats["carries"]
    + yd_pt * (season_stats["rush_yards"] + season_stats["rec_yards"])
)

In [229]:
# Five highest-scoring players, ordered by fantasy_points descending.
season_stats.sort_values('fantasy_points', ascending=False).iloc[:5]

Unnamed: 0,player_id,player_name,carries,rush_yards,rush_tds,targets,catches,rec_yards,rec_tds,fantasy_points
182,00-0033280,C.McCaffrey,288.0,1389.0,15.0,142.0,116.0,1005.0,4.0,411.4
156,00-0032765,M.Thomas,1.0,-9.0,0.0,185.0,149.0,1725.0,9.0,300.1
185,00-0033293,A.Jones,236.0,1084.0,16.0,68.0,49.0,474.0,3.0,294.3
155,00-0032764,D.Henry,303.0,1540.0,16.0,24.0,18.0,206.0,2.0,291.6
173,00-0033045,E.Elliott,301.0,1357.0,12.0,71.0,54.0,420.0,2.0,288.7
