Following https://github.com/nathanbraun/market-share-example/blob/master/market-share.py

# Feel free to get ideas from this, but don't take the stats as gospel

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Remote CSV locations (2019 regular-season files in the ryurko/nflscrapR-data
# GitHub repository), read later with pd.read_csv.
# Play-by-play data:
PBP_PATH = 'https://raw.githubusercontent.com/ryurko/nflscrapR-data/master/play_by_play_data/regular_season/reg_pbp_2019.csv'
# Roster data:
PLAYER_PATH = 'https://raw.githubusercontent.com/ryurko/nflscrapR-data/master/roster_data/regular_season/reg_roster_2019.csv'
# Games data:
GAME_PATH = 'https://raw.githubusercontent.com/ryurko/nflscrapR-data/master/games_data/regular_season/reg_games_2019.csv'

In [None]:
# Download the three source tables. The play-by-play file is read with an
# explicit column subset; the roster and games files are read in full.
pbp = pd.read_csv(
    PBP_PATH,
    usecols=[
        'play_id', 'game_id', 'game_date',
        'posteam', 'defteam', 'play_type',
        'complete_pass',
        'receiver_player_id', 'receiver_player_name',
        'rusher_player_id', 'rusher_player_name',
    ],
)
player = pd.read_csv(PLAYER_PATH)
games = pd.read_csv(GAME_PATH)

In [None]:
pbp.head()

In [None]:
player.head()

In [None]:
games.head()

Getting weekly stats

In [None]:
# One row per (game, rusher): `carries` counts the plays with play_type 'run'
# on which the player is listed as the rusher. The rusher id/name columns are
# renamed to the generic player_id/player_name so the table can be merged
# with the receiving table.
weekly_rush_stats = (
    pbp.loc[pbp['play_type'] == 'run']
    .groupby(['game_id', 'rusher_player_id', 'rusher_player_name'])
    .agg(carries=pd.NamedAgg(column='play_id', aggfunc='count'))
    .reset_index()
    .rename(columns={
        'rusher_player_id': 'player_id',
        'rusher_player_name': 'player_name',
    })
)

In [None]:
weekly_rush_stats.head()

In [None]:
# One row per (game, receiver) over plays with play_type 'pass':
#   targets - number of such plays naming the player as receiver
#   catches - sum of the complete_pass flag on those plays
# The receiver id/name columns are renamed to player_id/player_name.
weekly_rec_stats = (
    pbp.loc[pbp['play_type'] == 'pass']
    .groupby(['game_id', 'receiver_player_id', 'receiver_player_name'])
    .agg(
        targets=pd.NamedAgg(column='play_id', aggfunc='count'),
        catches=pd.NamedAgg(column='complete_pass', aggfunc='sum'),
    )
    .reset_index()
    .rename(columns={
        'receiver_player_id': 'player_id',
        'receiver_player_name': 'player_name',
    })
)

In [None]:
weekly_rec_stats.head()

In [None]:
# Outer-join the weekly receiving and rushing tables on the columns they
# share (game and player keys), so a player who only ran or only caught in a
# game still gets a row; the stats missing from the other side become 0.
weekly_stats_player = weekly_rec_stats.merge(
    weekly_rush_stats,
    how='outer',
    on=['game_id', 'player_id', 'player_name'],
).fillna(0)

In [None]:
weekly_stats_player.head()

Season-long stats by player

In [None]:
# Season totals per rusher: `carries` counts every play with play_type 'run'
# on which the player is listed as the rusher (no per-game split).
season_rush_stats = (
    pbp.loc[pbp['play_type'] == 'run']
    .groupby(['rusher_player_id', 'rusher_player_name'])
    .agg(carries=pd.NamedAgg(column='play_id', aggfunc='count'))
    .reset_index()
    .rename(columns={
        'rusher_player_id': 'player_id',
        'rusher_player_name': 'player_name',
    })
)

In [None]:
season_rush_stats.head()

In [None]:
season_rush_stats.query("player_name == 'E.Elliott'")

In [None]:
# Season totals per receiver over plays with play_type 'pass':
#   targets - number of such plays naming the player as receiver
#   catches - sum of the complete_pass flag on those plays
season_rec_stats = (
    pbp.loc[pbp['play_type'] == 'pass']
    .groupby(['receiver_player_id', 'receiver_player_name'])
    .agg(
        targets=pd.NamedAgg(column='play_id', aggfunc='count'),
        catches=pd.NamedAgg(column='complete_pass', aggfunc='sum'),
    )
    .reset_index()
    .rename(columns={
        'receiver_player_id': 'player_id',
        'receiver_player_name': 'player_name',
    })
)

In [None]:
season_rec_stats.query("player_name == 'J.Landry'")

In [None]:
# Outer-join season rushing and receiving totals on the shared player keys;
# stats a player lacks on one side are filled with 0.
season_stats_player = season_rush_stats.merge(
    season_rec_stats,
    how='outer',
    on=['player_id', 'player_name'],
).fillna(0)

In [None]:
season_stats_player.head()

In [None]:
season_stats_player.query("player_name == 'C.McCaffrey'")

In [None]:
season_stats_player.query("player_name == 'E.Elliott'")

In [None]:
season_stats_player.query("player_name == 'J.Landry'")

In [None]:
pbp.head()

In [None]:
# Re-read the same play-by-play CSV without a usecols filter so that every
# column in the file is available, not just the subset loaded into `pbp`.
pbp_full = pd.read_csv(PBP_PATH)

In [None]:
# Fix mistake in pbp data:
# J.Landry did not score a TD in week 1, so the 'touchdown' flag on the row
# labelled 1407 is overwritten with 0.
# NOTE(review): 1407 is an index label; with the default index produced by
# pd.read_csv it equals the row position, so this would silently hit a
# different play if the upstream CSV's row order ever changes - confirm the
# row before re-running against refreshed data.
pbp_full.at[1407,'touchdown'] = 0.0

In [None]:
# Print every column name of the full play-by-play frame, one per line.
for col in pbp_full.columns:
    print(col)

In [None]:
# Plays flagged as a fumble on which S.Barkley is listed as the receiver.
pbp_full.loc[
    (pbp_full['fumble'] == 1)
    & (pbp_full['receiver_player_name'] == 'S.Barkley')
]

In [None]:
pbp_full.iloc[179]

In [None]:
pbp_full.query("touchdown == 1")

In [None]:
# Season rushing line per player, built from plays whose play_type is 'run'
# or 'qb_kneel':
#   carries    - number of such plays naming the player as rusher
#   rush_yards - sum of yards_gained on those plays
#   rush_tds   - sum of the touchdown flag on those plays
# NOTE(review): `touchdown` is summed as-is. If that flag is also set when the
# defense scores on the play (e.g. a returned fumble), rush_tds over-counts -
# confirm, and consider a rush-specific TD column if the data has one.
season_rush_stats = (
    pbp_full.loc[pbp_full['play_type'].isin(['run', 'qb_kneel'])]
    .groupby(['rusher_player_id', 'rusher_player_name'])
    .agg(
        carries=pd.NamedAgg(column='play_id', aggfunc='count'),
        rush_yards=pd.NamedAgg(column='yards_gained', aggfunc='sum'),
        rush_tds=pd.NamedAgg(column='touchdown', aggfunc='sum'),
    )
    .reset_index()
    .rename(columns={
        'rusher_player_id': 'player_id',
        'rusher_player_name': 'player_name',
    })
)

In [None]:
season_rush_stats.head()

In [None]:
season_rush_stats.query("player_name == 'S.Barkley'")

In [None]:
# Season receiving line per player, built from plays with play_type 'pass':
#   targets   - number of such plays naming the player as receiver
#   catches   - sum of the complete_pass flag
#   rec_yards - sum of yards_gained
#   rec_tds   - sum of the touchdown flag
# NOTE(review): `touchdown` is summed as-is. If it is also set when the
# defense scores on the play (e.g. an interception return), rec_tds
# over-counts - confirm, and consider a pass-specific TD column if available.
season_rec_stats = (
    pbp_full.loc[pbp_full['play_type'] == 'pass']
    .groupby(['receiver_player_id', 'receiver_player_name'])
    .agg(
        targets=pd.NamedAgg(column='play_id', aggfunc='count'),
        catches=pd.NamedAgg(column='complete_pass', aggfunc='sum'),
        rec_yards=pd.NamedAgg(column='yards_gained', aggfunc='sum'),
        rec_tds=pd.NamedAgg(column='touchdown', aggfunc='sum'),
    )
    .reset_index()
    .rename(columns={
        'receiver_player_id': 'player_id',
        'receiver_player_name': 'player_name',
    })
)

In [None]:
season_rec_stats.head()

In [None]:
# Outer-join the season rushing and receiving lines on the shared player
# keys; any stat a player lacks on one side is filled with 0.
season_stats = season_rush_stats.merge(
    season_rec_stats,
    how='outer',
    on=['player_id', 'player_name'],
).fillna(0)

In [None]:
season_stats.head()

In [None]:
season_stats.query("player_name == 'S.Barkley'")

In [None]:
# Season receiving line per player, extended with fumble counts:
#   fumbles      - sum of the fumble flag on the player's pass plays
#   fumbles_lost - sum of the fumble_lost flag on those plays
# The two fumble aggregations used to sit below the statement as a dangling,
# indented fragment (an IndentationError that also left the table without
# any fumble columns); they now live inside the .agg(...) call.
# NOTE(review): rows are grouped by receiver on play_type 'pass' only, so
# fumbles on rushing plays are not attributed here - confirm that is intended.
season_fum_stats = (pbp_full
                    .query("play_type == 'pass'")
                    .groupby(['receiver_player_id', 'receiver_player_name'])
                    .agg(targets = ('play_id', 'count'),
                         catches = ('complete_pass', 'sum'),
                         rec_yards = ('yards_gained','sum'),
                         rec_tds = ('touchdown','sum'),
                         fumbles = ('fumble', 'sum'),
                         fumbles_lost = ('fumble_lost', 'sum'))
                    .reset_index()
                    .rename(columns={
                        'receiver_player_id': 'player_id',
                        'receiver_player_name': 'player_name'}))

In [None]:
# Scoring weights applied when computing the fantasy_points column.
yd_pt = 0.1  # points per yard (rushing and receiving)
td_pt = 6.0  # points per touchdown (rushing and receiving)
ppr = 0.5    # points per catch
ppc = 0.0    # points per carry

In [None]:
# Fantasy score per player: a weighted sum of catches, total touchdowns,
# carries and total yards, using the scoring weights defined above.
season_stats["fantasy_points"] = (
    ppr * season_stats["catches"]
    + td_pt * (season_stats["rush_tds"] + season_stats["rec_tds"])
    + ppc * season_stats["carries"]
    + yd_pt * (season_stats["rush_yards"] + season_stats["rec_yards"])
)

In [None]:
season_stats.sort_values(by=['fantasy_points'],ascending = False).head()