## Passing refactor
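Goal: add positional passing splits (per-passer stats broken down by the receiver's position: WR, TE, RB, unknown, other) to the per-player, per-game passing stats.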

In [1]:
import logging
import os
import sys

import numpy as np
import pandas as pd

sys.path.insert(0, '..')

from app import db

# Notebook display settings: show full cell contents and wide frames.
# `None` (not the deprecated `-1`) is the supported way to disable column-width
# truncation; negative values trigger a FutureWarning or error in current pandas.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 50)

  pd.set_option('display.max_colwidth', -1)


In [2]:
# Open the database connection that the extract queries below read from.
db_conn = db.get_db_conn()

In [285]:
def _extract(db_conn) -> pd.DataFrame:
    """Getting passing stats, per player per game"""
    logging.info("Extracting passing stats by player by game from play by play...")
    query = """
        SELECT
            year,
            season_type,
            p.game_id AS game_id,
            posteam AS team,
            defteam AS opp,
            week,
            passer_gsis_id AS gsis_id,
            passer_position AS pos,
            passer as player,
            SUM(complete_pass) AS completions,
            SUM(pass_attempt) AS attempts,
            SUM(CASE
                WHEN lateral_rec_yards IS NOT NULL AND sack = 0 THEN yards_gained + lateral_rec_yards
                WHEN sack = 0 THEN yards_gained
                ELSE 0 END
            ) AS yards,
            SUM(CASE WHEN sack = 0 THEN air_yards ELSE 0 END) AS air_yards_intended,
            SUM(CASE WHEN complete_pass = 1 THEN air_yards ELSE 0 END) AS air_yards_completed,
            SUM(pass_touchdown) AS td,
            SUM(interception) as int,
            SUM(sack) as sacks,
            SUM(CASE WHEN sack = 1 THEN yards_gained ELSE 0 END) AS sack_yards,
            SUM(CASE WHEN sack = 1 THEN fumble ELSE 0 END) as fumbles,
            SUM(CASE WHEN sack = 1 THEN fumble_lost ELSE 0 END) AS fumbles_lost,
            SUM(CASE WHEN play_type = 'qb_spike' THEN 1 ELSE 0 END) AS spikes,
            SUM(epa) AS epa,
            SUM(CASE WHEN play_type != 'qb_spike' AND sack = 0 THEN epa ELSE 0 END) AS epa_pass,
            SUM(CASE WHEN interception = 1 THEN epa ELSE 0 END) as epa_int,
            SUM(CASE WHEN sack = 1 THEN epa ELSE 0 END) AS epa_sack,
            SUM(CASE WHEN play_type = 'qb_spike' THEN epa ELSE 0 END) AS epa_spike,
            CASE WHEN
                SUM(cpoe) IS NULL OR SUM(pass_attempt) = 0 THEN 0
                ELSE SUM(cpoe) / SUM(pass_attempt) END
            AS cpoe
        FROM
            play_by_play_enriched AS p
        LEFT JOIN
            (SELECT
                game_id,
                play_id,
                SUM(lateral_rec_yards) as lateral_rec_yards
            FROM
                lateral_receiving_yards
            GROUP BY
                game_id, play_id
            ) as l
        ON
            p.game_id = l.game_id AND p.play_id = l.play_id
        WHERE
            (play_type = 'pass' or play_type = 'qb_spike')
            AND two_point_attempt = 0
        GROUP BY
            year, week, passer_gsis_id, passer_position,
            p.game_id, defteam, posteam, passer, season_type
        ORDER BY
            epa DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of passing stats.")
    return df

# Run the overall passing extract and preview the first rows.
df_all = _extract(db_conn)
df_all.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe
0,2020,REG,2020_03_KC_BAL,KC,BAL,3.0,00-0033873,QB,P.Mahomes,31.0,42.0,385.0,346.0,207.0,4.0,0.0,0.0,0.0,0.0,0.0,0,31.377849,31.377849,0.0,0.0,0.0,9.17669
1,2020,REG,2020_06_ATL_MIN,ATL,MIN,6.0,00-0026143,QB,M.Ryan,30.0,42.0,371.0,270.0,200.0,4.0,0.0,2.0,-8.0,0.0,0.0,0,30.441585,33.212461,0.0,-2.770877,0.0,8.848381
2,2020,REG,2020_08_NYJ_KC,KC,NYJ,8.0,00-0033873,QB,P.Mahomes,31.0,42.0,416.0,388.0,218.0,5.0,0.0,0.0,0.0,0.0,0.0,0,27.690373,27.690373,0.0,0.0,0.0,8.220293
3,2020,REG,2020_07_TB_LV,TB,LV,7.0,00-0019596,QB,T.Brady,33.0,45.0,369.0,399.0,235.0,4.0,0.0,0.0,0.0,0.0,0.0,2,26.7001,27.056477,0.0,0.0,-0.356377,13.354659
4,2020,REG,2020_16_TB_DET,TB,DET,16.0,00-0019596,QB,T.Brady,22.0,28.0,348.0,359.0,266.0,4.0,0.0,1.0,-7.0,0.0,0.0,0,26.576751,28.141935,0.0,-1.565183,0.0,20.157525


In [286]:
def _extract_position(db_conn) -> pd.DataFrame:
    """Getting passing stats, per player per game"""
    logging.info("Extracting passing stats by player by game from play by play...")
    query = """
        SELECT
            year,
            season_type,
            p.game_id AS game_id,
            posteam AS team,
            defteam AS opp,
            week,
            passer_gsis_id AS gsis_id,
            passer_position AS pos,
            passer as player,
            
            -- WR stats
            SUM(complete_pass) FILTER (WHERE receiver_position = 'WR') AS completions_wr,
            SUM(pass_attempt) FILTER (WHERE receiver_position = 'WR') AS attempts_wr,
            SUM(CASE
                WHEN lateral_rec_yards IS NOT NULL AND sack = 0 THEN yards_gained + lateral_rec_yards
                WHEN sack = 0 THEN yards_gained
                ELSE 0 END
            ) FILTER (WHERE receiver_position = 'WR') AS yards_wr,
            SUM(CASE WHEN sack = 0 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position = 'WR') AS air_yards_intended_wr,
            SUM(CASE WHEN complete_pass = 1 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position = 'WR') AS air_yards_completed_wr,
            SUM(pass_touchdown) FILTER (WHERE receiver_position = 'WR') AS td_wr,
            SUM(interception) FILTER (WHERE receiver_position = 'WR') as int_wr,
            SUM(epa) FILTER (WHERE receiver_position = 'WR') AS epa_wr,
            SUM(cpoe) FILTER (WHERE receiver_position = 'WR') AS cpoe_wr,
            
            -- TE stats
            SUM(complete_pass) FILTER (WHERE receiver_position = 'TE') AS completions_te,
            SUM(pass_attempt) FILTER (WHERE receiver_position = 'TE') AS attempts_te,
            SUM(CASE
                WHEN lateral_rec_yards IS NOT NULL AND sack = 0 THEN yards_gained + lateral_rec_yards
                WHEN sack = 0 THEN yards_gained
                ELSE 0 END
            ) FILTER (WHERE receiver_position = 'TE') AS yards_te,
            SUM(CASE WHEN sack = 0 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position = 'TE') AS air_yards_intended_te,
            SUM(CASE WHEN complete_pass = 1 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position = 'TE') AS air_yards_completed_te,
            SUM(pass_touchdown) FILTER (WHERE receiver_position = 'TE') AS td_te,
            SUM(interception) FILTER (WHERE receiver_position = 'TE') as int_te,
            SUM(epa) FILTER (WHERE receiver_position = 'TE') AS epa_te,
            SUM(cpoe) FILTER (WHERE receiver_position = 'TE') AS cpoe_te,
            
            -- RB stats
            SUM(complete_pass) FILTER (WHERE receiver_position = 'RB') AS completions_rb,
            SUM(pass_attempt) FILTER (WHERE receiver_position = 'RB') AS attempts_rb,
            SUM(CASE
                WHEN lateral_rec_yards IS NOT NULL AND sack = 0 THEN yards_gained + lateral_rec_yards
                WHEN sack = 0 THEN yards_gained
                ELSE 0 END
            ) FILTER (WHERE receiver_position = 'RB') AS yards_rb,
            SUM(CASE WHEN sack = 0 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position = 'RB') AS air_yards_intended_rb,
            SUM(CASE WHEN complete_pass = 1 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position = 'RB') AS air_yards_completed_rb,
            SUM(pass_touchdown) FILTER (WHERE receiver_position = 'RB') AS td_rb,
            SUM(interception) FILTER (WHERE receiver_position = 'RB') as int_rb,
            SUM(epa) FILTER (WHERE receiver_position = 'RB') AS epa_rb,
            SUM(cpoe) FILTER (WHERE receiver_position = 'RB') AS cpoe_rb,

            -- NULL stats
            SUM(complete_pass) FILTER (WHERE receiver_position IS NULL) AS completions_null,
            SUM(pass_attempt) FILTER (WHERE receiver_position IS NULL) AS attempts_null,
            SUM(CASE
                WHEN lateral_rec_yards IS NOT NULL AND sack = 0 THEN yards_gained + lateral_rec_yards
                WHEN sack = 0 THEN yards_gained
                ELSE 0 END
            ) FILTER (WHERE receiver_position IS NULL) AS yards_null,
            SUM(CASE WHEN sack = 0 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position IS NULL) AS air_yards_intended_null,
            SUM(CASE WHEN complete_pass = 1 THEN air_yards ELSE 0 END)
                FILTER (WHERE receiver_position IS NULL) AS air_yards_completed_null,
            SUM(pass_touchdown) FILTER (WHERE receiver_position IS NULL) AS td_null,
            SUM(interception) FILTER (WHERE receiver_position IS NULL) as int_null,
            SUM(epa) FILTER (WHERE receiver_position IS NULL) AS epa_null,
            SUM(cpoe) FILTER (WHERE receiver_position IS NULL) AS cpoe_null,

            -- Other stats
            SUM(complete_pass) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0)  AS completions_other,
            SUM(pass_attempt) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0)  AS attempts_other,
            SUM(CASE
                WHEN lateral_rec_yards IS NOT NULL AND sack = 0 THEN yards_gained + lateral_rec_yards
                WHEN sack = 0 THEN yards_gained
                ELSE 0 END
            ) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0) 
            AS yards_other,
            SUM(CASE WHEN sack = 0 THEN air_yards ELSE 0 END) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0)  AS air_yards_intended_other,
            SUM(CASE WHEN complete_pass = 1 THEN air_yards ELSE 0 END)
                FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0) AS air_yards_completed_other,
            SUM(pass_touchdown) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0) AS td_other,
            SUM(interception) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0) as int_other,
            SUM(epa) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0) AS epa_other,
            SUM(cpoe) FILTER 
                (WHERE receiver_position NOT IN ('WR', 'TE', 'RB')
                AND receiver_position IS NOT NULL
                AND interception = 0) AS cpoe_other
                        
        FROM
            play_by_play_enriched AS p
        LEFT JOIN
            (SELECT
                game_id,
                play_id,
                SUM(lateral_rec_yards) as lateral_rec_yards
            FROM
                lateral_receiving_yards
            GROUP BY
                game_id, play_id
            ) as l
        ON
            p.game_id = l.game_id AND p.play_id = l.play_id
        WHERE
            play_type = 'pass' AND sack = 0 AND two_point_attempt = 0
        GROUP BY
            year, week, passer_gsis_id, passer_position,
            p.game_id, defteam, posteam, passer, season_type
        ORDER BY yards_wr desc NULLS LAST
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of passing stats.")
    return df

# Run the positional extract; buckets with no plays come back as NULL/NaN,
# so fill them with 0 before previewing.
df_position = _extract_position(db_conn)
df_position = df_position.fillna(0)
df_position.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other
0,2020,POST,2020_18_CLE_PIT,PIT,CLE,18.0,00-0022924,QB,B.Roethlisberger,34.0,48.0,405.0,520.0,301.0,3.0,1.0,16.737595,373.759291,7.0,13.0,62.0,53.0,26.0,1.0,2.0,-5.512647,-191.459593,6.0,7.0,34.0,-8.0,-9.0,0.0,1.0,-3.296775,104.927051,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020,REG,2020_03_DAL_SEA,DAL,SEA,3.0,00-0033077,QB,D.Prescott,26.0,37.0,405.0,412.0,271.0,3.0,1.0,23.29843,350.181147,4.0,7.0,48.0,50.0,18.0,0.0,0.0,0.675212,-81.242654,7.0,12.0,24.0,-30.0,-3.0,0.0,0.0,-5.3957,-193.634242,0.0,1.0,0.0,26.0,0.0,0.0,1.0,-2.184421,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020,REG,2020_01_SEA_ATL,ATL,SEA,1.0,00-0026143,QB,M.Ryan,27.0,36.0,401.0,418.0,306.0,2.0,0.0,24.646671,483.945912,3.0,6.0,38.0,63.0,37.0,0.0,0.0,-0.549004,-107.328787,7.0,10.0,11.0,6.0,1.0,0.0,0.0,-6.42063,-133.450878,0.0,2.0,0.0,37.0,0.0,0.0,1.0,-4.744752,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020,REG,2020_12_KC_TB,KC,TB,12.0,00-0033873,QB,P.Mahomes,25.0,33.0,366.0,401.0,253.0,3.0,0.0,25.582753,483.698681,9.0,9.0,84.0,54.0,54.0,0.0,0.0,7.074955,266.43199,3.0,3.0,12.0,-1.0,-1.0,0.0,0.0,-0.277656,59.580034,0.0,4.0,0.0,31.0,0.0,0.0,0.0,-4.414733,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020,REG,2020_02_BUF_MIA,BUF,MIA,2.0,00-0034857,QB,J.Allen,20.0,28.0,358.0,335.0,254.0,3.0,0.0,26.811792,339.594081,2.0,4.0,37.0,33.0,8.0,1.0,0.0,-3.106892,-65.226138,2.0,3.0,20.0,-6.0,-1.0,0.0,0.0,0.752051,-54.724234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [287]:
# Preview the games with the most passing yards to wide receivers.
df_position.sort_values('yards_wr', ascending=False).head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other
0,2020,POST,2020_18_CLE_PIT,PIT,CLE,18.0,00-0022924,QB,B.Roethlisberger,34.0,48.0,405.0,520.0,301.0,3.0,1.0,16.737595,373.759291,7.0,13.0,62.0,53.0,26.0,1.0,2.0,-5.512647,-191.459593,6.0,7.0,34.0,-8.0,-9.0,0.0,1.0,-3.296775,104.927051,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020,REG,2020_03_DAL_SEA,DAL,SEA,3.0,00-0033077,QB,D.Prescott,26.0,37.0,405.0,412.0,271.0,3.0,1.0,23.29843,350.181147,4.0,7.0,48.0,50.0,18.0,0.0,0.0,0.675212,-81.242654,7.0,12.0,24.0,-30.0,-3.0,0.0,0.0,-5.3957,-193.634242,0.0,1.0,0.0,26.0,0.0,0.0,1.0,-2.184421,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020,REG,2020_01_SEA_ATL,ATL,SEA,1.0,00-0026143,QB,M.Ryan,27.0,36.0,401.0,418.0,306.0,2.0,0.0,24.646671,483.945912,3.0,6.0,38.0,63.0,37.0,0.0,0.0,-0.549004,-107.328787,7.0,10.0,11.0,6.0,1.0,0.0,0.0,-6.42063,-133.450878,0.0,2.0,0.0,37.0,0.0,0.0,1.0,-4.744752,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020,REG,2020_12_KC_TB,KC,TB,12.0,00-0033873,QB,P.Mahomes,25.0,33.0,366.0,401.0,253.0,3.0,0.0,25.582753,483.698681,9.0,9.0,84.0,54.0,54.0,0.0,0.0,7.074955,266.43199,3.0,3.0,12.0,-1.0,-1.0,0.0,0.0,-0.277656,59.580034,0.0,4.0,0.0,31.0,0.0,0.0,0.0,-4.414733,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020,REG,2020_02_BUF_MIA,BUF,MIA,2.0,00-0034857,QB,J.Allen,20.0,28.0,358.0,335.0,254.0,3.0,0.0,26.811792,339.594081,2.0,4.0,37.0,33.0,8.0,1.0,0.0,-3.106892,-65.226138,2.0,3.0,20.0,-6.0,-1.0,0.0,0.0,0.752051,-54.724234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [288]:
def _transform(df_all: pd.DataFrame, df_position: pd.DataFrame) -> pd.DataFrame:
    join_cols = ['year', 'season_type', 'game_id', 'team', 'opp', 'week', 'gsis_id', 'pos', 'player']
    df = df_all.merge(df_position, how='left', on=join_cols)
    
    df['target_share_wr'] = df['attempts_wr'] / df['attempts']
    df['target_share_te'] = df['attempts_te'] / df['attempts']
    df['target_share_rb'] = df['attempts_rb'] / df['attempts']
    df['target_share_other'] = df['attempts_other'] / df['attempts']
  
    df['air_yards_intended_share_wr'] = df['air_yards_intended_wr'] / df['air_yards_intended']
    df['air_yards_intended_share_te'] = df['air_yards_intended_te'] / df['air_yards_intended']
    df['air_yards_intended_share_rb'] = df['air_yards_intended_rb'] / df['air_yards_intended']

    df['air_yards_completed_share_wr'] = df['air_yards_completed_wr'] / df['air_yards_completed']
    df['air_yards_completed_share_te'] = df['air_yards_completed_te'] / df['air_yards_completed']
    df['air_yards_completed_share_rb'] = df['air_yards_completed_rb'] / df['air_yards_completed']  
    
    df = df.replace([np.inf, -np.inf], np.nan)
    return df.fillna(0)    


In [289]:
# Combine the overall and positional frames and preview the result.
df = _transform(df_all, df_position)
df.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other,target_share_wr,target_share_te,target_share_rb,target_share_other,air_yards_intended_share_wr,air_yards_intended_share_te,air_yards_intended_share_rb,air_yards_completed_share_wr,air_yards_completed_share_te,air_yards_completed_share_rb
0,2020,REG,2020_03_KC_BAL,KC,BAL,3.0,00-0033873,QB,P.Mahomes,31.0,42.0,385.0,346.0,207.0,4.0,0.0,0.0,0.0,0.0,0.0,0,31.377849,31.377849,0.0,0.0,0.0,9.17669,16.0,21.0,220.0,288.0,169.0,2.0,0.0,18.443355,393.802585,6.0,8.0,87.0,52.0,38.0,0.0,0.0,5.017431,9.524,8.0,11.0,76.0,1.0,-1.0,1.0,0.0,7.776404,-49.39521,0.0,1.0,0.0,4.0,0.0,0.0,0.0,-0.764009,0.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,0.904668,31.489611,0.5,0.190476,0.261905,0.02381,0.83237,0.150289,0.00289,0.816425,0.183575,-0.004831
1,2020,REG,2020_06_ATL_MIN,ATL,MIN,6.0,00-0026143,QB,M.Ryan,30.0,42.0,371.0,270.0,200.0,4.0,0.0,2.0,-8.0,0.0,0.0,0,30.441585,33.212461,0.0,-2.770877,0.0,8.848381,19.0,25.0,268.0,224.0,170.0,3.0,0.0,23.358781,224.087441,4.0,4.0,57.0,21.0,21.0,1.0,0.0,8.707816,108.141237,6.0,7.0,43.0,5.0,6.0,0.0,0.0,3.862365,19.846362,0.0,3.0,0.0,17.0,0.0,0.0,0.0,-2.419047,0.0,1.0,1.0,3.0,3.0,3.0,0.0,0.0,-0.297454,19.556969,0.595238,0.095238,0.166667,0.02381,0.82963,0.077778,0.018519,0.85,0.105,0.03
2,2020,REG,2020_08_NYJ_KC,KC,NYJ,8.0,00-0033873,QB,P.Mahomes,31.0,42.0,416.0,388.0,218.0,5.0,0.0,0.0,0.0,0.0,0.0,0,27.690373,27.690373,0.0,0.0,0.0,8.220293,14.0,20.0,249.0,294.0,171.0,4.0,0.0,17.918665,220.967728,10.0,15.0,119.0,105.0,58.0,1.0,0.0,6.302816,8.920118,7.0,7.0,48.0,-11.0,-11.0,0.0,0.0,3.468892,115.36448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47619,0.357143,0.166667,0.0,0.757732,0.270619,-0.028351,0.784404,0.266055,-0.050459
3,2020,REG,2020_07_TB_LV,TB,LV,7.0,00-0019596,QB,T.Brady,33.0,45.0,369.0,399.0,235.0,4.0,0.0,0.0,0.0,0.0,0.0,2,26.7001,27.056477,0.0,0.0,-0.356377,13.354659,19.0,22.0,238.0,235.0,160.0,3.0,0.0,21.308635,473.062178,7.0,10.0,82.0,123.0,68.0,1.0,0.0,5.583212,138.986549,7.0,9.0,49.0,14.0,7.0,0.0,0.0,1.71599,-11.089057,0.0,2.0,0.0,27.0,0.0,0.0,0.0,-1.55136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.488889,0.222222,0.2,0.0,0.588972,0.308271,0.035088,0.680851,0.289362,0.029787
4,2020,REG,2020_16_TB_DET,TB,DET,16.0,00-0019596,QB,T.Brady,22.0,28.0,348.0,359.0,266.0,4.0,0.0,1.0,-7.0,0.0,0.0,0,26.576751,28.141935,0.0,-1.565183,0.0,20.157525,14.0,18.0,229.0,286.0,193.0,3.0,0.0,19.641855,334.387913,4.0,4.0,80.0,66.0,66.0,1.0,0.0,7.51591,152.923632,4.0,4.0,39.0,7.0,7.0,0.0,0.0,2.378777,77.09915,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.394607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.642857,0.142857,0.142857,0.0,0.796657,0.183844,0.019499,0.725564,0.24812,0.026316


In [290]:
# Write the combined frame to passing.csv in the working directory, without the index.
df.to_csv('passing.csv', index=False)

In [283]:
# Summary statistics of the numeric columns, as a sanity check on value ranges.
df.describe()

Unnamed: 0,year,week,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other,target_share_wr,target_share_te,target_share_rb,target_share_other,air_yards_intended_share_wr,air_yards_intended_share_te,air_yards_intended_share_rb,air_yards_completed_share_wr,air_yards_completed_share_te,air_yards_completed_share_rb,epa_share_wr,epa_share_te,epa_share_rb,epa_share_other
count,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0,675.0
mean,2020.0,9.714074,18.244444,29.758519,202.69037,221.361481,110.457778,1.352593,0.613333,1.751111,-11.656296,0.232593,0.13037,0.127407,1.792937,4.979363,-2.794407,-3.18576,-0.000665,1.149579,10.500741,16.084444,132.662222,169.008889,86.657778,0.832593,0.419259,4.627433,29.90424,3.798519,5.634074,40.746667,41.637037,23.100741,0.355556,0.128889,1.153798,2.442212,3.761481,4.924444,27.896296,3.511111,0.291852,0.143704,0.051852,-0.02451,-5.346975,0.0,0.973333,0.0,6.379259,0.0,0.0,0.008889,-0.778835,0.0,0.183704,0.259259,1.385185,0.798519,0.407407,0.020741,0.0,0.034634,-0.439745,0.51756,0.176637,0.165883,0.027971,0.712982,0.18134,0.033869,0.712748,0.188563,0.031387,0.531418,-0.104998,0.494463,0.012361
std,0.0,5.222329,10.026276,15.361791,114.713475,128.292945,70.563665,1.230234,0.813535,1.603125,11.689622,0.48796,0.378439,0.405897,9.872812,9.236225,3.968766,3.320148,0.106784,16.507891,6.477163,9.249604,84.186974,104.745813,60.359285,0.960714,0.644941,7.223073,182.00769,2.980799,4.099399,35.683235,36.279947,22.932657,0.595067,0.364986,3.747743,105.692466,2.963326,3.623028,26.383464,14.076343,10.046295,0.383372,0.228481,2.899394,93.719564,0.0,1.156532,0.0,12.730772,0.0,0.0,0.093931,1.079356,0.0,0.489081,0.611014,5.038406,3.978167,2.6678,0.15267,0.0,0.582843,21.890824,0.20687,0.13585,0.151091,0.139479,0.261664,0.16355,0.198453,0.377559,0.272186,0.435382,3.848059,5.513361,5.845697,0.563176
min,2020.0,1.0,0.0,1.0,0.0,-8.0,-11.0,0.0,0.0,0.0,-62.0,0.0,0.0,0.0,-27.435011,-26.100595,-21.33836,-19.7016,-1.189761,-74.807233,0.0,0.0,-3.0,-7.0,-7.0,0.0,0.0,-19.542518,-641.543058,0.0,0.0,-1.0,-5.0,-7.0,0.0,0.0,-11.741766,-361.309057,0.0,0.0,-9.0,-35.0,-34.0,0.0,0.0,-13.433739,-658.381599,0.0,0.0,0.0,-13.0,0.0,0.0,0.0,-7.8448,0.0,0.0,0.0,-10.0,-10.0,-10.0,0.0,0.0,-3.321605,-147.049183,0.0,0.0,0.0,0.0,0.0,-0.185185,-1.0,-3.0,-4.0,-2.333333,-53.948221,-113.152785,-16.597941,-10.772376
25%,2020.0,5.0,12.0,22.0,122.0,139.0,60.5,0.0,0.0,0.0,-18.0,0.0,0.0,0.0,-3.61677,-0.889879,-4.727705,-4.784709,0.0,-4.709676,6.0,10.0,70.0,97.0,37.5,0.0,0.0,0.0,-74.17866,1.0,2.0,9.0,11.0,3.0,0.0,0.0,-0.565807,-59.249145,1.0,2.0,8.0,-4.0,-5.0,0.0,0.0,-1.313497,-50.091332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.24788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.440927,0.090909,0.088235,0.0,0.638727,0.061818,-0.015152,0.613023,0.030032,-0.041484,0.122935,0.0,-0.066819,0.0
50%,2020.0,10.0,20.0,33.0,220.0,234.0,115.0,1.0,0.0,1.0,-9.0,0.0,0.0,0.0,1.204069,3.704098,0.0,-2.449833,0.0,0.965504,11.0,17.0,136.0,173.0,85.0,1.0,0.0,3.453016,10.15498,4.0,6.0,36.0,37.0,18.0,0.0,0.0,0.617972,0.0,3.0,5.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.407604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.530612,0.173913,0.150943,0.0,0.767635,0.163934,0.0,0.79646,0.166667,0.0,0.664844,0.160368,0.041242,0.0
75%,2020.0,14.0,25.0,41.0,284.5,313.0,157.5,2.0,1.0,3.0,0.0,0.0,0.0,0.0,8.278782,11.444477,0.0,0.0,0.0,7.008256,15.0,22.0,192.0,235.5,128.0,1.0,1.0,9.713733,143.025792,6.0,9.0,62.0,63.0,36.0,1.0,0.0,3.201197,57.751465,5.0,7.0,40.0,9.0,3.0,0.0,0.0,1.269197,46.295434,0.0,2.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.622222,0.243844,0.203736,0.0,0.867847,0.2589,0.03837,0.917854,0.286239,0.03306,1.0,0.491157,0.336831,0.0
max,2020.0,20.0,47.0,68.0,502.0,571.0,344.0,5.0,4.0,8.0,0.0,3.0,3.0,4.0,31.377849,33.212461,0.0,2.109911,0.736607,74.084499,34.0,48.0,405.0,520.0,306.0,4.0,3.0,26.811792,651.333398,14.0,26.0,200.0,229.0,132.0,3.0,3.0,15.426384,433.549133,17.0,20.0,146.0,132.0,71.0,2.0,2.0,10.539444,345.35982,0.0,5.0,0.0,98.0,0.0,0.0,1.0,0.330168,0.0,4.0,4.0,44.0,47.0,22.0,2.0,0.0,4.014011,69.974941,1.0,1.0,1.0,1.0,1.5,1.0,3.0,3.333333,1.2,8.0,50.852313,11.000177,129.523539,5.783382


In [284]:
# numpy provides the -inf constant used in the filter below.
import numpy as np

# Debug check: list rows whose WR share of intended air yards is exactly -inf.
# NOTE(review): presumably produced by dividing a negative WR air-yards value
# by a zero total — confirm against the share computation.
df[df['air_yards_intended_share_wr'] == -np.inf]

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other,target_share_wr,target_share_te,target_share_rb,target_share_other,air_yards_intended_share_wr,air_yards_intended_share_te,air_yards_intended_share_rb,air_yards_completed_share_wr,air_yards_completed_share_te,air_yards_completed_share_rb,epa_share_wr,epa_share_te,epa_share_rb,epa_share_other


In [265]:

# Replace +inf / -inf anywhere in the frame with NaN.
df = df.replace([np.inf, -np.inf], np.nan)

In [266]:
# Re-check for -inf values in the WR intended-air-yards share column.
df[df['air_yards_intended_share_wr'] == -np.inf]

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other,target_share_wr,target_share_te,target_share_rb,target_share_other,air_yards_intended_share_wr,air_yards_intended_share_te,air_yards_intended_share_rb,air_yards_completed_share_wr,air_yards_completed_share_te,air_yards_completed_share_rb,epa_share_wr,epa_share_te,epa_share_rb,epa_share_other


## Passing by player by year

In [267]:
# Preview the first rows of the frame before aggregating.
df.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other,target_share_wr,target_share_te,target_share_rb,target_share_other,air_yards_intended_share_wr,air_yards_intended_share_te,air_yards_intended_share_rb,air_yards_completed_share_wr,air_yards_completed_share_te,air_yards_completed_share_rb,epa_share_wr,epa_share_te,epa_share_rb,epa_share_other
0,2020,REG,2020_03_KC_BAL,KC,BAL,3.0,00-0033873,QB,P.Mahomes,31.0,42.0,385.0,346.0,207.0,4.0,0.0,0.0,0.0,0.0,0.0,0,31.377849,31.377849,0.0,0.0,0.0,9.17669,16.0,21.0,220.0,288.0,169.0,2.0,0.0,18.443355,393.802585,6.0,8.0,87.0,52.0,38.0,0.0,0.0,5.017431,9.524,8.0,11.0,76.0,1.0,-1.0,1.0,0.0,7.776404,-49.39521,0.0,1.0,0.0,4.0,0.0,0.0,0.0,-0.764009,0.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,0.904668,31.489611,0.5,0.190476,0.261905,0.02381,0.83237,0.150289,0.00289,0.816425,0.183575,-0.004831,0.587783,0.159904,0.247831,0.028831
1,2020,REG,2020_06_ATL_MIN,ATL,MIN,6.0,00-0026143,QB,M.Ryan,30.0,42.0,371.0,270.0,200.0,4.0,0.0,2.0,-8.0,0.0,0.0,0,30.441585,33.212461,0.0,-2.770877,0.0,8.848381,19.0,25.0,268.0,224.0,170.0,3.0,0.0,23.358781,224.087441,4.0,4.0,57.0,21.0,21.0,1.0,0.0,8.707816,108.141237,6.0,7.0,43.0,5.0,6.0,0.0,0.0,3.862365,19.846362,0.0,3.0,0.0,17.0,0.0,0.0,0.0,-2.419047,0.0,1.0,1.0,3.0,3.0,3.0,0.0,0.0,-0.297454,19.556969,0.595238,0.095238,0.166667,0.02381,0.82963,0.077778,0.018519,0.85,0.105,0.03,0.703314,0.262185,0.116293,-0.008956
2,2020,REG,2020_08_NYJ_KC,KC,NYJ,8.0,00-0033873,QB,P.Mahomes,31.0,42.0,416.0,388.0,218.0,5.0,0.0,0.0,0.0,0.0,0.0,0,27.690373,27.690373,0.0,0.0,0.0,8.220293,14.0,20.0,249.0,294.0,171.0,4.0,0.0,17.918665,220.967728,10.0,15.0,119.0,105.0,58.0,1.0,0.0,6.302816,8.920118,7.0,7.0,48.0,-11.0,-11.0,0.0,0.0,3.468892,115.36448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47619,0.357143,0.166667,0.0,0.757732,0.270619,-0.028351,0.784404,0.266055,-0.050459,0.647108,0.227618,0.125274,0.0
3,2020,REG,2020_07_TB_LV,TB,LV,7.0,00-0019596,QB,T.Brady,33.0,45.0,369.0,399.0,235.0,4.0,0.0,0.0,0.0,0.0,0.0,2,26.7001,27.056477,0.0,0.0,-0.356377,13.354659,19.0,22.0,238.0,235.0,160.0,3.0,0.0,21.308635,473.062178,7.0,10.0,82.0,123.0,68.0,1.0,0.0,5.583212,138.986549,7.0,9.0,49.0,14.0,7.0,0.0,0.0,1.71599,-11.089057,0.0,2.0,0.0,27.0,0.0,0.0,0.0,-1.55136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.488889,0.222222,0.2,0.0,0.588972,0.308271,0.035088,0.680851,0.289362,0.029787,0.787561,0.206354,0.063423,0.0
4,2020,REG,2020_16_TB_DET,TB,DET,16.0,00-0019596,QB,T.Brady,22.0,28.0,348.0,359.0,266.0,4.0,0.0,1.0,-7.0,0.0,0.0,0,26.576751,28.141935,0.0,-1.565183,0.0,20.157525,14.0,18.0,229.0,286.0,193.0,3.0,0.0,19.641855,334.387913,4.0,4.0,80.0,66.0,66.0,1.0,0.0,7.51591,152.923632,4.0,4.0,39.0,7.0,7.0,0.0,0.0,2.378777,77.09915,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.394607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.642857,0.142857,0.142857,0.0,0.796657,0.183844,0.019499,0.725564,0.24812,0.026316,0.697957,0.267072,0.084528,0.0


In [268]:
# Keep every column whose name does not contain "_share"; the share ratios
# are excluded before the rows are summed in the next step.
df2 = df.loc[:, ["_share" not in name for name in df.columns]]
df2.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other
0,2020,REG,2020_03_KC_BAL,KC,BAL,3.0,00-0033873,QB,P.Mahomes,31.0,42.0,385.0,346.0,207.0,4.0,0.0,0.0,0.0,0.0,0.0,0,31.377849,31.377849,0.0,0.0,0.0,9.17669,16.0,21.0,220.0,288.0,169.0,2.0,0.0,18.443355,393.802585,6.0,8.0,87.0,52.0,38.0,0.0,0.0,5.017431,9.524,8.0,11.0,76.0,1.0,-1.0,1.0,0.0,7.776404,-49.39521,0.0,1.0,0.0,4.0,0.0,0.0,0.0,-0.764009,0.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,0.904668,31.489611
1,2020,REG,2020_06_ATL_MIN,ATL,MIN,6.0,00-0026143,QB,M.Ryan,30.0,42.0,371.0,270.0,200.0,4.0,0.0,2.0,-8.0,0.0,0.0,0,30.441585,33.212461,0.0,-2.770877,0.0,8.848381,19.0,25.0,268.0,224.0,170.0,3.0,0.0,23.358781,224.087441,4.0,4.0,57.0,21.0,21.0,1.0,0.0,8.707816,108.141237,6.0,7.0,43.0,5.0,6.0,0.0,0.0,3.862365,19.846362,0.0,3.0,0.0,17.0,0.0,0.0,0.0,-2.419047,0.0,1.0,1.0,3.0,3.0,3.0,0.0,0.0,-0.297454,19.556969
2,2020,REG,2020_08_NYJ_KC,KC,NYJ,8.0,00-0033873,QB,P.Mahomes,31.0,42.0,416.0,388.0,218.0,5.0,0.0,0.0,0.0,0.0,0.0,0,27.690373,27.690373,0.0,0.0,0.0,8.220293,14.0,20.0,249.0,294.0,171.0,4.0,0.0,17.918665,220.967728,10.0,15.0,119.0,105.0,58.0,1.0,0.0,6.302816,8.920118,7.0,7.0,48.0,-11.0,-11.0,0.0,0.0,3.468892,115.36448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020,REG,2020_07_TB_LV,TB,LV,7.0,00-0019596,QB,T.Brady,33.0,45.0,369.0,399.0,235.0,4.0,0.0,0.0,0.0,0.0,0.0,2,26.7001,27.056477,0.0,0.0,-0.356377,13.354659,19.0,22.0,238.0,235.0,160.0,3.0,0.0,21.308635,473.062178,7.0,10.0,82.0,123.0,68.0,1.0,0.0,5.583212,138.986549,7.0,9.0,49.0,14.0,7.0,0.0,0.0,1.71599,-11.089057,0.0,2.0,0.0,27.0,0.0,0.0,0.0,-1.55136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020,REG,2020_16_TB_DET,TB,DET,16.0,00-0019596,QB,T.Brady,22.0,28.0,348.0,359.0,266.0,4.0,0.0,1.0,-7.0,0.0,0.0,0,26.576751,28.141935,0.0,-1.565183,0.0,20.157525,14.0,18.0,229.0,286.0,193.0,3.0,0.0,19.641855,334.387913,4.0,4.0,80.0,66.0,66.0,1.0,0.0,7.51591,152.923632,4.0,4.0,39.0,7.0,7.0,0.0,0.0,2.378777,77.09915,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.394607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [269]:
# Collapse the rows to one per (year, season_type, team, gsis_id, pos, player)
# by summing every numeric stat column.
# numeric_only=True makes the exclusion of text columns (e.g. game_id, opp)
# explicit; without it newer pandas versions no longer drop them silently.
# NOTE(review): the cpoe* columns are summed as well — a summed cpoe is
# probably not a meaningful season-level value; confirm how it should be
# aggregated.
df_year = (
    df2
    .groupby(['year', 'season_type', 'team', 'gsis_id', 'pos', 'player'], as_index=False)
    .sum(numeric_only=True)
)
# A sum of week numbers carries no information, so discard it.
df_year = df_year.drop('week', axis=1)
df_year.sort_values('yards', ascending=False).head()

Unnamed: 0,year,season_type,team,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other
69,2020,REG,HOU,00-0033537,QB,D.Watson,382.0,593.0,4823.0,4837.0,2847.0,33.0,7.0,49.0,-293.0,4.0,2.0,2,117.257648,195.153615,-29.852111,-78.482956,0.586989,90.099783,238.0,333.0,3313.0,3672.0,2270.0,22.0,7.0,160.732575,2469.900718,75.0,100.0,913.0,753.0,480.0,7.0,0.0,48.042234,839.112136,69.0,90.0,597.0,247.0,97.0,4.0,0.0,4.802283,92.341116,0.0,19.0,0.0,165.0,0.0,0.0,0.0,-18.423477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
80,2020,REG,KC,00-0033873,QB,P.Mahomes,390.0,610.0,4740.0,4910.0,2499.0,38.0,6.0,22.0,-147.0,3.0,2.0,1,164.783255,204.597346,-34.18726,-39.623116,-0.190975,45.265022,206.0,308.0,2714.0,3432.0,1697.0,24.0,2.0,145.522942,1383.308682,116.0,160.0,1509.0,1328.0,856.0,11.0,3.0,68.367863,1425.293311,67.0,101.0,515.0,6.0,-55.0,2.0,1.0,2.08525,-1236.787689,0.0,17.0,0.0,143.0,0.0,0.0,0.0,-12.283379,0.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,0.904668,31.489611
124,2020,REG,TB,00-0019596,QB,T.Brady,401.0,631.0,4633.0,5529.0,2823.0,40.0,12.0,21.0,-143.0,1.0,0.0,4,121.549613,154.740568,-60.175792,-32.47257,-0.718385,61.998554,232.0,336.0,3038.0,3901.0,2083.0,28.0,8.0,145.381619,2650.603761,86.0,137.0,1067.0,1462.0,726.0,10.0,3.0,47.165101,345.598212,83.0,117.0,528.0,84.0,14.0,2.0,1.0,-24.296326,-822.398666,0.0,16.0,0.0,82.0,0.0,0.0,0.0,-13.509826,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22,2020,REG,ATL,00-0026143,QB,M.Ryan,407.0,667.0,4581.0,5342.0,2939.0,26.0,11.0,41.0,-257.0,4.0,2.0,1,58.597917,127.208177,-39.656559,-68.835721,0.225461,17.236272,264.0,394.0,3432.0,4410.0,2511.0,20.0,7.0,146.181033,2025.029935,65.0,103.0,652.0,626.0,341.0,6.0,2.0,16.204552,-817.811656,67.0,91.0,438.0,90.0,72.0,0.0,0.0,-7.737334,-383.512294,0.0,22.0,0.0,188.0,0.0,0.0,2.0,-24.886623,0.0,11.0,15.0,59.0,28.0,15.0,0.0,0.0,-2.553451,-84.425908
32,2020,REG,BUF,00-0034857,QB,J.Allen,396.0,598.0,4544.0,4857.0,2673.0,37.0,10.0,26.0,-159.0,5.0,2.0,0,159.575826,202.767085,-41.737546,-43.191259,0.0,93.124603,302.0,412.0,3719.0,4093.0,2436.0,26.0,8.0,205.761436,3788.594489,41.0,64.0,439.0,462.0,207.0,9.0,2.0,9.529699,-146.213073,53.0,72.0,386.0,143.0,30.0,2.0,0.0,2.529237,-29.39257,0.0,24.0,0.0,159.0,0.0,0.0,0.0,-15.053287,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [275]:
def _transform(df: pd.DataFrame) -> pd.DataFrame:

    df['target_share_wr'] = df['attempts_wr'] / df['attempts']
    df['target_share_te'] = df['attempts_te'] / df['attempts']
    df['target_share_rb'] = df['attempts_rb'] / df['attempts']
    df['target_share_other'] = df['attempts_other'] / df['attempts']
    
    df['air_yards_intended_share_wr'] = df['air_yards_intended_wr'] / df['air_yards_intended']
    df['air_yards_intended_share_te'] = df['air_yards_intended_te'] / df['air_yards_intended']
    df['air_yards_intended_share_rb'] = df['air_yards_intended_rb'] / df['air_yards_intended']

    df['air_yards_completed_share_wr'] = df['air_yards_completed_wr'] / df['air_yards_completed']
    df['air_yards_completed_share_te'] = df['air_yards_completed_te'] / df['air_yards_completed']
    df['air_yards_completed_share_rb'] = df['air_yards_completed_rb'] / df['air_yards_completed']  
    
    df = df.replace([np.inf, -np.inf], np.nan)
    return df.fillna(0) 

In [276]:
# Compute the share columns on the aggregated frame.
df_year = _transform(df_year)
# Show the five rows with the most passing yards.
df_year.sort_values('yards', ascending=False).head()

Unnamed: 0,year,season_type,team,gsis_id,pos,player,completions,attempts,yards,air_yards_intended,air_yards_completed,td,int,sacks,sack_yards,fumbles,fumbles_lost,spikes,epa,epa_pass,epa_int,epa_sack,epa_spike,cpoe,completions_wr,attempts_wr,yards_wr,air_yards_intended_wr,air_yards_completed_wr,td_wr,int_wr,epa_wr,cpoe_wr,completions_te,attempts_te,yards_te,air_yards_intended_te,air_yards_completed_te,td_te,int_te,epa_te,cpoe_te,completions_rb,attempts_rb,yards_rb,air_yards_intended_rb,air_yards_completed_rb,td_rb,int_rb,epa_rb,cpoe_rb,completions_null,attempts_null,yards_null,air_yards_intended_null,air_yards_completed_null,td_null,int_null,epa_null,cpoe_null,completions_other,attempts_other,yards_other,air_yards_intended_other,air_yards_completed_other,td_other,int_other,epa_other,cpoe_other,target_share_wr,target_share_te,target_share_rb,target_share_other,air_yards_intended_share_wr,air_yards_intended_share_te,air_yards_intended_share_rb,air_yards_completed_share_wr,air_yards_completed_share_te,air_yards_completed_share_rb,epa_share_wr,epa_share_te,epa_share_rb
69,2020,REG,HOU,00-0033537,QB,D.Watson,382.0,593.0,4823.0,4837.0,2847.0,33.0,7.0,49.0,-293.0,4.0,2.0,2,117.257648,195.153615,-29.852111,-78.482956,0.586989,90.099783,238.0,333.0,3313.0,3672.0,2270.0,22.0,7.0,160.732575,2469.900718,75.0,100.0,913.0,753.0,480.0,7.0,0.0,48.042234,839.112136,69.0,90.0,597.0,247.0,97.0,4.0,0.0,4.802283,92.341116,0.0,19.0,0.0,165.0,0.0,0.0,0.0,-18.423477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.561551,0.168634,0.151771,0.0,0.759148,0.155675,0.051065,0.797331,0.168599,0.034071,0.823621,0.246176,0.024608
80,2020,REG,KC,00-0033873,QB,P.Mahomes,390.0,610.0,4740.0,4910.0,2499.0,38.0,6.0,22.0,-147.0,3.0,2.0,1,164.783255,204.597346,-34.18726,-39.623116,-0.190975,45.265022,206.0,308.0,2714.0,3432.0,1697.0,24.0,2.0,145.522942,1383.308682,116.0,160.0,1509.0,1328.0,856.0,11.0,3.0,68.367863,1425.293311,67.0,101.0,515.0,6.0,-55.0,2.0,1.0,2.08525,-1236.787689,0.0,17.0,0.0,143.0,0.0,0.0,0.0,-12.283379,0.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,0.904668,31.489611,0.504918,0.262295,0.165574,0.001639,0.698982,0.270468,0.001222,0.679072,0.342537,-0.022009,0.711265,0.334158,0.010192
124,2020,REG,TB,00-0019596,QB,T.Brady,401.0,631.0,4633.0,5529.0,2823.0,40.0,12.0,21.0,-143.0,1.0,0.0,4,121.549613,154.740568,-60.175792,-32.47257,-0.718385,61.998554,232.0,336.0,3038.0,3901.0,2083.0,28.0,8.0,145.381619,2650.603761,86.0,137.0,1067.0,1462.0,726.0,10.0,3.0,47.165101,345.598212,83.0,117.0,528.0,84.0,14.0,2.0,1.0,-24.296326,-822.398666,0.0,16.0,0.0,82.0,0.0,0.0,0.0,-13.509826,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.532488,0.217116,0.18542,0.0,0.705553,0.264424,0.015193,0.737868,0.257173,0.004959,0.939518,0.304801,-0.157013
22,2020,REG,ATL,00-0026143,QB,M.Ryan,407.0,667.0,4581.0,5342.0,2939.0,26.0,11.0,41.0,-257.0,4.0,2.0,1,58.597917,127.208177,-39.656559,-68.835721,0.225461,17.236272,264.0,394.0,3432.0,4410.0,2511.0,20.0,7.0,146.181033,2025.029935,65.0,103.0,652.0,626.0,341.0,6.0,2.0,16.204552,-817.811656,67.0,91.0,438.0,90.0,72.0,0.0,0.0,-7.737334,-383.512294,0.0,22.0,0.0,188.0,0.0,0.0,2.0,-24.886623,0.0,11.0,15.0,59.0,28.0,15.0,0.0,0.0,-2.553451,-84.425908,0.590705,0.154423,0.136432,0.022489,0.825534,0.117185,0.016848,0.854372,0.116026,0.024498,1.149148,0.127386,-0.060824
32,2020,REG,BUF,00-0034857,QB,J.Allen,396.0,598.0,4544.0,4857.0,2673.0,37.0,10.0,26.0,-159.0,5.0,2.0,0,159.575826,202.767085,-41.737546,-43.191259,0.0,93.124603,302.0,412.0,3719.0,4093.0,2436.0,26.0,8.0,205.761436,3788.594489,41.0,64.0,439.0,462.0,207.0,9.0,2.0,9.529699,-146.213073,53.0,72.0,386.0,143.0,30.0,2.0,0.0,2.529237,-29.39257,0.0,24.0,0.0,159.0,0.0,0.0,0.0,-15.053287,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.688963,0.107023,0.120401,0.0,0.842701,0.09512,0.029442,0.911336,0.077441,0.011223,1.014767,0.046998,0.012474


## Debugging EPA

In [272]:
# Debug view: week, player and every column whose name contains 'epa'
# (first 20 rows).
df[['week', 'player'] + [col for col in df.columns if 'epa' in col]].head(20)

Unnamed: 0,week,player,epa,epa_pass,epa_int,epa_sack,epa_spike,epa_wr,epa_te,epa_rb,epa_null,epa_other,epa_share_wr,epa_share_te,epa_share_rb,epa_share_other
0,3.0,P.Mahomes,31.377849,31.377849,0.0,0.0,0.0,18.443355,5.017431,7.776404,-0.764009,0.904668,0.587783,0.159904,0.247831,0.028831
1,6.0,M.Ryan,30.441585,33.212461,0.0,-2.770877,0.0,23.358781,8.707816,3.862365,-2.419047,-0.297454,0.703314,0.262185,0.116293,-0.008956
2,8.0,P.Mahomes,27.690373,27.690373,0.0,0.0,0.0,17.918665,6.302816,3.468892,0.0,0.0,0.647108,0.227618,0.125274,0.0
3,7.0,T.Brady,26.7001,27.056477,0.0,0.0,-0.356377,21.308635,5.583212,1.71599,-1.55136,0.0,0.787561,0.206354,0.063423,0.0
4,16.0,T.Brady,26.576751,28.141935,0.0,-1.565183,0.0,19.641855,7.51591,2.378777,-1.394607,0.0,0.697957,0.267072,0.084528,0.0
5,4.0,A.Rodgers,25.859497,27.70152,0.0,-1.842023,0.0,6.952259,12.232603,8.516657,0.0,0.0,0.25097,0.441586,0.307444,0.0
6,15.0,J.Allen,25.663771,27.42544,0.0,-1.761669,0.0,25.398701,2.952196,0.107462,-1.032919,0.0,0.9261,0.107644,0.003918,0.0
7,3.0,N.Mullens,24.949303,27.881458,0.0,-2.932155,0.0,12.392187,5.143623,10.539444,-0.558483,0.364687,0.44446,0.184482,0.378009,0.01308
8,13.0,B.Mayfield,24.863516,24.863516,0.0,0.0,0.0,19.174945,3.675509,3.698866,-2.930725,1.244922,0.771208,0.147827,0.148767,0.05007
9,15.0,R.Tannehill,23.807681,25.217501,0.0,-1.40982,0.0,11.688617,8.90174,4.862541,-0.235396,0.0,0.463512,0.352998,0.192824,0.0


In [273]:
# Scratch arithmetic for the EPA check.
# NOTE(review): presumably the positional EPA components of a single row being
# summed to compare against its total — the operands do not appear in the
# rows shown above, so confirm which row they came from.
25.839068 + 1.153104 + -0.729256 + -2.081523

24.181393

In [274]:
# Debug view: week, player and every column whose name contains 'target'
# (first 20 rows).
df[['week', 'player'] + [col for col in df.columns if 'target' in col]].head(20)

Unnamed: 0,week,player,target_share_wr,target_share_te,target_share_rb,target_share_other
0,3.0,P.Mahomes,0.5,0.190476,0.261905,0.02381
1,6.0,M.Ryan,0.595238,0.095238,0.166667,0.02381
2,8.0,P.Mahomes,0.47619,0.357143,0.166667,0.0
3,7.0,T.Brady,0.488889,0.222222,0.2,0.0
4,16.0,T.Brady,0.642857,0.142857,0.142857,0.0
5,4.0,A.Rodgers,0.352941,0.176471,0.441176,0.0
6,15.0,J.Allen,0.731707,0.121951,0.073171,0.0
7,3.0,N.Mullens,0.421053,0.263158,0.210526,0.026316
8,13.0,B.Mayfield,0.666667,0.090909,0.151515,0.030303
9,15.0,R.Tannehill,0.464286,0.285714,0.178571,0.0
