In [37]:
import logging
import os
import sys

import numpy as np
import pandas as pd

sys.path.insert(0, '..')

from app import db

pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 50)

  pd.set_option('display.max_colwidth', -1)


In [121]:
db_conn = db.get_db_conn()

In [54]:
def _extract_designed(db_conn) -> pd.DataFrame:
    """Getting designed rushing stats, per player per game, from the play by play."""
    logging.info("Extracting designed rushing stats from play by play...")
    query = """
        SELECT
            year,
            season_type,
            game_id,
            defteam AS opp,
            posteam AS team,
            week,
            rusher_gsis_id AS gsis_id,
            rusher_position AS pos,
            rusher,
            'designed' AS rush_type,
            SUM(rush) AS attempts,
            SUM(yards_gained) AS yards,
            SUM(rush_touchdown) AS td,
            SUM(fumble) AS fumbles,
            SUM(fumble_lost) AS fumbles_lost,
            SUM(fumble_out_of_bounds) AS fumbles_out_of_bounds,
            SUM(epa) AS epa
        FROM
            play_by_play_enriched
        WHERE
            play_type = 'run'
            AND two_point_attempt = 0
            AND rusher is not null
        GROUP BY
            year, season_type, game_id, rusher_gsis_id, rusher_position,
            week, posteam, defteam, rusher_id, rusher
        ORDER BY
            yards DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of designed rushing stats.")
    return df


def _extract_scrambles(db_conn) -> pd.DataFrame:
    """Getting scramble runs from the play by play.

    Scrambles 'play_type' of 'run', but the `rusher` value is null and the
    `passer` value is the scrambler.
    """
    logging.info("Extracting scramble rushing stats from play by play...")
    query = """
        SELECT
            year,
            season_type,
            game_id,
            defteam AS opp,
            posteam AS team,
            week,
            passer_gsis_id AS gsis_id,
            passer_position as pos,
            passer AS rusher,
            'scramble' AS rush_type,
            SUM(pass) AS attempts,
            SUM(yards_gained) AS yards,
            SUM(rush_touchdown) AS td,
            SUM(fumble) AS fumbles,
            SUM(fumble_lost) AS fumbles_lost,
            SUM(fumble_out_of_bounds) AS fumbles_out_of_bounds,
            SUM(epa) AS epa
        FROM
            play_by_play_enriched
        WHERE
            play_type = 'run'
            AND two_point_attempt = 0
            AND passer is not null
        GROUP BY
            year, season_type, game_id, week,
            posteam, defteam, passer_position, passer_gsis_id, passer
        ORDER BY
            yards DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of scramble rushing stats.")
    return df


def _extract_qb_kneels(db_conn) -> pd.DataFrame:
    """Getting qb kneels from the play by play since they are included in rushing stats."""
    logging.info("Extracting qb kneel stats from play by play...")
    query = """
        SELECT
            year,
            season_type,
            game_id,
            defteam AS opp,
            posteam AS team,
            week,
            rusher_position as pos,
            rusher_gsis_id as gsis_id,
            rusher,
            'qb_kneel' AS rush_type,
            SUM(qb_kneel) AS attempts,
            SUM(yards_gained) AS yards,
            SUM(rush_touchdown) AS td,
            SUM(fumble) AS fumbles,
            SUM(fumble_lost) AS fumbles_lost,
            SUM(fumble_out_of_bounds) AS fumbles_out_of_bounds,
            SUM(epa) AS epa
        FROM
            play_by_play_enriched
        WHERE
            play_type = 'qb_kneel'
        GROUP BY
            year, season_type, game_id, week,
            posteam, defteam, rusher_position, rusher_gsis_id, rusher
        ORDER BY
            yards DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of qb kneel stats.")
    return df


In [82]:
df_designed = _extract_designed(db_conn)
df_scrambles = _extract_scrambles(db_conn)
df_qb_kneels = _extract_qb_kneels(db_conn)

In [83]:
df_all = pd.concat([df_designed, df_scrambles, df_qb_kneels])
df_all.head()

Unnamed: 0,year,season_type,game_id,opp,team,week,gsis_id,pos,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020,REG,2020_17_JAX_IND,JAX,IND,17.0,00-0036223,RB,J.Taylor,designed,30.0,253.0,2.0,0.0,0.0,0.0,8.501214
1,2020,REG,2020_17_TEN_HOU,HOU,TEN,17.0,00-0032764,RB,D.Henry,designed,34.0,250.0,2.0,2.0,1.0,0.0,3.54582
2,2020,REG,2020_14_TEN_JAX,JAX,TEN,14.0,00-0032764,RB,D.Henry,designed,26.0,215.0,2.0,0.0,0.0,0.0,10.657367
3,2020,REG,2020_06_HOU_TEN,HOU,TEN,6.0,00-0032764,RB,D.Henry,designed,22.0,212.0,2.0,0.0,0.0,0.0,10.613226
4,2020,REG,2020_09_DET_MIN,DET,MIN,9.0,00-0033893,RB,D.Cook,designed,22.0,206.0,2.0,0.0,0.0,0.0,9.00056


In [84]:
grouping_cols = ['year', 'season_type', 'game_id',
                 'team', 'opp', 'week', 'gsis_id', 'pos', 'rusher']
df_totals = df_all.groupby(grouping_cols, as_index=False).sum()
df_totals['rush_type'] = 'total'
df_final = pd.concat([df_all, df_totals])
df_final.head()

Unnamed: 0,year,season_type,game_id,opp,team,week,gsis_id,pos,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020,REG,2020_17_JAX_IND,JAX,IND,17.0,00-0036223,RB,J.Taylor,designed,30.0,253.0,2.0,0.0,0.0,0.0,8.501214
1,2020,REG,2020_17_TEN_HOU,HOU,TEN,17.0,00-0032764,RB,D.Henry,designed,34.0,250.0,2.0,2.0,1.0,0.0,3.54582
2,2020,REG,2020_14_TEN_JAX,JAX,TEN,14.0,00-0032764,RB,D.Henry,designed,26.0,215.0,2.0,0.0,0.0,0.0,10.657367
3,2020,REG,2020_06_HOU_TEN,HOU,TEN,6.0,00-0032764,RB,D.Henry,designed,22.0,212.0,2.0,0.0,0.0,0.0,10.613226
4,2020,REG,2020_09_DET_MIN,DET,MIN,9.0,00-0033893,RB,D.Cook,designed,22.0,206.0,2.0,0.0,0.0,0.0,9.00056


## Trying with pivot

In [85]:
df_final.index

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            2246, 2247, 2248, 2249, 2250, 2251, 2252, 2253, 2254, 2255],
           dtype='int64', length=4938)

In [32]:
df = df_final.pivot(index=grouping_cols, columns='rush_type')
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,attempts,attempts,attempts,attempts,yards,yards,yards,yards,td,td,td,td,fumbles,fumbles,fumbles,fumbles,fumbles_lost,fumbles_lost,fumbles_lost,fumbles_lost,fumbles_out_of_bounds,fumbles_out_of_bounds,fumbles_out_of_bounds,fumbles_out_of_bounds,epa,epa,epa,epa
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,rush_type,designed,qb_kneel,scramble,total,designed,qb_kneel,scramble,total,designed,qb_kneel,scramble,total,designed,qb_kneel,scramble,total,designed,qb_kneel,scramble,total,designed,qb_kneel,scramble,total,designed,qb_kneel,scramble,total
year,season_type,game_id,team,opp,week,gsis_id,pos,rusher,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2
2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0034184,RB,G.Edwards,8.0,,,8.0,38.0,,,38.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.691066,,,0.691066
2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0034796,QB,L.Jackson,13.0,2.0,1.0,16.0,90.0,-2.0,48.0,136.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.531735,-3.295695,5.689723,3.925763
2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0035662,WR,M.Brown,2.0,,,2.0,19.0,,,19.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,1.412207,,,1.412207
2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0036158,RB,J.Dobbins,9.0,,,9.0,43.0,,,43.0,1.0,,,1.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.349493,,,0.349493
2020,POST,2020_18_BAL_TEN,TEN,BAL,18.0,00-0029701,QB,R.Tannehill,1.0,,1.0,2.0,2.0,,4.0,6.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,2.028814,,-0.047978,1.980835


In [36]:
df.columns = list(map("_".join, df.columns))
df.reset_index().head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,rusher,attempts_designed,attempts_qb_kneel,attempts_scramble,attempts_total,yards_designed,yards_qb_kneel,yards_scramble,yards_total,td_designed,td_qb_kneel,td_scramble,td_total,fumbles_designed,fumbles_qb_kneel,fumbles_scramble,fumbles_total,fumbles_lost_designed,fumbles_lost_qb_kneel,fumbles_lost_scramble,fumbles_lost_total,fumbles_out_of_bounds_designed,fumbles_out_of_bounds_qb_kneel,fumbles_out_of_bounds_scramble,fumbles_out_of_bounds_total,epa_designed,epa_qb_kneel,epa_scramble,epa_total
0,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0034184,RB,G.Edwards,8.0,,,8.0,38.0,,,38.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.691066,,,0.691066
1,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0034796,QB,L.Jackson,13.0,2.0,1.0,16.0,90.0,-2.0,48.0,136.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.531735,-3.295695,5.689723,3.925763
2,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0035662,WR,M.Brown,2.0,,,2.0,19.0,,,19.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,1.412207,,,1.412207
3,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0036158,RB,J.Dobbins,9.0,,,9.0,43.0,,,43.0,1.0,,,1.0,0.0,,,0.0,0.0,,,0.0,0.0,,,0.0,0.349493,,,0.349493
4,2020,POST,2020_18_BAL_TEN,TEN,BAL,18.0,00-0029701,QB,R.Tannehill,1.0,,1.0,2.0,2.0,,4.0,6.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,2.028814,,-0.047978,1.980835


## Trying with merge

In [100]:
def _extract_designed(db_conn) -> pd.DataFrame:
    """Getting designed rushing stats, per player per game, from the play by play."""
    logging.info("Extracting designed rushing stats from play by play...")
    query = """
        SELECT
            year,
            season_type,
            game_id,
            defteam AS opp,
            posteam AS team,
            week,
            rusher_gsis_id AS gsis_id,
            rusher_position AS pos,
            rusher as player,
            SUM(rush) AS attempts,
            SUM(yards_gained) AS yards,
            SUM(rush_touchdown) AS td,
            SUM(fumble) AS fumbles,
            SUM(fumble_lost) AS fumbles_lost,
            SUM(fumble_out_of_bounds) AS fumbles_out_of_bounds,
            SUM(epa) AS epa
        FROM
            play_by_play_enriched
        WHERE
            play_type = 'run'
            AND two_point_attempt = 0
            AND rusher is not null
        GROUP BY
            year, season_type, game_id, rusher_gsis_id, rusher_position,
            week, posteam, defteam, rusher_id, rusher
        ORDER BY
            yards DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of designed rushing stats.")
    return df


def _extract_scrambles(db_conn) -> pd.DataFrame:
    """Getting scramble runs from the play by play.

    Scrambles 'play_type' of 'run', but the `rusher` value is null and the
    `passer` value is the scrambler.
    """
    logging.info("Extracting scramble rushing stats from play by play...")
    query = """
        SELECT
            year,
            season_type,
            game_id,
            defteam AS opp,
            posteam AS team,
            week,
            passer_gsis_id AS gsis_id,
            passer_position as pos,
            passer AS player,
            SUM(pass) AS attempts,
            SUM(yards_gained) AS yards,
            SUM(rush_touchdown) AS td,
            SUM(fumble) AS fumbles,
            SUM(fumble_lost) AS fumbles_lost,
            SUM(fumble_out_of_bounds) AS fumbles_out_of_bounds,
            SUM(epa) AS epa
        FROM
            play_by_play_enriched
        WHERE
            play_type = 'run'
            AND two_point_attempt = 0
            AND passer is not null
        GROUP BY
            year, season_type, game_id, week,
            posteam, defteam, passer_position, passer_gsis_id, passer
        ORDER BY
            yards DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of scramble rushing stats.")
    return df


def _extract_qb_kneels(db_conn) -> pd.DataFrame:
    """Getting qb kneels from the play by play since they are included in rushing stats."""
    logging.info("Extracting qb kneel stats from play by play...")
    query = """
        SELECT
            year,
            season_type,
            game_id,
            defteam AS opp,
            posteam AS team,
            week,
            rusher_position as pos,
            rusher_gsis_id as gsis_id,
            rusher as player,
            SUM(qb_kneel) AS attempts,
            SUM(yards_gained) AS yards,
            SUM(rush_touchdown) AS td,
            SUM(fumble) AS fumbles,
            SUM(fumble_lost) AS fumbles_lost,
            SUM(fumble_out_of_bounds) AS fumbles_out_of_bounds,
            SUM(epa) AS epa
        FROM
            play_by_play_enriched
        WHERE
            play_type = 'qb_kneel'
        GROUP BY
            year, season_type, game_id, week,
            posteam, defteam, rusher_position, rusher_gsis_id, rusher
        ORDER BY
            yards DESC
    """
    df = pd.read_sql(query, db_conn)
    logging.info(f"Extracted {len(df)} rows of qb kneel stats.")
    return df


In [101]:
df_designed = _extract_designed(db_conn)
df_scrambles = _extract_scrambles(db_conn)
df_qb_kneels = _extract_qb_kneels(db_conn)

In [102]:
df_all = pd.concat([df_designed, df_scrambles, df_qb_kneels])

grouping_cols = ['year', 'season_type', 'game_id',
                 'team', 'opp', 'week', 'gsis_id', 'pos', 'player']

df_totals = df_all.groupby(grouping_cols, as_index=False).sum()

In [104]:
cols = ['year', 'season_type', 'game_id', 'team', 'opp', 'week', 'gsis_id', 'pos', 'player']

def rename_cols(df: pd.DataFrame, suffix: str, excempt: list) -> pd.DataFrame:
    return df.rename(columns={
        col: f"{col}{suffix}" for col in df.columns if col not in excempt
    })

df_totals = rename_cols(df_totals, '_total', cols)
df_designed = rename_cols(df_designed, '_designed', cols)
df_scrambles = rename_cols(df_scrambles, '_scramble', cols)
df_qb_kneels = rename_cols(df_qb_kneels, '_kneel', cols)


In [113]:
df_all = (df_totals 
    .merge(
        df_designed,
        how='outer',
        on=cols,
    )
    .merge(
        df_scrambles,
        how='outer',
        on=cols,
    )
    .merge(
        df_qb_kneels,
        how='outer',
        on=cols,
    )
    .fillna(0)
)
df_all.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,attempts_total,yards_total,td_total,fumbles_total,fumbles_lost_total,fumbles_out_of_bounds_total,epa_total,attempts_designed,yards_designed,td_designed,fumbles_designed,fumbles_lost_designed,fumbles_out_of_bounds_designed,epa_designed,attempts_scramble,yards_scramble,td_scramble,fumbles_scramble,fumbles_lost_scramble,fumbles_out_of_bounds_scramble,epa_scramble,attempts_kneel,yards_kneel,td_kneel,fumbles_kneel,fumbles_lost_kneel,fumbles_out_of_bounds_kneel,epa_kneel
0,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0034184,RB,G.Edwards,8.0,38.0,0.0,0.0,0.0,0.0,0.691066,8.0,38.0,0.0,0.0,0.0,0.0,0.691066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0034796,QB,L.Jackson,16.0,136.0,1.0,0.0,0.0,0.0,3.925763,13.0,90.0,0.0,0.0,0.0,0.0,1.531735,1.0,48.0,1.0,0.0,0.0,0.0,5.689723,2.0,-2.0,0.0,0.0,0.0,0.0,-3.295695
2,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0035662,WR,M.Brown,2.0,19.0,0.0,0.0,0.0,0.0,1.412207,2.0,19.0,0.0,0.0,0.0,0.0,1.412207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020,POST,2020_18_BAL_TEN,BAL,TEN,18.0,00-0036158,RB,J.Dobbins,9.0,43.0,1.0,0.0,0.0,0.0,0.349493,9.0,43.0,1.0,0.0,0.0,0.0,0.349493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020,POST,2020_18_BAL_TEN,TEN,BAL,18.0,00-0029701,QB,R.Tannehill,2.0,6.0,0.0,0.0,0.0,0.0,1.980835,1.0,2.0,0.0,0.0,0.0,0.0,2.028814,1.0,4.0,0.0,0.0,0.0,0.0,-0.047978,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [115]:
df_all['attempts'] = df_all['attempts_designed'] + df_all['attempts_scramble']
df_all['yards'] = df_all['yards_designed'] + df_all['yards_scramble']
df_all['td'] = df_all['td_designed'] + df_all['td_scramble']
df_all['fumbles'] = df_all['fumbles_designed'] + df_all['fumbles_scramble']
df_all['fumbles_lost'] = df_all['fumbles_lost_designed'] + df_all['fumbles_lost_scramble']
df_all['fumbles_out_of_bounds'] = df_all['fumbles_out_of_bounds_designed'] + df_all['fumbles_out_of_bounds_scramble']
df_all['epa'] = df_all['epa_designed'] + df_all['epa_scramble']


In [116]:
df_all.to_csv("new_rushing.csv", index=False)

## rushing by play by year

In [135]:
query = """SELECT * FROM rushing_by_player_by_game"""
df = pd.read_sql(query, db_conn)
df.head()

Unnamed: 0,year,season_type,game_id,team,opp,week,gsis_id,pos,player,attempts_total,yards_total,td_total,fumbles_total,fumbles_lost_total,fumbles_out_of_bounds_total,epa_total,attempts_designed,yards_designed,td_designed,fumbles_designed,fumbles_lost_designed,fumbles_out_of_bounds_designed,epa_designed,attempts_scramble,yards_scramble,td_scramble,fumbles_scramble,fumbles_lost_scramble,fumbles_out_of_bounds_scramble,epa_scramble,attempts_kneel,yards_kneel,td_kneel,fumbles_kneel,fumbles_lost_kneel,fumbles_out_of_bounds_kneel,epa_kneel,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020,POST,2020_18_BAL_TEN,BAL,TEN,18,00-0034184,RB,G.Edwards,8,38,0,0,0,0,0.691066,8,38,0,0,0,0,0.691066,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,8,38,0,0,0,0,0.691066
1,2020,POST,2020_18_BAL_TEN,BAL,TEN,18,00-0034796,QB,L.Jackson,16,136,1,0,0,0,3.925763,13,90,0,0,0,0,1.531735,1,48,1,0,0,0,5.689723,2,-2,0,0,0,0,-3.295695,14,138,1,0,0,0,7.221458
2,2020,POST,2020_18_BAL_TEN,BAL,TEN,18,00-0035662,WR,M.Brown,2,19,0,0,0,0,1.412207,2,19,0,0,0,0,1.412207,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,2,19,0,0,0,0,1.412207
3,2020,POST,2020_18_BAL_TEN,BAL,TEN,18,00-0036158,RB,J.Dobbins,9,43,1,0,0,0,0.349493,9,43,1,0,0,0,0.349493,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,9,43,1,0,0,0,0.349493
4,2020,POST,2020_18_BAL_TEN,TEN,BAL,18,00-0029701,QB,R.Tannehill,2,6,0,0,0,0,1.980835,1,2,0,0,0,0,2.028814,1,4,0,0,0,0,-0.047978,0,0,0,0,0,0,0.0,2,6,0,0,0,0,1.980835


In [136]:
grouping_cols = ['year', 'season_type', 'team', 'gsis_id', 'pos', 'player']
df = df.groupby(grouping_cols, as_index=False).sum()
df_size = df.groupby(grouping_cols, as_index=False).size()
df_size = df_size.rename(columns={'size': 'games'})
df_all = df.merge(df_size, on=grouping_cols)
df_all.head()

Unnamed: 0,year,season_type,team,gsis_id,pos,player,week,attempts_total,yards_total,td_total,fumbles_total,fumbles_lost_total,fumbles_out_of_bounds_total,epa_total,attempts_designed,yards_designed,td_designed,fumbles_designed,fumbles_lost_designed,fumbles_out_of_bounds_designed,epa_designed,attempts_scramble,yards_scramble,td_scramble,fumbles_scramble,fumbles_lost_scramble,fumbles_out_of_bounds_scramble,epa_scramble,attempts_kneel,yards_kneel,td_kneel,fumbles_kneel,fumbles_lost_kneel,fumbles_out_of_bounds_kneel,epa_kneel,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa,games
0,2020,POST,BAL,00-0034184,RB,G.Edwards,37,18,80,0,1,0,0,0.807928,18,80,0,1,0,0,0.807928,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,18,80,0,1,0,0,0.807928,1
1,2020,POST,BAL,00-0034796,QB,L.Jackson,37,25,170,1,1,0,0,4.371572,19,101,0,1,0,0,-0.693216,4,71,1,0,0,0,8.360483,2,-2,0,0,0,0,-3.295695,23,172,1,1,0,0,7.667267,1
2,2020,POST,BAL,00-0035662,WR,M.Brown,18,2,19,0,0,0,0,1.412207,2,19,0,0,0,0,1.412207,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,2,19,0,0,0,0,1.412207,1
3,2020,POST,BAL,00-0035993,QB,T.Huntley,19,3,32,0,0,0,0,0.885258,0,0,0,0,0,0,0.0,3,32,0,0,0,0,0.885258,0,0,0,0,0,0,0.0,3,32,0,0,0,0,0.885258,1
4,2020,POST,BAL,00-0036158,RB,J.Dobbins,37,19,85,1,0,0,0,-0.790664,19,85,1,0,0,0,-0.790664,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,19,85,1,0,0,0,-0.790664,1


In [137]:
db_conn.close()