In [1]:
import os
import sys

import numpy as np
import pandas as pd

In [2]:
sys.path.insert(0, '..')

In [3]:
from app import db

## Validate the DB works as expected

In [4]:
db_conn = db.get_db_conn()

In [18]:
query = "SELECT * FROM play_by_play"

In [21]:
df = pd.read_sql(query, db_conn)

In [22]:
df

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,receiver_id,name,jersey_number,id,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd


In [41]:
DATA_DIRECTORY = '../data'
PBP_PATH = os.path.join(DATA_DIRECTORY, 'play_by_play', 'play_by_play.csv')
df = pd.read_csv(PBP_PATH, low_memory=False)
df.shape

(37401, 340)

In [42]:
df.to_sql('play_by_play', db_conn, index=False, if_exists='replace')

In [43]:
df = pd.read_sql(query, db_conn)
df.shape

(362, 5)

In [44]:
df.head()

Unnamed: 0,rusher,posteam,play_type,yds,td
0,D.Henry,TEN,run,1532.0,14.0
1,D.Cook,MIN,run,1352.0,14.0
2,J.Robinson,JAX,run,1035.0,7.0
3,R.Jones,TB,run,900.0,6.0
4,K.Drake,ARI,run,848.0,9.0


In [45]:
df.shape

(362, 5)

## Rushing Stats - Designed Runs

In [90]:
query = """
    SELECT 
        rusher, posteam, sum(rush) as att,
        sum(yards_gained) as yds, sum(rush_touchdown) as td
    FROM play_by_play
    WHERE 
        play_type = 'run'
        AND sack = 0
        AND two_point_attempt = 0
    GROUP BY
        posteam, rusher
    ORDER BY
        yds DESC
"""
df_rush = pd.read_sql(query, db_conn)
df_rush.shape

(362, 5)

In [91]:
df_rush.head(10)

Unnamed: 0,rusher,posteam,att,yds,td
0,D.Henry,TEN,297.0,1532.0,14.0
1,D.Cook,MIN,273.0,1352.0,14.0
2,J.Robinson,JAX,224.0,1035.0,7.0
3,R.Jones,TB,180.0,900.0,6.0
4,K.Drake,ARI,201.0,848.0,9.0
5,J.Jacobs,LV,220.0,832.0,9.0
6,E.Elliott,DAL,211.0,832.0,5.0
7,A.Jones,GB,160.0,823.0,7.0
8,N.Chubb,CLE,133.0,797.0,7.0
9,D.Montgomery,CHI,170.0,760.0,4.0


In [92]:
df_rush.tail(10)

Unnamed: 0,rusher,posteam,att,yds,td
352,E.Stick,LAC,1.0,-2.0,0.0
353,K.Raymond,TEN,1.0,-3.0,0.0
354,A.Roberts,BUF,1.0,-3.0,0.0
355,J.Guyton,LAC,1.0,-4.0,0.0
356,T.Montgomery,NO,1.0,-4.0,0.0
357,G.Ward,PHI,1.0,-6.0,0.0
358,A.Isabella,ARI,1.0,-6.0,0.0
359,C.Wilson,DAL,3.0,-12.0,0.0
360,A.Killins,PHI,1.0,-12.0,0.0
361,T.Long,LAC,1.0,-28.0,0.0


## Rushing Stats - Scrambles

In [87]:
query = """
    SELECT 
        *
    FROM play_by_play
    WHERE 
        play_type = 'run'
        AND sack = 0
        AND two_point_attempt = 0
"""
df_scrambles = pd.read_sql(query, db_conn)

In [88]:
df_scrambles['passer'].value_counts().head()

D.Watson     53
R.Wilson     45
K.Murray     42
L.Jackson    35
P.Mahomes    34
Name: passer, dtype: int64

In [89]:
query = """
    SELECT 
        passer, posteam, sum(pass) as att,
        sum(yards_gained) as yds, sum(rush_touchdown) as td
    FROM play_by_play
    WHERE
        play_type = 'run'
        AND sack = 0
        AND two_point_attempt = 0
        AND passer is not null
    GROUP BY
        posteam, passer
    ORDER BY att DESC
"""
df_scrambles = pd.read_sql(query, db_conn)
df_scrambles.head()

Unnamed: 0,passer,posteam,att,yds,td
0,D.Watson,HOU,53.0,334.0,1.0
1,R.Wilson,SEA,45.0,365.0,0.0
2,K.Murray,ARI,42.0,370.0,1.0
3,L.Jackson,BAL,35.0,231.0,0.0
4,P.Mahomes,KC,34.0,225.0,1.0


In [94]:
df_rush[df_rush['rusher'] == 'K.Murray']

Unnamed: 0,rusher,posteam,att,yds,td
44,K.Murray,ARI,64.0,361.0,9.0


In [96]:
df_scrambles[df_scrambles['passer'] == 'K.Murray']

Unnamed: 0,passer,posteam,att,yds,td
2,K.Murray,ARI,42.0,370.0,1.0


In [107]:
# should be 712
df_rush[df_rush['rusher'] == 'K.Murray']['yds'].values[0] + \
    df_scrambles[df_scrambles['passer'] == 'K.Murray']['yds'].values[0]

731.0

In [109]:
# should be 369
df_rush[df_rush['rusher'] == 'D.Watson']['yds'].values[0] + \
    df_scrambles[df_scrambles['passer'] == 'D.Watson']['yds'].values[0]

379.0

In [111]:
# should be 451
df_rush[df_rush['rusher'] == 'C.Newton']['yds'].values[0] + \
    df_scrambles[df_scrambles['passer'] == 'C.Newton']['yds'].values[0]

461.0

In [121]:
query = """
    SELECT 
        rusher, posteam, sum(qb_kneel) as att,
        sum(yards_gained) as yds, sum(rush_touchdown) as td
    FROM play_by_play
    WHERE
        play_type = 'qb_kneel'
    GROUP BY
        posteam, rusher
    ORDER BY att DESC
"""
df_kneels = pd.read_sql(query, db_conn)
df_kneels.head()

Unnamed: 0,rusher,posteam,att,yds,td
0,J.Allen,BUF,20.0,-18.0,0.0
1,J.Goff,LA,17.0,-17.0,0.0
2,P.Rivers,IND,13.0,-11.0,0.0
3,B.Mayfield,CLE,13.0,-14.0,0.0
4,T.Brady,TB,12.0,-9.0,0.0


In [123]:
df_rush[df_rush['rusher'] == 'C.Newton']['yds'].values[0] + \
    df_scrambles[df_scrambles['passer'] == 'C.Newton']['yds'].values[0] + \
    df_kneels[df_kneels['rusher'] == 'C.Newton']['yds'].values[0]

451.0

In [125]:
# should be 712
df_rush[df_rush['rusher'] == 'K.Murray']['yds'].values[0] + \
    df_scrambles[df_scrambles['passer'] == 'K.Murray']['yds'].values[0] + \
    df_kneels[df_kneels['rusher'] == 'K.Murray']['yds'].values[0]

712.0

In [126]:
# should be 369
df_rush[df_rush['rusher'] == 'D.Watson']['yds'].values[0] + \
    df_scrambles[df_scrambles['passer'] == 'D.Watson']['yds'].values[0] + \
    df_kneels[df_kneels['rusher'] == 'D.Watson']['yds'].values[0]

369.0

## Run the Pipeline

In [168]:
query = """
    SELECT
        game_id,
        posteam as team,
        defteam as def_team,
        week,
        rusher_id,
        rusher,
        'designed' as rush_type,
        sum(rush) as attempts,
        sum(yards_gained) as yards,
        sum(rush_touchdown) as td,
        sum(fumble) as fumbles,
        sum(fumble_lost) as fumbles_lost,
        sum(fumble_out_of_bounds) as fumbles_out_of_bounds,
        sum(epa) as epa
    FROM
        play_by_play
    WHERE 
        play_type = 'run'
        AND sack = 0
        AND two_point_attempt = 0
        AND rusher is not null
    GROUP BY
        game_id, week, posteam, defteam, rusher_id, rusher
    ORDER BY
        yards DESC
"""
rushing_designed = pd.read_sql(query, db_conn)
print(rushing_designed.shape)
rushing_designed.head(10)

(1573, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_14_TEN_JAX,TEN,JAX,14,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,26.0,215.0,2.0,0.0,0.0,0.0,10.657379
1,2020_06_HOU_TEN,TEN,HOU,6,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,22.0,212.0,2.0,0.0,0.0,0.0,10.613235
2,2020_09_DET_MIN,MIN,DET,9,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,206.0,2.0,0.0,0.0,0.0,9.000564
3,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3033-3438-313686f35185,R.Jones,designed,23.0,192.0,1.0,0.0,0.0,0.0,6.98484
4,2020_03_TEN_MIN,MIN,TEN,3,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,181.0,1.0,1.0,1.0,0.0,0.570586
5,2020_12_TEN_IND,TEN,IND,12,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,27.0,178.0,3.0,0.0,0.0,0.0,11.944911
6,2020_14_NE_LA,LA,NE,14,32013030-2d30-3033-3634-3134cebf85b9,C.Akers,designed,29.0,171.0,0.0,0.0,0.0,0.0,0.29784
7,2020_02_DET_GB,GB,DET,2,32013030-2d30-3033-3332-3933ed82c0de,A.Jones,designed,18.0,168.0,2.0,0.0,0.0,0.0,8.100863
8,2020_06_ARI_DAL,ARI,DAL,6,32013030-2d30-3033-3331-31385a388006,K.Drake,designed,20.0,164.0,2.0,0.0,0.0,0.0,9.449478
9,2020_08_MIN_GB,MIN,GB,8,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,30.0,163.0,3.0,0.0,0.0,0.0,7.858157


In [169]:
query = """
    SELECT
        game_id,
        posteam as team,
        defteam as def_team,
        week,
        passer_id as rusher_id,
        passer as rusher,
        'scramble' as rush_type,
        sum(rush) as attempts,
        sum(yards_gained) as yards,
        sum(rush_touchdown) as td,
        sum(fumble) as fumbles,
        sum(fumble_lost) as fumbles_lost,
        sum(fumble_out_of_bounds) as fumbles_out_of_bounds,
        sum(epa) as epa
    FROM
        play_by_play
    WHERE 
        play_type = 'run'
        AND sack = 0
        AND two_point_attempt = 0
        AND passer is not null
    GROUP BY
        game_id, week, posteam, defteam, passer_id, passer
    ORDER BY
        yards DESC
"""
rushing_scrambles = pd.read_sql(query, db_conn)
print(rushing_scrambles.shape)
rushing_scrambles.head(10)

(306, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_01_ARI_SF,ARI,SF,1,32013030-2d30-3033-3532-323871689b29,K.Murray,scramble,0.0,90.0,1.0,0.0,0.0,0.0,11.362027
1,2020_04_DEN_NYJ,NYJ,DEN,4,32013030-2d30-3033-3438-36394b743d00,S.Darnold,scramble,0.0,81.0,1.0,0.0,0.0,0.0,7.497079
2,2020_13_NO_ATL,NO,ATL,13,32013030-2d30-3033-3333-3537640eb4b6,T.Hill,scramble,0.0,60.0,0.0,0.0,0.0,0.0,3.085398
3,2020_10_SEA_LA,SEA,LA,10,32013030-2d30-3032-3932-363323eefb5c,R.Wilson,scramble,0.0,60.0,0.0,0.0,0.0,0.0,0.124021
4,2020_04_ARI_CAR,ARI,CAR,4,32013030-2d30-3033-3532-323871689b29,K.Murray,scramble,0.0,60.0,0.0,0.0,0.0,0.0,3.50955
5,2020_05_MIN_SEA,SEA,MIN,5,32013030-2d30-3032-3932-363323eefb5c,R.Wilson,scramble,0.0,58.0,0.0,0.0,0.0,0.0,2.473476
6,2020_03_CIN_PHI,PHI,CIN,3,32013030-2d30-3033-3239-3530c5ee180a,C.Wentz,scramble,0.0,57.0,1.0,0.0,0.0,0.0,8.297576
7,2020_02_KC_LAC,KC,LAC,2,32013030-2d30-3033-3338-3733fa30c4fa,P.Mahomes,scramble,0.0,54.0,0.0,0.0,0.0,0.0,5.541541
8,2020_09_HOU_JAX,HOU,JAX,9,32013030-2d30-3033-3335-33372bbab8b8,D.Watson,scramble,0.0,53.0,0.0,0.0,0.0,0.0,7.556145
9,2020_06_DEN_NE,NE,DEN,6,32013030-2d30-3032-3739-33399bbb0097,C.Newton,scramble,0.0,52.0,0.0,0.0,0.0,0.0,2.559935


In [170]:
query = """
    SELECT
        game_id,
        posteam as team,
        defteam as def_team,
        week,
        rusher_id,
        rusher,
        'qb_kneel' as rush_type,
        sum(qb_kneel) as attempts,
        sum(yards_gained) as yards,
        sum(rush_touchdown) as td,
        sum(fumble) as fumbles,
        sum(fumble_lost) as fumbles_lost,
        sum(fumble_out_of_bounds) as fumbles_out_of_bounds,
        sum(epa) as epa
    FROM
        play_by_play
    WHERE 
        play_type = 'qb_kneel'
    GROUP BY
        game_id, week, posteam, defteam, rusher_id, rusher
    ORDER BY
        yards DESC
"""
rushing_kneels = pd.read_sql(query, db_conn)
print(rushing_kneels.shape)
rushing_kneels.head(10)

(188, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3032-3739-34386d7e96d3,B.Gabbert,qb_kneel,3.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020_12_CHI_GB,GB,CHI,12,32013030-2d30-3033-3431-37377f312969,T.Boyle,qb_kneel,2.0,0.0,0.0,0.0,0.0,0.0,-3.319743
2,2020_06_GB_TB,TB,GB,6,32013030-2d30-3032-3739-34386d7e96d3,B.Gabbert,qb_kneel,2.0,0.0,0.0,0.0,0.0,0.0,-4.122151
3,2020_02_MIN_IND,IND,MIN,2,32013030-2d30-3032-3239-34322bc30020,P.Rivers,qb_kneel,3.0,0.0,0.0,0.0,0.0,0.0,-4.47995
4,2020_03_NYJ_IND,IND,NYJ,3,32013030-2d30-3033-3331-3139ba7e51e2,J.Brissett,qb_kneel,2.0,0.0,0.0,0.0,0.0,0.0,-3.61784
5,2020_12_CAR_MIN,MIN,CAR,12,32013030-2d30-3032-3936-303492e9d55e,K.Cousins,qb_kneel,1.0,-1.0,0.0,0.0,0.0,0.0,0.0
6,2020_13_DET_CHI,DET,CHI,13,32013030-2d30-3032-3634-39382e08c011,M.Stafford,qb_kneel,1.0,-1.0,0.0,0.0,0.0,0.0,0.0
7,2020_12_KC_TB,KC,TB,12,32013030-2d30-3033-3338-3733fa30c4fa,P.Mahomes,qb_kneel,1.0,-1.0,0.0,0.0,0.0,0.0,-1.514402
8,2020_12_KC_TB,TB,KC,12,32013030-2d30-3031-3935-39361b587621,T.Brady,qb_kneel,1.0,-1.0,0.0,0.0,0.0,0.0,0.0
9,2020_12_LAC_BUF,BUF,LAC,12,32013030-2d30-3033-3438-3537f3a13a66,J.Allen,qb_kneel,1.0,-1.0,0.0,0.0,0.0,0.0,0.0


In [181]:
rushing_designed.to_csv("../data/rushing_designed.csv", index=False)
rushing_scrambles.to_csv("../data/rushing_scrambles.csv", index=False)
rushing_kneels.to_csv("../data/rushing_kneels.csv", index=False)
print(rushing_designed.shape)
print(rushing_scrambles.shape)
print(rushing_kneels.shape)

(1573, 14)
(306, 14)
(188, 14)


In [160]:
rushing_designed[rushing_designed['rusher'].isna()]

Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa


In [178]:
df_all = pd.concat([rushing_designed, rushing_scrambles, rushing_kneels])
print(df_all.shape)
df_all.head()

(2067, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_14_TEN_JAX,TEN,JAX,14,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,26.0,215.0,2.0,0.0,0.0,0.0,10.657379
1,2020_06_HOU_TEN,TEN,HOU,6,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,22.0,212.0,2.0,0.0,0.0,0.0,10.613235
2,2020_09_DET_MIN,MIN,DET,9,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,206.0,2.0,0.0,0.0,0.0,9.000564
3,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3033-3438-313686f35185,R.Jones,designed,23.0,192.0,1.0,0.0,0.0,0.0,6.98484
4,2020_03_TEN_MIN,MIN,TEN,3,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,181.0,1.0,1.0,1.0,0.0,0.570586


In [179]:
grouping_cols = ['game_id', 'team', 'def_team', 'week', 'rusher_id', 'rusher']
df_totals = df.groupby(grouping_cols, as_index=False).sum()
df_totals['rush_type'] = 'total'
print(df_totals.shape)
df_totals.sort_values('yards', ascending=False).head(10)

(1735, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa,rush_type
1722,2020_14_TEN_JAX,TEN,JAX,14,32013030-2d30-3033-3237-3634214e1686,D.Henry,26.0,215.0,2.0,0.0,0.0,0.0,10.657379,total
747,2020_06_HOU_TEN,TEN,HOU,6,32013030-2d30-3033-3237-3634214e1686,D.Henry,22.0,212.0,2.0,0.0,0.0,0.0,10.613235,total
1057,2020_09_DET_MIN,MIN,DET,9,32013030-2d30-3033-3338-39337f9ee497,D.Cook,22.0,206.0,2.0,0.0,0.0,0.0,9.000564,total
1236,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3033-3438-313686f35185,R.Jones,23.0,192.0,1.0,0.0,0.0,0.0,6.98484,total
393,2020_03_TEN_MIN,MIN,TEN,3,32013030-2d30-3033-3338-39337f9ee497,D.Cook,22.0,181.0,1.0,1.0,1.0,0.0,0.570586,total
1478,2020_12_TEN_IND,TEN,IND,12,32013030-2d30-3033-3237-3634214e1686,D.Henry,27.0,178.0,3.0,0.0,0.0,0.0,11.944911,total
1688,2020_14_NE_LA,LA,NE,14,32013030-2d30-3033-3634-3134cebf85b9,C.Akers,29.0,171.0,0.0,0.0,0.0,0.0,0.29784,total
190,2020_02_DET_GB,GB,DET,2,32013030-2d30-3033-3332-3933ed82c0de,A.Jones,18.0,168.0,2.0,0.0,0.0,0.0,8.100863,total
671,2020_06_ARI_DAL,ARI,DAL,6,32013030-2d30-3033-3331-31385a388006,K.Drake,20.0,164.0,2.0,0.0,0.0,0.0,9.449478,total
959,2020_08_MIN_GB,MIN,GB,8,32013030-2d30-3033-3338-39337f9ee497,D.Cook,30.0,163.0,3.0,0.0,0.0,0.0,7.858157,total


In [182]:
df_final = pd.concat([df_all, df_totals])
print(df_final.shape)
df_final.sort_values('yards', ascending=False).head(10)

(3802, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_14_TEN_JAX,TEN,JAX,14,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,26.0,215.0,2.0,0.0,0.0,0.0,10.657379
1722,2020_14_TEN_JAX,TEN,JAX,14,32013030-2d30-3033-3237-3634214e1686,D.Henry,total,26.0,215.0,2.0,0.0,0.0,0.0,10.657379
1,2020_06_HOU_TEN,TEN,HOU,6,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,22.0,212.0,2.0,0.0,0.0,0.0,10.613235
747,2020_06_HOU_TEN,TEN,HOU,6,32013030-2d30-3033-3237-3634214e1686,D.Henry,total,22.0,212.0,2.0,0.0,0.0,0.0,10.613235
2,2020_09_DET_MIN,MIN,DET,9,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,206.0,2.0,0.0,0.0,0.0,9.000564
1057,2020_09_DET_MIN,MIN,DET,9,32013030-2d30-3033-3338-39337f9ee497,D.Cook,total,22.0,206.0,2.0,0.0,0.0,0.0,9.000564
1236,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3033-3438-313686f35185,R.Jones,total,23.0,192.0,1.0,0.0,0.0,0.0,6.98484
3,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3033-3438-313686f35185,R.Jones,designed,23.0,192.0,1.0,0.0,0.0,0.0,6.98484
393,2020_03_TEN_MIN,MIN,TEN,3,32013030-2d30-3033-3338-39337f9ee497,D.Cook,total,22.0,181.0,1.0,1.0,1.0,0.0,0.570586
4,2020_03_TEN_MIN,MIN,TEN,3,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,181.0,1.0,1.0,1.0,0.0,0.570586


In [183]:
df_final.to_csv("../data/rushing_all.csv", index=False)

## Validate table results


In [5]:
query = """
    SELECT
        *
    FROM
        rushing_by_player_by_game
"""
df = pd.read_sql(query, db_conn)
print(df.shape)
df.head(10)

(3819, 14)


Unnamed: 0,game_id,team,def_team,week,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_14_TEN_JAX,TEN,JAX,14,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,26.0,215.0,2.0,0.0,0.0,0.0,10.657379
1,2020_06_HOU_TEN,TEN,HOU,6,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,22.0,212.0,2.0,0.0,0.0,0.0,10.613235
2,2020_09_DET_MIN,MIN,DET,9,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,206.0,2.0,0.0,0.0,0.0,9.000564
3,2020_10_TB_CAR,TB,CAR,10,32013030-2d30-3033-3438-313686f35185,R.Jones,designed,23.0,192.0,1.0,0.0,0.0,0.0,6.98484
4,2020_03_TEN_MIN,MIN,TEN,3,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,22.0,181.0,1.0,1.0,1.0,0.0,0.570586
5,2020_12_TEN_IND,TEN,IND,12,32013030-2d30-3033-3237-3634214e1686,D.Henry,designed,27.0,178.0,3.0,0.0,0.0,0.0,11.944911
6,2020_14_NE_LA,LA,NE,14,32013030-2d30-3033-3634-3134cebf85b9,C.Akers,designed,29.0,171.0,0.0,0.0,0.0,0.0,0.29784
7,2020_02_DET_GB,GB,DET,2,32013030-2d30-3033-3332-3933ed82c0de,A.Jones,designed,18.0,168.0,2.0,0.0,0.0,0.0,8.100863
8,2020_06_ARI_DAL,ARI,DAL,6,32013030-2d30-3033-3331-31385a388006,K.Drake,designed,20.0,164.0,2.0,0.0,0.0,0.0,9.449478
9,2020_08_MIN_GB,MIN,GB,8,32013030-2d30-3033-3338-39337f9ee497,D.Cook,designed,30.0,163.0,3.0,0.0,0.0,0.0,7.858157


## Creating rushing by player by year

In [18]:
query = """
    SELECT
        team,
        rusher_id,
        rusher,
        rush_type,
        SUM(attempts) AS attempts,
        SUM(yards) AS yards,
        SUM(td) AS td,
        SUM(fumbles) AS fumbles,
        SUM(fumbles_lost) AS fumbles_lost,
        SUM(fumbles_out_of_bounds) AS fumbles_out_of_bounds,
        SUM(epa) AS epa
    FROM
        rushing_by_player_by_game
    WHERE
        rush_type = 'total'
    GROUP BY
        team,
        rusher_id,
        rusher,
        rush_type
    ORDER BY
        SUM(yards) DESC
"""
df = pd.read_sql(query, db_conn)
print(df.shape)
df.head(10)

(342, 11)


Unnamed: 0,team,rusher_id,rusher,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,TEN,32013030-2d30-3033-3237-3634214e1686,D.Henry,total,297.0,1532.0,14.0,2.0,1.0,0.0,26.415387
1,MIN,32013030-2d30-3033-3338-39337f9ee497,D.Cook,total,273.0,1352.0,14.0,3.0,2.0,0.0,13.897016
2,JAX,32013030-2d30-3033-3538-3331fa153077,J.Robinson,total,224.0,1035.0,7.0,1.0,0.0,0.0,-7.509769
3,TB,32013030-2d30-3033-3438-313686f35185,R.Jones,total,180.0,900.0,6.0,0.0,0.0,0.0,1.391972
4,CLE,32013030-2d30-3033-3437-3931e5305530,N.Chubb,total,150.0,879.0,9.0,1.0,1.0,0.0,14.817243
5,ARI,32013030-2d30-3033-3331-31385a388006,K.Drake,total,201.0,848.0,9.0,3.0,1.0,0.0,-2.131132
6,LV,32013030-2d30-3033-3537-3030adfaf832,J.Jacobs,total,220.0,832.0,9.0,2.0,2.0,0.0,-27.77281
7,DAL,32013030-2d30-3033-3330-343523cdc4a0,E.Elliott,total,211.0,832.0,5.0,5.0,4.0,0.0,-25.727005
8,GB,32013030-2d30-3033-3332-3933ed82c0de,A.Jones,total,160.0,823.0,7.0,1.0,0.0,1.0,6.23486
9,BAL,32013030-2d30-3033-3437-39369c80073a,L.Jackson,total,79.0,793.0,6.0,5.0,1.0,0.0,37.441999


## Creating rushing by team by game

In [20]:
query = """
    SELECT
        game_id,
        team,
        def_team,
        rush_type,
        SUM(attempts) AS attempts,
        SUM(yards) AS yards,
        SUM(td) AS td,
        SUM(fumbles) AS fumbles,
        SUM(fumbles_lost) AS fumbles_lost,
        SUM(fumbles_out_of_bounds) AS fumbles_out_of_bounds,
        SUM(epa) AS epa
    FROM
        rushing_by_player_by_game
    WHERE
        rush_type = 'total'
    GROUP BY
        game_id,
        team,
        def_team,
        rush_type
    ORDER BY
        SUM(yards) DESC
"""
df = pd.read_sql(query, db_conn)
print(df.shape)
df.head(10)

(416, 11)


Unnamed: 0,game_id,team,def_team,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,2020_04_CLE_DAL,CLE,DAL,total,37.0,307.0,3.0,0.0,0.0,0.0,16.88586
1,2020_13_DAL_BAL,BAL,DAL,total,32.0,294.0,2.0,1.0,0.0,0.0,19.281347
2,2020_09_DET_MIN,MIN,DET,total,34.0,275.0,2.0,0.0,0.0,0.0,8.886375
3,2020_08_PIT_BAL,BAL,PIT,total,42.0,265.0,1.0,2.0,1.0,0.0,3.336742
4,2020_06_HOU_TEN,TEN,HOU,total,27.0,263.0,2.0,0.0,0.0,0.0,11.856426
5,2020_06_ARI_DAL,ARI,DAL,total,32.0,261.0,3.0,0.0,0.0,0.0,17.709854
6,2020_02_DET_GB,GB,DET,total,31.0,259.0,2.0,0.0,0.0,0.0,7.544573
7,2020_03_LV_NE,NE,LV,total,34.0,250.0,2.0,0.0,0.0,0.0,11.622939
8,2020_14_TEN_JAX,TEN,JAX,total,40.0,249.0,2.0,0.0,0.0,0.0,8.180812
9,2020_14_NO_PHI,PHI,NO,total,29.0,246.0,2.0,1.0,1.0,0.0,0.146501


## Creating rushing by team by year

In [22]:
query = """
    SELECT
        team,
        rush_type,
        SUM(attempts) AS attempts,
        SUM(yards) AS yards,
        SUM(td) AS td,
        SUM(fumbles) AS fumbles,
        SUM(fumbles_lost) AS fumbles_lost,
        SUM(fumbles_out_of_bounds) AS fumbles_out_of_bounds,
        SUM(epa) AS epa
    FROM
        rushing_by_player_by_game
    WHERE
        rush_type = 'total'
    GROUP BY
        team,
        rush_type
    ORDER BY
        SUM(yards) DESC
"""
df = pd.read_sql(query, db_conn)
print(df.shape)
df.head(10)

(32, 9)


Unnamed: 0,team,rush_type,attempts,yards,td,fumbles,fumbles_lost,fumbles_out_of_bounds,epa
0,BAL,total,373.0,2259.0,19.0,11.0,4.0,0.0,38.850055
1,TEN,total,390.0,2051.0,18.0,4.0,1.0,0.0,24.253157
2,CLE,total,384.0,2028.0,16.0,6.0,3.0,0.0,-5.034359
3,ARI,total,353.0,1966.0,20.0,8.0,1.0,1.0,32.118803
4,NE,total,394.0,1918.0,19.0,6.0,1.0,0.0,24.352766
5,MIN,total,374.0,1910.0,15.0,5.0,3.0,0.0,3.911294
6,NO,total,368.0,1785.0,21.0,9.0,3.0,1.0,1.866688
7,LA,total,363.0,1676.0,19.0,3.0,2.0,0.0,-17.251822
8,PHI,total,279.0,1642.0,12.0,11.0,4.0,1.0,3.603168
9,GB,total,324.0,1610.0,11.0,3.0,1.0,1.0,0.039279
