In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from adjustText import adjust_text

# Hex colour per team abbreviation. The plotting cells map each row's
# `posteam` value through this dict to colour its scatter marker.
COLORS = {
    'ARI': '#97233F',
    'ATL': '#A71930',
    'BAL': '#241773',
    'BUF': '#00338D',
    'CAR': '#0085CA',
    'CHI': '#00143F',
    'CIN': '#FB4F14',
    'CLE': '#FB4F14',
    'DAL': '#B0B7BC',
    'DEN': '#002244',
    'DET': '#046EB4',
    'GB': '#24423C',
    'HOU': '#C9243F',
    'IND': '#003D79',
    'JAX': '#136677',
    'KC': '#CA2430',
    'LA': '#002147',
    'LAC': '#2072BA',
    'LV': '#C4C9CC',
    'MIA': '#0091A0',
    'MIN': '#4F2E84',
    'NE': '#0A2342',
    'NO': '#A08A58',
    'NYG': '#192E6C',
    'NYJ': '#203731',
    'PHI': '#014A53',
    'PIT': '#FFC20E',
    'SEA': '#7AC142',
    'SF': '#C9243F',
    'TB': '#D40909',
    'TEN': '#4095D1',
    'WAS': '#FFC20F',
}

In [None]:
# Season to load (single season)
YEAR = 2020

# Gzipped play-by-play CSV for YEAR, read directly from the nflfastR-data
# GitHub repository. low_memory=False makes pandas parse the file in one pass.
pbp_url = f'https://github.com/guga31bb/nflfastR-data/blob/master/data/play_by_play_{YEAR}.csv.gz?raw=True'
data = pd.read_csv(pbp_url, compression='gzip', low_memory=False)

In [None]:
# Row filters applied to the raw play-by-play frame; a play is kept only if
# every mask is True.
wanted_play_type = data['play_type'].isin(['pass', 'run', 'no_play'])
not_two_point_try = data['two_point_attempt'] == 0
has_epa = data['epa'].notna()
# presumably restricts to the regular season -- confirm for the chosen YEAR
before_week_18 = data['week'] < 18

df = data[wanted_play_type & not_two_point_try & has_epa & before_week_18]

In [None]:
# Per-passer season summary: one row per (passer, posteam) pair, indexed by
# passer, with posteam kept as an ordinary column.
#
# Columns: posteam, EPA_play (mean epa), CPOE (mean cpoe), yards (sum of
# yards_gained), pTD (sum of pass_touchdown), Plays (count of non-null wpa).
#
# Named aggregation ties each output name to its source column, so the labels
# cannot drift if the aggregation is edited. Building the frame in one chain
# (no inplace sort on a filtered slice) also means later column assignments on
# qb_data do not trigger SettingWithCopyWarning.
MIN_QB_PLAYS = 160  # passers need strictly more than this many counted plays

qb_data = (
    df.groupby(['passer', 'posteam'])
    .agg(
        EPA_play=('epa', 'mean'),
        CPOE=('cpoe', 'mean'),
        yards=('yards_gained', 'sum'),
        pTD=('pass_touchdown', 'sum'),
        Plays=('wpa', 'count'),
    )
    .reset_index(level='posteam')
    .loc[lambda summary: summary['Plays'] > MIN_QB_PLAYS]
    .sort_values('EPA_play', ascending=False)
)

In [None]:
# Scatter of qb_data: yards on x, pTD on y, one marker per row.
fig, ax = plt.subplots(figsize=(16, 12))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
qb_data['color'] = qb_data['posteam'].map(COLORS)
x_vals = qb_data['yards']
y_vals = qb_data['pTD']
ax.scatter(x=x_vals, y=y_vals, s=qb_data['Plays'] / 2, c=qb_data['color'])

# Label every marker with the frame's index value, slightly offset from the
# point, then hand the labels to adjust_text for repositioning.
texts = []
for label, px, py in zip(qb_data.index, x_vals, y_vals):
    texts.append(ax.text(px + 0.1, py + 0.1, label, ha='right', va='bottom'))
adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('QB volume stats', fontsize=20, pad=15)
ax.set_xlabel('Passing yards', fontsize=16, labelpad=15)
ax.set_ylabel('Passing Touchdowns', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=10)

In [None]:
# Scatter of qb_data: CPOE on x, EPA_play on y, one marker per row.
fig, ax = plt.subplots(figsize=(16, 12))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
qb_data['color'] = qb_data['posteam'].map(COLORS)
x_vals = qb_data['CPOE']
y_vals = qb_data['EPA_play']
ax.scatter(x=x_vals, y=y_vals, s=qb_data['Plays'] / 2, c=qb_data['color'])

# Label every marker with the frame's index value, slightly offset from the
# point, then hand the labels to adjust_text for repositioning.
texts = []
for label, px, py in zip(qb_data.index, x_vals, y_vals):
    texts.append(ax.text(px + 0.1, py + 0.015, label, ha='right', va='bottom'))
adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('QB efficacy', fontsize=20, pad=15)
ax.set_xlabel('Completed passes over expectation (%)', fontsize=16, labelpad=15)
ax.set_ylabel('EPA / play ', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=10)

In [None]:
# Per-receiver season summary: one row per (receiver, posteam) pair, indexed
# by receiver, with posteam kept as an ordinary column.
#
# Columns: posteam, EPA_play (mean epa), CPOE (mean cpoe), yards (sum of
# yards_gained), pTD (sum of pass_touchdown), Plays (count of non-null wpa).
#
# The cut-off is inclusive (>=) so it agrees with the "minimum 80 targets"
# wording in the receiver figure titles; a strict > silently dropped players
# sitting exactly on the threshold.
#
# Named aggregation ties each output name to its source column, and building
# the frame in one chain (no inplace sort on a filtered slice) means later
# column assignments on wr_data do not trigger SettingWithCopyWarning.
MIN_WR_TARGETS = 80

wr_data = (
    df.groupby(['receiver', 'posteam'])
    .agg(
        EPA_play=('epa', 'mean'),
        CPOE=('cpoe', 'mean'),
        yards=('yards_gained', 'sum'),
        pTD=('pass_touchdown', 'sum'),
        Plays=('wpa', 'count'),
    )
    .reset_index(level='posteam')
    .loc[lambda summary: summary['Plays'] >= MIN_WR_TARGETS]
    .sort_values('yards', ascending=False)
)

In [None]:
# Scatter of wr_data: yards on x, pTD on y, one marker per row.
fig, ax = plt.subplots(figsize=(24, 20))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
wr_data['color'] = wr_data['posteam'].map(COLORS)
x_vals = wr_data['yards']
y_vals = wr_data['pTD']
ax.scatter(x=x_vals, y=y_vals, s=wr_data['Plays'], c=wr_data['color'])

# Label every marker with the frame's index value, slightly offset from the
# point, then hand the labels to adjust_text for repositioning.
texts = []
for label, px, py in zip(wr_data.index, x_vals, y_vals):
    texts.append(ax.text(px + 0.1, py + 0.015, label, ha='right', va='bottom'))
adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('Receiving - volume stats (minimum 80 targets)', fontsize=20, pad=15)
ax.set_xlabel('Receiving yards', fontsize=16, labelpad=15)
ax.set_ylabel('Receiving touchdowns', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=15)

In [None]:
# Scatter of wr_data: CPOE on x, EPA_play on y, one marker per row.
fig, ax = plt.subplots(figsize=(24, 20))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
wr_data['color'] = wr_data['posteam'].map(COLORS)
x_vals = wr_data['CPOE']
y_vals = wr_data['EPA_play']
ax.scatter(x=x_vals, y=y_vals, s=wr_data['Plays'], c=wr_data['color'])

# Label every marker with the frame's index value, anchored exactly on the
# point, then hand the labels to adjust_text for repositioning.
texts = []
for label, px, py in zip(wr_data.index, x_vals, y_vals):
    texts.append(ax.text(px, py, label, ha='right', va='bottom'))
adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('Receiving - efficacy (minimum 80 targets)', fontsize=20, pad=15)
ax.set_xlabel('Completed passes over expectation (%)', fontsize=16, labelpad=15)
ax.set_ylabel('EPA / target ', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=15)

In [None]:
# Completed passes only (rows where complete_pass equals 1).
wr_df = df[df['complete_pass'] == 1]

# Per-receiver averages over those completions: one row per
# (receiver, posteam) pair, indexed by receiver, posteam kept as a column.
#
# Columns: posteam, air (mean air_yards), yac (mean yards_after_catch),
# Plays (count of non-null wpa).
#
# The cut-off is inclusive (>=) so it agrees with the "minimum 48 catches"
# wording in the figure title; a strict > silently dropped players sitting
# exactly on the threshold.
#
# Named aggregation ties each output name to its source column, and building
# the frame in one chain (no inplace sort on a filtered slice) means later
# column assignments on wr_data_comp do not trigger SettingWithCopyWarning.
MIN_WR_CATCHES = 48

wr_data_comp = (
    wr_df.groupby(['receiver', 'posteam'])
    .agg(
        air=('air_yards', 'mean'),
        yac=('yards_after_catch', 'mean'),
        Plays=('wpa', 'count'),
    )
    .reset_index(level='posteam')
    .loc[lambda summary: summary['Plays'] >= MIN_WR_CATCHES]
    .sort_values('air', ascending=False)
)

In [None]:
# Scatter of wr_data_comp: air on x, yac on y, one marker per row.
fig, ax = plt.subplots(figsize=(24, 20))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
wr_data_comp['color'] = wr_data_comp['posteam'].map(COLORS)
x_vals = wr_data_comp['air']
y_vals = wr_data_comp['yac']
ax.scatter(x=x_vals, y=y_vals, s=wr_data_comp['Plays'], c=wr_data_comp['color'])

# Label every marker with the frame's index value, slightly offset from the
# point.
texts = []
for label, px, py in zip(wr_data_comp.index, x_vals, y_vals):
    texts.append(ax.text(px + 0.1, py + 0.015, label, ha='right', va='bottom'))

# NOTE: label repositioning is switched off for this figure; labels stay at
# their initial offsets. Re-enable with: adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('Receiving yards per completion - before/after catch (minimum 48 catches)', fontsize=20, pad=15)
ax.set_xlabel('Air yards', fontsize=16, labelpad=15)
ax.set_ylabel('Yards after catch', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=15)

In [None]:
# Per-rusher season summary: one row per (rusher, posteam) pair, indexed by
# rusher, with posteam kept as an ordinary column.
#
# Columns: posteam, EPA_play (mean epa), success_rate (mean success), yards
# (sum of yards_gained), rTD (sum of rush_touchdown), Plays (count of
# non-null wpa).
#
# The cut-off is inclusive (>=) so it agrees with the "minimum 100 rush
# attempts" wording in the rushing figure titles; a strict > silently dropped
# players sitting exactly on the threshold.
#
# Named aggregation ties each output name to its source column, and building
# the frame in one chain (no inplace sort on a filtered slice) means later
# column assignments on rb_data do not trigger SettingWithCopyWarning.
MIN_RUSH_ATTEMPTS = 100

rb_data = (
    df.groupby(['rusher', 'posteam'])
    .agg(
        EPA_play=('epa', 'mean'),
        success_rate=('success', 'mean'),
        yards=('yards_gained', 'sum'),
        rTD=('rush_touchdown', 'sum'),
        Plays=('wpa', 'count'),
    )
    .reset_index(level='posteam')
    .loc[lambda summary: summary['Plays'] >= MIN_RUSH_ATTEMPTS]
    .sort_values('EPA_play', ascending=False)
)

In [None]:
# Scatter of rb_data: yards on x, rTD on y, one marker per row.
fig, ax = plt.subplots(figsize=(16, 12))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
rb_data['color'] = rb_data['posteam'].map(COLORS)
x_vals = rb_data['yards']
y_vals = rb_data['rTD']
ax.scatter(x=x_vals, y=y_vals, s=rb_data['Plays'], c=rb_data['color'])

# Label every marker with the frame's index value, slightly offset from the
# point, then hand the labels to adjust_text for repositioning.
texts = []
for label, px, py in zip(rb_data.index, x_vals, y_vals):
    texts.append(ax.text(px + 0.1, py + 0.015, label, ha='right', va='bottom'))
adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('Rushing volume stats (minimum 100 rush attempts)', fontsize=20, pad=15)
ax.set_xlabel('Rushing yards', fontsize=16, labelpad=15)
ax.set_ylabel('Rushing touchdowns', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=15)

In [None]:
# Scatter of rb_data: success_rate on x, EPA_play on y, one marker per row.
fig, ax = plt.subplots(figsize=(16, 12))

# Marker colour is looked up in COLORS from each row's posteam value;
# marker size is driven by the Plays column.
rb_data['color'] = rb_data['posteam'].map(COLORS)
x_vals = rb_data['success_rate']
y_vals = rb_data['EPA_play']
ax.scatter(x=x_vals, y=y_vals, s=rb_data['Plays'], c=rb_data['color'])

# Label every marker with the frame's index value, anchored exactly on the
# point, then hand the labels to adjust_text for repositioning.
texts = []
for label, px, py in zip(rb_data.index, x_vals, y_vals):
    texts.append(ax.text(px, py, label, ha='right', va='bottom'))
adjust_text(texts)

# Faint grid, kept below the plotted artists
ax.grid(zorder=0, alpha=.4)
ax.set_axisbelow(True)

# Degree-1 least-squares fit, drawn as a translucent black line
slope, intercept = np.polyfit(x_vals, y_vals, 1)
ax.plot(x_vals, slope * x_vals + intercept, 'k', alpha=.3, linestyle='-')

# Title, axis labels and data-source credit
ax.set_title('Rushing efficiency stats (minimum 100 rush attempts)', fontsize=20, pad=15)
ax.set_xlabel('Rushing success rate', fontsize=16, labelpad=15)
ax.set_ylabel('Rushing EPA/play', fontsize=16, labelpad=15)
fig.text(.72, .06, 'Author: KiraQQ, Data: nflfastR', fontsize=15)