In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the running-back stats and normalise every column name to lower case.
rb_df = pd.read_csv('rb_stats.csv').rename(columns=str.lower)

# Depth-chart tier: ranks are bucketed in groups of 12 (1-12 -> 1.0, 13-24 -> 2.0, ...).
rb_df['pos_depth'] = np.ceil(rb_df['rank'].div(12))

rb_df.head(20)

Unnamed: 0,rank,player,att,yds,y/a,lg,20+,td,rec,tgt,rec_yds,y/r,rec_td,fl,g,fpts,fpts/g,own,year,pos_depth
0,1,Todd Gurley II (FA),256,1251,4.9,36,12,17,59,81,580,9.8,4,1,14,313.1,22.4,0.1%,2018,1.0
1,2,Saquon Barkley (NYG),261,1307,5.0,78,37,11,91,121,721,7.9,4,0,16,294.8,18.4,99.9%,2018,1.0
2,3,Christian McCaffrey (SF),219,1098,5.0,59,15,7,107,124,867,8.1,6,1,16,278.5,17.4,100.0%,2018,1.0
3,4,Alvin Kamara (NO),194,883,4.6,49,3,14,81,105,709,8.8,4,0,15,273.2,18.2,91.8%,2018,1.0
4,5,Ezekiel Elliott (FA),304,1434,4.7,41,15,6,77,95,567,7.4,3,1,15,252.1,16.8,37.4%,2018,1.0
5,6,Melvin Gordon III (BAL),175,885,5.1,34,11,10,50,66,490,9.8,4,0,12,225.5,18.8,1.3%,2018,1.0
6,7,James Conner (ARI),215,970,4.5,30,11,12,55,71,497,9.0,1,2,13,224.7,17.3,93.9%,2018,1.0
7,8,Kareem Hunt (FA),181,824,4.6,45,10,7,26,35,378,14.5,7,0,11,204.2,18.6,16.6%,2018,1.0
8,9,Joe Mixon (CIN),237,1168,4.9,51,20,8,43,55,296,6.9,1,0,14,200.4,14.3,98.8%,2018,1.0
9,10,David Johnson (FA),258,940,3.6,53,5,7,50,76,446,8.9,3,2,16,196.6,12.3,0.0%,2018,1.0


In [3]:
# Summarise column dtypes and non-null counts for the RB frame.
# NOTE(review): the recorded output lists `yds` and `rec_yds` as object dtype —
# presumably non-numeric formatting in the CSV; confirm before using them numerically.
rb_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1021 entries, 0 to 1020
Data columns (total 20 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   rank       1021 non-null   int64  
 1   player     1021 non-null   object 
 2   att        1021 non-null   int64  
 3   yds        1021 non-null   object 
 4   y/a        1021 non-null   float64
 5   lg         1021 non-null   int64  
 6   20+        1021 non-null   int64  
 7   td         1021 non-null   int64  
 8   rec        1021 non-null   int64  
 9   tgt        1021 non-null   int64  
 10  rec_yds    1021 non-null   object 
 11  y/r        1021 non-null   float64
 12  rec_td     1021 non-null   int64  
 13  fl         1021 non-null   int64  
 14  g          1021 non-null   int64  
 15  fpts       1021 non-null   float64
 16  fpts/g     1021 non-null   float64
 17  own        1021 non-null   object 
 18  year       1021 non-null   int64  
 19  pos_depth  1021 non-null   float64
dtypes: float

## RB EDA

><p style="font-size: 16px">The goal of this EDA is to measure how much fantasy points per game fall off with each drop in depth-chart tier, for each position, i.e. how many more fantasy points per game does an RB1 produce than an RB2?</p>

In [4]:
# get top 5 in terms of position depth rank
# Keep only the first five depth-chart tiers (pos_depth of 5 or lower).
rb_df_filtered = rb_df.loc[rb_df['pos_depth'].le(5)]
rb_df_filtered.head()

Unnamed: 0,rank,player,att,yds,y/a,lg,20+,td,rec,tgt,rec_yds,y/r,rec_td,fl,g,fpts,fpts/g,own,year,pos_depth
0,1,Todd Gurley II (FA),256,1251,4.9,36,12,17,59,81,580,9.8,4,1,14,313.1,22.4,0.1%,2018,1.0
1,2,Saquon Barkley (NYG),261,1307,5.0,78,37,11,91,121,721,7.9,4,0,16,294.8,18.4,99.9%,2018,1.0
2,3,Christian McCaffrey (SF),219,1098,5.0,59,15,7,107,124,867,8.1,6,1,16,278.5,17.4,100.0%,2018,1.0
3,4,Alvin Kamara (NO),194,883,4.6,49,3,14,81,105,709,8.8,4,0,15,273.2,18.2,91.8%,2018,1.0
4,5,Ezekiel Elliott (FA),304,1434,4.7,41,15,6,77,95,567,7.4,3,1,15,252.1,16.8,37.4%,2018,1.0


In [5]:
# Mean fantasy points per game for each RB depth-chart tier.
rb_group = rb_df_filtered.groupby('pos_depth').aggregate({'fpts/g': 'mean'})

# Tier-over-tier change: this tier's mean minus the previous tier's mean.
# The first tier has no predecessor, so its NaN difference is reported as 0.
rb_group['ppg_diff'] = rb_group['fpts/g'].diff().fillna(0)
rb_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,14.988333,0.0
2.0,10.895,-4.093333
3.0,8.66,-2.235
4.0,7.221667,-1.438333
5.0,5.835,-1.386667


## WR EDA

In [6]:
# Load the wide-receiver stats and normalise every column name to lower case.
wr_df = pd.read_csv('wr_stats.csv').rename(columns=str.lower)

# Depth-chart tier for WRs: ranks are bucketed in groups of 12 (1-12 -> 1.0, 13-24 -> 2.0, ...).
wr_df['pos_depth'] = np.ceil(wr_df['rank'].div(12))
wr_df.head()

Unnamed: 0,rank,player,rec,tgt,yds,y/r,lg,20+,td,rush_att,rush_yds,rush_td,fl,g,fpts,fpts/g,own,year,pos_depth
0,1,Tyreek Hill (MIA),87,137,1479,17.0,75,55,12,22,151,1,0,16,241.0,15.1,100.0%,2018,1.0
1,2,Antonio Brown (FA),104,168,1297,12.5,78,33,15,0,0,0,0,15,219.7,14.6,0.2%,2018,1.0
2,3,Davante Adams (LV),111,169,1386,12.5,57,33,13,0,0,0,0,15,218.6,14.6,99.9%,2018,1.0
3,4,DeAndre Hopkins (TEN),115,163,1572,13.7,49,36,11,1,-7,0,2,16,218.5,13.7,96.3%,2018,1.0
4,5,Julio Jones (FA),113,170,1677,14.8,58,37,8,2,12,0,2,16,212.9,13.3,7.1%,2018,1.0


In [7]:
# Keep only the first five WR depth-chart tiers (pos_depth of 5 or lower).
wr_df_filtered = wr_df.loc[wr_df['pos_depth'].le(5)]
wr_df_filtered.tail()

Unnamed: 0,rank,player,rec,tgt,yds,y/r,lg,20+,td,rush_att,rush_yds,rush_td,fl,g,fpts,fpts/g,own,year,pos_depth
1118,56,Russell Gage (TB),51,70,426,8.4,23,3,5,0,0,0,1,13,72.6,5.6,1.1%,2022,5.0
1119,57,Rashid Shaheed (NO),28,34,488,17.4,68,19,2,4,57,1,0,12,72.5,6.0,11.3%,2022,5.0
1120,58,DeVante Parker (NE),31,47,539,17.4,43,18,3,0,0,0,0,13,71.9,5.5,2.4%,2022,5.0
1121,59,Noah Brown (HOU),43,74,555,12.9,51,12,3,0,0,0,1,16,71.5,4.5,0.1%,2022,5.0
1122,60,Alec Pierce (IND),41,78,593,14.5,47,16,2,0,0,0,0,16,71.3,4.5,6.2%,2022,5.0


In [8]:
# Mean fantasy points per game for each WR depth-chart tier.
wr_group = wr_df_filtered.groupby(['pos_depth']).aggregate({'fpts/g': 'mean'})

# Tier-over-tier change: this tier's mean minus the previous tier's mean.
# The first tier has no predecessor, so its NaN difference is reported as 0.
wr_group['ppg_diff'] = wr_group['fpts/g'].diff().fillna(0)
wr_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,12.096667,0.0
2.0,9.58,-2.516667
3.0,8.04,-1.54
4.0,7.215,-0.825
5.0,6.248333,-0.966667


## TE EDA

In [9]:
# Load the tight-end stats and normalise every column name to lower case.
te_df = pd.read_csv('te_stats.csv').rename(columns=str.lower)
# Trim surrounding whitespace so exact-match lookups on the player name work.
te_df['player'] = te_df['player'].str.strip()

# Depth-chart tier for TEs: ranks are bucketed in groups of 3 (1-3 -> 1.0, 4-6 -> 2.0, ...).
te_df['pos_depth'] = np.ceil(te_df['rank'].div(3))

te_df.head()

Unnamed: 0,rank,player,rec,tgt,yds,y/r,lg,20+,td,rush_att,rush_yds,rush_td,fl,g,fpts,fpts/g,own,year,pos_depth
0,1,Travis Kelce (KC),103,150,1336,13.0,43,26,10,0,0,0,1,16,191.6,12.0,100.0%,2018,1.0
1,2,George Kittle (SF),88,136,1377,15.6,85,40,5,1,10,0,0,16,170.7,10.7,99.3%,2018,1.0
2,3,Zach Ertz (ARI),116,156,1163,10.0,34,14,8,0,0,0,0,16,164.3,10.3,21.8%,2018,1.0
3,4,Eric Ebron (FA),66,110,750,11.4,53,10,13,3,-8,1,1,16,156.2,9.8,0.0%,2018,2.0
4,5,Jared Cook (FA),68,101,896,13.2,45,21,6,0,0,0,0,16,125.6,7.9,0.0%,2018,2.0


In [10]:
# Keep the first five TE tiers; copy so the tier override below edits an independent frame.
te_df_filtered = te_df.loc[te_df['pos_depth'].le(5)].copy()

# Give Travis Kelce a tier of his own (0), ahead of tier 1.
te_df_filtered.loc[te_df_filtered['player'].eq('Travis Kelce (KC)'), 'pos_depth'] = 0

te_df_filtered.head()

Unnamed: 0,rank,player,rec,tgt,yds,y/r,lg,20+,td,rush_att,rush_yds,rush_td,fl,g,fpts,fpts/g,own,year,pos_depth
0,1,Travis Kelce (KC),103,150,1336,13.0,43,26,10,0,0,0,1,16,191.6,12.0,100.0%,2018,0.0
1,2,George Kittle (SF),88,136,1377,15.6,85,40,5,1,10,0,0,16,170.7,10.7,99.3%,2018,1.0
2,3,Zach Ertz (ARI),116,156,1163,10.0,34,14,8,0,0,0,0,16,164.3,10.3,21.8%,2018,1.0
3,4,Eric Ebron (FA),66,110,750,11.4,53,10,13,3,-8,1,1,16,156.2,9.8,0.0%,2018,2.0
4,5,Jared Cook (FA),68,101,896,13.2,45,21,6,0,0,0,0,16,125.6,7.9,0.0%,2018,2.0


In [11]:
# Mean fantasy points per game for each TE depth-chart tier (tier 0 is Kelce alone).
te_group = te_df_filtered.groupby('pos_depth').aggregate({'fpts/g': 'mean'})

# Tier-over-tier change: this tier's mean minus the previous tier's mean.
# The first tier has no predecessor, so its NaN difference is reported as 0.
te_group['ppg_diff'] = te_group['fpts/g'].diff().fillna(0)
te_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,11.7,0.0
1.0,9.59,-2.11
2.0,7.933333,-1.656667
3.0,6.5,-1.433333
4.0,6.02,-0.48
5.0,5.466667,-0.553333


## QB EDA

In [12]:
# Load the quarterback stats and normalise every column name to lower case.
qb_df = pd.read_csv('qb_stats.csv').rename(columns=str.lower)

# Depth-chart tier for QBs: ranks are bucketed in groups of 4 (1-4 -> 1.0, 5-8 -> 2.0, ...).
qb_df['pos_depth'] = np.ceil(qb_df['rank'].div(4))

qb_df.head()

Unnamed: 0,rank,player,cmp,att,pct,yds,y/a,td,int,sacks,rush_att,rush_yds,rush_td,fl,g,fpts,fpts/g,own,year,pos_depth
0,1,Patrick Mahomes II (KC),383,580,66.0,5097,8.8,50,12,26,60,272,2,2,16,417.0,26.1,100.0%,2018,1.0
1,2,Matt Ryan (FA),422,608,69.4,4924,8.1,35,7,42,33,125,3,5,16,354.0,22.1,6.1%,2018,1.0
2,3,Ben Roethlisberger (FA),452,675,67.0,5129,7.6,34,16,24,31,98,3,2,16,341.0,21.3,13.6%,2018,1.0
3,4,Deshaun Watson (CLE),345,505,68.3,4165,8.2,26,9,62,99,551,5,3,16,331.9,20.7,91.9%,2018,1.0
4,5,Andrew Luck (FA),430,639,67.3,4593,7.2,39,15,18,46,148,0,1,16,327.1,20.4,2.2%,2018,2.0


In [13]:
# Keep only the first five QB depth-chart tiers (pos_depth of 5 or lower).
qb_df_filtered = qb_df.loc[qb_df['pos_depth'].le(5)]
qb_df_filtered.tail()

Unnamed: 0,rank,player,cmp,att,pct,yds,y/a,td,int,sacks,rush_att,rush_yds,rush_td,fl,g,fpts,fpts/g,own,year,pos_depth
348,16,Russell Wilson (DEN),292,483,60.5,3524,7.3,16,11,55,55,277,3,2,15,236.0,15.7,74.3%,2022,4.0
349,17,Derek Carr (NO),305,502,60.8,3522,7.0,24,14,27,24,102,0,0,15,233.0,15.5,36.3%,2022,5.0
350,18,Dak Prescott (DAL),261,394,66.2,2860,7.3,23,15,20,45,182,1,1,12,213.6,17.8,91.7%,2022,5.0
351,19,Kyler Murray (ARI),259,390,66.4,2368,6.1,14,7,25,67,418,3,2,11,207.6,18.9,32.8%,2022,5.0
352,20,Marcus Mariota (PHI),184,300,61.3,2219,7.4,15,9,28,85,438,4,3,13,205.7,15.8,0.7%,2022,5.0


In [14]:
# Mean fantasy points per game per (season, tier) pair, to see how the tiers move year to year.
qb_df_filtered.groupby(['year', 'pos_depth'])[['fpts/g']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,fpts/g
year,pos_depth,Unnamed: 2_level_1
2018,1.0,22.55
2018,2.0,19.9
2018,3.0,18.65
2018,4.0,17.75
2018,5.0,13.8
2019,1.0,22.925
2019,2.0,20.05
2019,3.0,17.775
2019,4.0,16.45
2019,5.0,16.225


In [15]:
# Mean fantasy points per game for each QB tier, using only seasons before 2022.
qb_group = qb_df_filtered[qb_df_filtered.year < 2022].groupby('pos_depth').aggregate({'fpts/g': 'mean'})

# Tier-over-tier change: this tier's mean minus the previous tier's mean.
# The first tier has no predecessor, so its NaN difference is reported as 0.
qb_group['ppg_diff'] = qb_group['fpts/g'].diff().fillna(0)
qb_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,23.3625,0.0
2.0,20.8125,-2.55
3.0,19.31875,-1.49375
4.0,17.3875,-1.93125
5.0,15.25625,-2.13125


In [16]:
# Mean fantasy points per game for each QB tier, using the 2022 season only.
qb_group_22 = qb_df_filtered[qb_df_filtered.year == 2022].groupby('pos_depth').aggregate({'fpts/g': 'mean'})

# Tier-over-tier change: this tier's mean minus the previous tier's mean.
# The first tier has no predecessor, so its NaN difference is reported as 0.
qb_group_22['ppg_diff'] = qb_group_22['fpts/g'].diff().fillna(0)

# Suffix the columns so they stay distinguishable once merged with the pre-2022 table.
qb_group_22 = qb_group_22.add_suffix('_22')
qb_group_22

Unnamed: 0_level_0,fpts/g_22,ppg_diff_22
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,24.2,0.0
2.0,18.725,-5.475
3.0,17.25,-1.475
4.0,17.3,0.05
5.0,17.0,-0.3


In [17]:
# Put the pre-2022 and 2022 tier tables side by side, matched on the pos_depth index.
qb_final_group = qb_group.join(qb_group_22, how='inner')
qb_final_group

Unnamed: 0_level_0,fpts/g,ppg_diff,fpts/g_22,ppg_diff_22
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,23.3625,0.0,24.2,0.0
2.0,20.8125,-2.55,18.725,-5.475
3.0,19.31875,-1.49375,17.25,-1.475
4.0,17.3875,-1.93125,17.3,0.05
5.0,15.25625,-2.13125,17.0,-0.3


# Position Differences: Which Position Is Best to Draft Early?

> QB Rankings: Each Tier is Grouped with 4 Players<br>
> RB and WR Rankings: Each Tier is Grouped with 12 Players<br>
> TE Rankings: The first tier is Travis Kelce alone, the second tier contains 2 players, and the remaining tiers contain 3 players each

In [18]:
# Re-display the QB tier table (pre-2022 columns alongside the 2022 `_22` columns) for comparison.
qb_final_group

Unnamed: 0_level_0,fpts/g,ppg_diff,fpts/g_22,ppg_diff_22
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,23.3625,0.0,24.2,0.0
2.0,20.8125,-2.55,18.725,-5.475
3.0,19.31875,-1.49375,17.25,-1.475
4.0,17.3875,-1.93125,17.3,0.05
5.0,15.25625,-2.13125,17.0,-0.3


In [19]:
# Re-display the RB tier table (mean fpts/g and tier-over-tier difference) for comparison.
rb_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,14.988333,0.0
2.0,10.895,-4.093333
3.0,8.66,-2.235
4.0,7.221667,-1.438333
5.0,5.835,-1.386667


In [20]:
# Re-display the WR tier table (mean fpts/g and tier-over-tier difference) for comparison.
wr_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,12.096667,0.0
2.0,9.58,-2.516667
3.0,8.04,-1.54
4.0,7.215,-0.825
5.0,6.248333,-0.966667


In [21]:
# Re-display the TE tier table (tier 0 is Travis Kelce alone) for comparison.
te_group

Unnamed: 0_level_0,fpts/g,ppg_diff
pos_depth,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,11.7,0.0
1.0,9.59,-2.11
2.0,7.933333,-1.656667
3.0,6.5,-1.433333
4.0,6.02,-0.48
5.0,5.466667,-0.553333


Based on the grouped tiers above, it is probably best to draft running backs early, since RB1s produce about 4.1 more fantasy points per game on average than RB2s.  The next greatest tier break is found at QB, where the top tier of quarterbacks produces 2.55 more points per game than the 2nd tier (seasons before 2022).  And, if 2022 can be taken as an indicator of where this position group is headed, then it may make more sense than ever to take a QB early: in 2022, the top tier of QBs (top 4) produced 5.48 more points per game on average than the 2nd tier of QBs.

But, if 2022 is to be an indicator of future