# What Makes a Legendary Running Back Season?


In [13]:
# IMPORT PACKAGES
import pandas as pd
import sqlalchemy as sa
import pymysql
import os
from sqlalchemy import create_engine
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cryptography
from sklearn.linear_model import LinearRegression

In [14]:
# CONNECT TO LOCAL FANTASY SQL DATABASE
# DEFINE THE DATABASE CREDENTIALS
# EACH VALUE CAN BE OVERRIDDEN WITH AN ENVIRONMENT VARIABLE SO THE REAL
# PASSWORD DOES NOT HAVE TO BE SAVED IN THE NOTEBOOK; THE DEFAULTS BELOW
# KEEP THE ORIGINAL LOCAL SETUP WORKING UNCHANGED.
user = os.environ.get('FANTASY_DB_USER', 'root')
password = os.environ.get('FANTASY_DB_PASSWORD', 'password123')
host = os.environ.get('FANTASY_DB_HOST', 'localhost')
port = int(os.environ.get('FANTASY_DB_PORT', '3306'))
database = os.environ.get('FANTASY_DB_NAME', 'fantasydb')

# PYTHON FUNCTION TO CONNECT TO THE MYSQL DATABASE AND
# RETURN THE SQLALCHEMY ENGINE OBJECT
def get_connection():
    """Create a SQLAlchemy engine for the MySQL database.

    Reads the module-level ``user``, ``password``, ``host``, ``port`` and
    ``database`` values and connects through the ``mysql+pymysql`` driver.

    The URL is assembled with ``URL.create`` instead of string formatting so
    that credentials containing characters such as ``@``, ``:`` or ``/`` are
    escaped correctly rather than corrupting the connection string.

    Returns:
        The engine object produced by ``create_engine``.
    """
    url = sa.engine.URL.create(
        drivername="mysql+pymysql",
        username=user,
        password=password,
        host=host,
        port=port,
        database=database,
    )
    return create_engine(url)

# CONNECT TO SQL DATABASE
try:
    # GET THE CONNECTION OBJECT (ENGINE) FOR THE DATABASE
    engine = get_connection()
    # create_engine() IS LAZY AND DOES NOT CONTACT THE SERVER BY ITSELF, SO
    # OPEN (AND IMMEDIATELY RELEASE) ONE CONNECTION; OTHERWISE THE SUCCESS
    # MESSAGE BELOW WOULD PRINT EVEN WHEN THE DATABASE IS UNREACHABLE.
    with engine.connect():
        pass
    print(
        f"Connection to the {host} for user {user} created successfully.")
except Exception as ex:
    # BEST-EFFORT: REPORT THE PROBLEM INSTEAD OF STOPPING THE NOTEBOOK
    print("Connection could not be made due to the following error: \n", ex)

Connection to the localhost for user root created successfully.


In [15]:
# GET DATAFRAME OF ALL PLAYERS OF *POSITION* IN *YEAR*
def retrieve_players(pos, year):
    """Load every row of the ``{pos}_stats_{year}`` table into a DataFrame.

    Args:
        pos: Position prefix of the table name (e.g. ``'rb'``).
        year: Season suffix of the table name (e.g. ``'2020'``).

    Returns:
        A pandas DataFrame with all columns and rows of that table.

    Note:
        ``pos`` and ``year`` are interpolated straight into the SQL text
        because table names cannot be bound as parameters; only pass trusted
        values.
    """
    query = sa.text(
        f'''
        SELECT
            *
        FROM
            {pos}_stats_{year}
        ''')
    # The context manager releases the connection on exit, so no explicit
    # conn.close() is needed (closing inside the block tears the connection
    # down while the surrounding transaction context is still open).
    with engine.begin() as conn:
        df = pd.read_sql_query(query, conn)
    return df

# GET THE *POSITION* PLAYERS WHO AVERAGED AT LEAST 22.5 FANTASY POINTS PER GAME
# IN *YEAR*, TOGETHER WITH THEIR *POSITION* STATS FROM THE PRIOR YEAR
def top_x_players_prior_stats(pos, year, rank):
    """Return prior-season stats for players with a big season in ``year``.

    A player qualifies when ``FPTS / G`` in ``{pos}_stats_{year}`` is at least
    22.5. Each qualifying player is joined by ``NAME`` to their row in
    ``{pos}_stats_{prior_year}`` and in ``ppr_adp_{prior_year}``.

    Args:
        pos: One of ``'qb'``, ``'rb'``, ``'wr'``, ``'te'``.
        year: Season to qualify in, ``'2019'`` through ``'2022'`` (an int is
            accepted and converted to a string).
        rank: Currently unused; kept so existing callers keep working. The
            selection is driven only by the points-per-game threshold.

    Returns:
        A DataFrame ordered by the ``year`` rank, with a ``NAME`` column and
        every other column prefixed ``PRIOR_``. Returns ``None`` (after
        printing a message) when ``year`` or ``pos`` is not valid.

    NOTE(review): the column returned as ``PRIOR_RANK`` is filled from
    ``{pos}_stats_{year}.RANK`` (the qualifying season), while the prior
    season's own ``RANK`` column is dropped. Confirm that this labelling is
    intended before relying on it as a prior-year rank.
    """
    valid_year = ('2019', '2020', '2021', '2022')
    valid_pos = ('qb', 'rb', 'wr', 'te')

    year = str(year)
    if year not in valid_year:
        print('Not a valid year: 2019-2022')
        return None
    if pos not in valid_pos:
        print('Not a valid position')
        return None

    prior_year = str(int(year) - 1)
    # Presumably ppr_adp_*.AGE stores the player's age as of 2023, so this
    # offset rolls it back to the prior season -- TODO confirm against the
    # table contents.
    age_change = 2023 - int(prior_year)

    query = sa.text(
        f'''
        SELECT
            {pos}_stats_{year}.NAME as NAME1,
            {pos}_stats_{year}.RANK as RANK1,
            ppr_adp_{prior_year}.AGE,
            {pos}_stats_{prior_year}.*
        FROM
            {pos}_stats_{prior_year}
        INNER JOIN
            {pos}_stats_{year}
        ON
            {pos}_stats_{year}.NAME = {pos}_stats_{prior_year}.NAME
        INNER JOIN
            ppr_adp_{prior_year}
        ON
            {pos}_stats_{prior_year}.NAME = ppr_adp_{prior_year}.NAME

        WHERE
            ({pos}_stats_{year}.FPTS / {pos}_stats_{year}.G) >= 22.5
        ORDER BY
            {pos}_stats_{year}.RANK ASC
        ''')

    # Hold the connection only for the query; the context manager releases
    # it, so no explicit conn.close() is required.
    with engine.begin() as conn:
        df = pd.read_sql_query(query, conn)

    # Drop the prior-year table's own index/RANK/NAME; NAME1/RANK1 from the
    # qualifying season take their place after the renames below.
    df = df.drop(columns=['index', 'RANK', 'NAME'])
    df["AGE"] = df["AGE"].subtract(age_change)
    df.columns = ['PRIOR_' + str(col) for col in df.columns]
    df = df.rename(columns={'PRIOR_NAME1': 'NAME', 'PRIOR_RANK1': 'PRIOR_RANK'})
    return df

In [16]:
# ONE FRAME PER SEASON (2019-2022) OF QUALIFYING RBS WITH THEIR PRIOR-YEAR STATS
df1, df2, df3, df4 = [
    top_x_players_prior_stats('rb', season, '12')
    for season in ('2019', '2020', '2021', '2022')
]
legendary_rbs = pd.concat([df1, df2, df3, df4], ignore_index=True)

# PER-GAME RATES FROM THE PRIOR SEASON, ROUNDED TO ONE DECIMAL
rush_yds_per_game = legendary_rbs['PRIOR_RUS_YDS'].div(legendary_rbs['PRIOR_G']).round(1)
ppr_per_game = legendary_rbs['PRIOR_FPTS'].div(legendary_rbs['PRIOR_G']).round(1)
rec_yds_per_game = legendary_rbs['PRIOR_REC_YDS'].div(legendary_rbs['PRIOR_G']).round(1)

legendary_rbs = legendary_rbs.assign(
    PRIOR_RUS_YDS_PER_G=rush_yds_per_game,
    PRIOR_REC_YDS_PER_G=rec_yds_per_game,
    PRIOR_PPR_POINTS_PER_G=ppr_per_game,
)

# KEEP ONLY THE COLUMNS USED IN THE ANALYSIS, IN DISPLAY ORDER
legendary_rbs = legendary_rbs[[
    'NAME', 'PRIOR_RANK', 'PRIOR_AGE',
    'PRIOR_RUS_YDS_PER_G', 'PRIOR_REC_YDS_PER_G', 'PRIOR_PPR_POINTS_PER_G',
    'PRIOR_IMP_ATT', 'PRIOR_SNAPS', 'PRIOR_BRKTKL',
    'PRIOR_GRZ_ATT', 'PRIOR_EXPLO',
]]

# SUMMARY STATISTICS, REUSED LATER AS THE BREAKOUT THRESHOLDS
desc_rb = legendary_rbs.describe()
legendary_rbs

Unnamed: 0,NAME,PRIOR_RANK,PRIOR_AGE,PRIOR_RUS_YDS_PER_G,PRIOR_REC_YDS_PER_G,PRIOR_PPR_POINTS_PER_G,PRIOR_IMP_ATT,PRIOR_SNAPS,PRIOR_BRKTKL,PRIOR_GRZ_ATT,PRIOR_EXPLO
0,Christian McCaffrey,1.0,22.0,68.6,54.2,24.1,343.0,966.0,15.0,16.0,15.0
1,Alvin Kamara,1.0,24.0,56.9,38.1,17.8,268.0,626.0,29.0,7.0,8.0
2,Dalvin Cook,2.0,23.0,81.1,37.1,20.9,313.0,604.0,20.0,21.0,12.0
3,Christian McCaffrey,54.0,23.0,86.7,62.8,29.4,429.0,1039.0,16.0,19.0,18.0
4,Derrick Henry,22.0,26.0,126.7,7.1,20.8,409.0,705.0,34.0,17.0,30.0


## Legendary Running Backs (2019 - 2022)
In the span of 4 years, there have been 5 legendary running back seasons. What we want to do is look at their per-game statistics and look for trends that may help us identify running backs from this past season who have a higher chance of entering the legendary running back tier.

In [17]:
# DISPLAY THE describe() SUMMARY OF THE legendary_rbs TABLE
desc_rb

Unnamed: 0,PRIOR_RANK,PRIOR_AGE,PRIOR_RUS_YDS_PER_G,PRIOR_REC_YDS_PER_G,PRIOR_PPR_POINTS_PER_G,PRIOR_IMP_ATT,PRIOR_SNAPS,PRIOR_BRKTKL,PRIOR_GRZ_ATT,PRIOR_EXPLO
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,16.0,23.6,84.0,39.86,22.6,352.4,788.0,22.8,16.0,16.6
std,23.054284,1.516575,26.507357,21.306173,4.406246,66.774247,201.043528,8.348653,5.385165,8.354639
min,1.0,22.0,56.9,7.1,17.8,268.0,604.0,15.0,7.0,8.0
25%,1.0,23.0,68.6,37.1,20.8,313.0,626.0,16.0,16.0,12.0
50%,2.0,23.0,81.1,38.1,20.9,343.0,705.0,20.0,17.0,15.0
75%,22.0,24.0,86.7,54.2,24.1,409.0,966.0,29.0,19.0,18.0
max,54.0,26.0,126.7,62.8,29.4,429.0,1039.0,34.0,21.0,30.0


In [18]:
# GET DATAFRAME OF RUNNING BACKS FROM *YEAR* WHO HAVE LEGENDARY POTENTIAL
# i.e. STATS BETTER THAN LEGENDARY RUNNING BACK AVERAGE STATS MINUS ONE STANDARD DEVIATION
def retrieve_breakout_rbs(year):
    """Return running backs from ``year`` who clear the breakout thresholds.

    Joins ``rb_stats_{year}`` to ``ppr_adp_{year + 1}`` on ``NAME`` and keeps
    players whose ``AGE`` in the ADP table is at most 27 and whose rushing
    and receiving yards per game both exceed (mean - std) of the matching
    ``PRIOR_*_PER_G`` column in the module-level ``desc_rb`` summary.

    Args:
        year: Season of the stats table, as a string or int (e.g. ``'2022'``).

    Returns:
        A DataFrame with columns ``NAME``, ``RANK`` and ``ADP``, ordered by
        ascending ``ADP``.
    """
    next_year = str(int(year) + 1)
    stats_table = f'rb_stats_{year}'
    adp_table = f'ppr_adp_{next_year}'

    # Look the statistics up by label: positional [1] / [2] on a
    # label-indexed Series is deprecated in recent pandas and is easy to
    # misread.
    min_rus_yds = (desc_rb.loc['mean', 'PRIOR_RUS_YDS_PER_G']
                   - desc_rb.loc['std', 'PRIOR_RUS_YDS_PER_G'])
    min_rec_yds = (desc_rb.loc['mean', 'PRIOR_REC_YDS_PER_G']
                   - desc_rb.loc['std', 'PRIOR_REC_YDS_PER_G'])

    query = sa.text(
        f'''
        SELECT
            {stats_table}.*,
            {adp_table}.ADP,
            {adp_table}.AGE
        FROM
            {stats_table}
        INNER JOIN
            {adp_table}
        ON
            {stats_table}.NAME = {adp_table}.NAME
        WHERE
            {adp_table}.AGE <= 27
        AND
            ({stats_table}.RUS_YDS / {stats_table}.G)  > {min_rus_yds}
        AND
            ({stats_table}.REC_YDS / {stats_table}.G)  > {min_rec_yds}
        ORDER BY
            {adp_table}.ADP
        ASC
        ''')

    # The context manager releases the connection; no explicit close needed.
    with engine.begin() as conn:
        df = pd.read_sql_query(query, conn)

    return df[['NAME', 'RANK', 'ADP']]

# GET DATAFRAME COMPARING THE OVERALL RANK OF RUNNING BACKS FROM *retrieve_breakout_rbs()*
# FROM *YEAR* AND THEIR OVERALL RANK THE FOLLOWING YEAR
def compare_breakout_rb(year):
    """Compare breakout candidates' rank in ``year`` with the next season.

    Args:
        year: Season as a string (it is concatenated into column names).

    Returns:
        A DataFrame with columns ``NAME``, ``{year}_ADP``, ``{year}_RANK``,
        ``{year + 1}_RANK``, ``CHANGE`` (this year's rank minus next year's),
        ``IMPROVE`` (``CHANGE >= 0``) and ``TOP_5`` (next-year rank <= 5).
    """
    candidates = retrieve_breakout_rbs(year)
    following = str(int(year) + 1)
    next_season = retrieve_players('rb', following)

    # RANK exists on both sides, so the merge suffixes it as RANK_x / RANK_y.
    merged = candidates.merge(next_season, on='NAME')

    rank_col = year + '_RANK'
    adp_col = year + '_ADP'
    next_rank_col = following + '_RANK'

    result = merged[['NAME', 'RANK_x', 'ADP', 'RANK_y']].rename(
        columns={'RANK_x': rank_col, 'ADP': adp_col, 'RANK_y': next_rank_col}
    )
    result['CHANGE'] = result[rank_col] - result[next_rank_col]
    result['IMPROVE'] = result['CHANGE'] >= 0
    result['TOP_5'] = result[next_rank_col] <= 5

    ordered = ['NAME', adp_col, rank_col, next_rank_col, 'CHANGE', 'IMPROVE', 'TOP_5']
    return result[ordered]

In [19]:
# COMPARE RBS WITH LEGENDARY POTENTIAL FROM 2018 TO 2021
# ONE COMPARISON FRAME PER SEASON, UNPACKED INTO ITS OWN VARIABLE
br_rb_2018, br_rb_2019, br_rb_2020, br_rb_2021 = (
    compare_breakout_rb(season)
    for season in ('2018', '2019', '2020', '2021')
)

In [20]:
# DISPLAY 2018 CANDIDATES AND THEIR 2019 RANK
br_rb_2018

Unnamed: 0,NAME,2018_ADP,2018_RANK,2019_RANK,CHANGE,IMPROVE,TOP_5
0,Saquon Barkley,1.4,1.0,10.0,-9.0,False,False
1,Christian McCaffrey,3.2,2.0,1.0,1.0,True,True
2,Joe Mixon,18.4,10.0,13.0,-3.0,False,False
3,Kerryon Johnson,28.5,34.0,54.0,-20.0,False,False
4,Kareem Hunt,100.8,12.0,47.0,-35.0,False,False


In [21]:
# DISPLAY 2019 CANDIDATES AND THEIR 2020 RANK
br_rb_2019

Unnamed: 0,NAME,2019_ADP,2019_RANK,2020_RANK,CHANGE,IMPROVE,TOP_5
0,Christian McCaffrey,1.2,1.0,54.0,-53.0,False,False
1,Saquon Barkley,2.5,10.0,120.0,-110.0,False,False
2,Dalvin Cook,6.2,6.0,2.0,4.0,True,True


In [22]:
# DISPLAY 2020 CANDIDATES AND THEIR 2021 RANK
br_rb_2020

Unnamed: 0,NAME,2020_ADP,2020_RANK,2021_RANK,CHANGE,IMPROVE,TOP_5
0,Christian McCaffrey,1.3,54.0,38.0,16.0,True,False
1,Dalvin Cook,2.5,2.0,16.0,-14.0,False,False
2,Jonathan Taylor,13.2,6.0,1.0,5.0,True,True
3,Joe Mixon,19.4,49.0,4.0,45.0,True,True
4,Clyde Edwards-Helaire,19.7,22.0,46.0,-24.0,False,False
5,David Montgomery,25.4,4.0,20.0,-16.0,False,False
6,James Robinson,27.1,7.0,24.0,-17.0,False,False
7,Myles Gaskin,39.2,28.0,25.0,3.0,True,False


In [23]:
# DISPLAY 2021 CANDIDATES AND THEIR 2022 RANK
br_rb_2021

Unnamed: 0,NAME,2021_ADP,2021_RANK,2022_RANK,CHANGE,IMPROVE,TOP_5
0,Jonathan Taylor,1.4,1.0,33.0,-32.0,False,False
1,Christian McCaffrey,2.4,38.0,2.0,36.0,True,True
2,Najee Harris,7.0,3.0,14.0,-11.0,False,False
3,Joe Mixon,10.0,4.0,10.0,-6.0,False,False
4,David Montgomery,39.3,20.0,24.0,-4.0,False,False
5,Josh Jacobs,47.3,12.0,3.0,9.0,True,True


## Observations
Every year since 2018, from the list of potential legendary breakout running backs, at least one went on to not only improve their overall rank, but rank into the top 5 as well.

In [24]:
# RBS WHOSE 2022 STATS CLEAR THE THRESHOLDS (JOINED TO THE 2023 ADP TABLE)
retrieve_breakout_rbs('2022')

Unnamed: 0,NAME,RANK,ADP
0,Christian McCaffrey,2.0,2
1,Saquon Barkley,5.0,8
2,Tony Pollard,8.0,20
3,Josh Jacobs,3.0,21
4,Rhamondre Stevenson,7.0,24
5,Breece Hall,42.0,31
6,Travis Etienne Jr.,17.0,32


From this list, my top two potential legendary breakouts for the 2023 season are Tony Pollard and Travis Etienne Jr. With Ezekiel Elliott leaving the Cowboys backfield, Pollard should see a massive increase in the number of snaps played, allowing him more opportunities to score. There is much buzz around Etienne since the Jaguars drafted Tank Bigsby. I don't buy into the fear that Etienne busts as a result. I think Etienne is one of the most explosive players in the league, and going into year 3, which is really his 2nd year playing, he should see a massive improvement. Both Pollard and Etienne are on some of the best offenses in the league, providing even more opportunity for them. I would lean towards Pollard.