In [27]:
import nfl_data_py as nfl
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
pd.set_option("display.max_columns", None)
import sklearn
from analysis import PositionReport
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [28]:
# List of functions used in the notebook

def execute_statement(sql: str) -> pd.DataFrame:
    """Run ``sql`` against the local ``thefantasybot`` database and return the rows.

    Parameters
    ----------
    sql : str
        A complete SQL statement; it is handed unchanged to ``pd.read_sql``.

    Returns
    -------
    pd.DataFrame
        The result set of the statement.
    """
    conn = psycopg2.connect(host="localhost", database="thefantasybot", user="tbakely")
    try:
        # ``with conn`` only commits/rolls back the transaction in psycopg2;
        # it does NOT close the connection, hence the explicit close below.
        with conn:
            return pd.read_sql(sql, conn)
    finally:
        # Without this every call would leave one open connection behind.
        conn.close()

def get_corr_heatmap(df: pd.DataFrame, ax = None, show_plot = True):
    """Draw a lower-triangle correlation heatmap of the numeric columns of ``df``.

    Columns whose name contains "season" or "week" (case-insensitive) are left
    out of the correlation matrix.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose numeric columns are correlated.
    ax : optional
        Forwarded to ``sns.heatmap`` as its ``ax`` argument.
    show_plot : bool, default True
        When True the figure is shown with ``plt.show()`` and ``None`` is
        returned; when False the value returned by ``sns.heatmap`` is returned
        so the caller can keep decorating the plot.
    """
    exclude_words = ("season", "week")
    # select_dtypes("number") picks up every numeric dtype (int32, float32,
    # nullable ints, ...) instead of only the literal int64/float64 pair.
    numerical_cols = [
        col for col in df.select_dtypes(include="number").columns
        if not any(word in str(col).lower() for word in exclude_words)
    ]
    correlation_matrix = df[numerical_cols].corr()
    # Hide the upper triangle (diagonal included): it mirrors the lower one.
    mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
    heatmap_ax = sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm',
                             linewidths=0.5, mask=mask, ax=ax)
    if show_plot:
        plt.show()
        return None
    return heatmap_ax

In [8]:
# Wide per-player, per-week query over the archive_data schema for WR/RB/TE rows
# of seasons 2016-2022: weekly_data left-joined to offense_snap_counts,
# ngs_rushing_data, ngs_receiving_data, game_data (via full_pbp) and redzone_snaps.
# NOTE(review): game_id is resolved only through full_pbp.rusher_player_id, so
# player-weeks without a rushing play presumably get NULL roof/surface/weather
# columns -- confirm whether receivers should be matched as well.
# NOTE(review): this query stops at season 2022 while weekly_wr / weekly_rb /
# weekly_te run through 2023 -- confirm that is intended.
weekly_sql = """
select
	wd.player_id,
	wd.player_name,
	position,
	recent_team,
	wd.season,
	wd.week,
	carries,
	rushing_yards,
	rushing_tds,
	rushing_fumbles,
	rushing_fumbles_lost,
	rushing_first_downs,
	rushing_epa,
	efficiency,
	percent_attempts_gte_eight_defenders,
	avg_time_to_los,
	rush_yards_over_expected,
	avg_rush_yards,
	rush_yards_over_expected_per_att,
	rush_pct_over_expected,
	wd.receptions,
	wd.targets,
	receiving_yards,
	receiving_tds,
	receiving_fumbles,
	receiving_fumbles_lost,
	receiving_air_yards,
	receiving_yards_after_catch,
	receiving_first_downs,
	receiving_epa,
	racr,
	target_share,
	air_yards_share,
	wopr,
	offense_snaps,
	offense_pct,
    redzone.redzone,
	(carries + wd.targets) as total_usage,
    wd.fantasy_points,
    wd.fantasy_points_ppr,
	roof,
	surface,
	weather_hazards,
	temp,
	humidity,
	wind_speed
from archive_data.weekly_data wd
left join archive_data.offense_snap_counts os
on wd.player_id = os.id
and wd.season = os.season
and wd.week = os.week
left join archive_data.ngs_rushing_data ngsr
on wd.player_id = ngsr.player_gsis_id
and wd.season = ngsr.season
and wd.week = ngsr.week
left join archive_data.ngs_receiving_data ngsp
on wd.player_id = ngsp.player_gsis_id
and wd.season = ngsp.season
and wd.week = ngsp.week
left join (select distinct rusher_player_id, game_id, season, week from archive_data.full_pbp) game_id
on wd.player_id = game_id.rusher_player_id
and wd.season = game_id.season
and wd.week = game_id.week
left join archive_data.game_data
on game_data.game_id = game_id.game_id
left join archive_data.redzone_snaps redzone
on wd.player_id = redzone.player_id
and wd.season = redzone.season
and wd.week = redzone.week
where position in ('WR', 'RB', 'TE')
and wd.season between 2016 and 2022;
"""

# Per-week rows from archive_data.weekly_wr (seasons 2016-2023) with the redzone
# column left-joined from archive_data.redzone_snaps on player_id/season/week.
# Rows without a redzone_snaps match therefore carry NULL in `redzone`.
weekly_wr = """
select 
wr.player_name, 
wr.position, 
wr.season,
wr.week,
offense_snaps,
offense_pct,
target_share,
targets,
receiving_epa,
redzone,
fantasy_points,
fantasy_points_ppr
from archive_data.weekly_wr wr
left join archive_data.redzone_snaps rz
on wr.player_id = rz.player_id
and wr.season = rz.season
and wr.week = rz.week
where wr.season between 2016 and 2023
"""

# Per-week rows from archive_data.weekly_rb (seasons 2016-2023): usage, rushing
# and receiving columns plus `redzone`, left-joined from
# archive_data.redzone_snaps on player_id/season/week (NULL when unmatched).
weekly_rb = """
select
rb.player_name,
rb.position,
rb.season,
rb.week,
offense_snaps,
offense_pct,
total_usage,
rushing_epa,
rush_yards_over_expected_per_att,
rush_pct_over_expected,
target_share,
receiving_epa,
redzone,
fantasy_points,
fantasy_points_ppr
from archive_data.weekly_rb rb
left join archive_data.redzone_snaps rz
on rb.player_id = rz.player_id
and rb.season = rz.season
and rb.week = rz.week
where rb.season between 2016 and 2023
"""

# Per-week rows from archive_data.weekly_te (seasons 2016-2023); selects the
# same column list as weekly_wr, with `redzone` left-joined from
# archive_data.redzone_snaps on player_id/season/week (NULL when unmatched).
weekly_te = """
select 
te.player_name, 
te.position, 
te.season,
te.week,
offense_snaps,
offense_pct,
target_share,
targets,
receiving_epa,
redzone,
fantasy_points,
fantasy_points_ppr
from archive_data.weekly_te te
left join archive_data.redzone_snaps rz
on te.player_id = rz.player_id
and te.season = rz.season
and te.week = rz.week
where te.season between 2016 and 2023
"""


### Correlation heatmap for weekly position data

In [9]:
# Load the WR, RB and TE weekly frames, one query each, in that order.
wr_df, rb_df, te_df = (
    execute_statement(query) for query in (weekly_wr, weekly_rb, weekly_te)
)

# fig, axs = plt.subplots(3, figsize=(10,20))
# get_corr_heatmap(wr_df, ax=axs[0], show_plot=False)
# axs[0].set_title("WR")
# get_corr_heatmap(rb_df, ax=axs[1], show_plot=False)
# axs[1].set_title("RB")
# get_corr_heatmap(te_df, ax=axs[2], show_plot=False)
# axs[2].set_title("TE")
# plt.tight_layout()
# plt.savefig("positional_heatmaps.png", bbox_inches='tight', dpi=300)
# plt.close()




In [41]:
# Display the WR frame (pandas truncates the middle rows in the rendered output).
wr_df

Unnamed: 0,player_name,position,season,week,offense_snaps,offense_pct,target_share,targets,receiving_epa,redzone,fantasy_points,fantasy_points_ppr
0,J.Jones,WR,2023,7,16.0,0.22,0.033333,1,-0.153665,,0.3,1.3
1,J.Jones,WR,2023,8,15.0,0.24,0.057143,2,1.803634,1.0,6.8,7.8
2,J.Jones,WR,2023,11,32.0,0.57,0.095238,2,-0.428815,,0.5,2.5
3,J.Jones,WR,2023,12,47.0,0.71,0.107143,3,-1.967411,,0.0,1.0
4,J.Jones,WR,2023,15,22.0,0.32,0.033333,1,2.262604,1.0,0.6,1.6
...,...,...,...,...,...,...,...,...,...,...,...,...
2316,J.Reed,WR,2023,15,25.0,0.42,0.222222,8,0.518128,2.0,11.2,17.2
2317,J.Reed,WR,2023,17,23.0,0.31,0.235294,8,7.697004,,20.9,26.9
2318,J.Reed,WR,2023,18,36.0,0.60,0.125000,4,7.952763,,11.2,15.2
2319,J.Reed,WR,2023,19,26.0,0.46,0.142857,3,-3.086792,1.0,0.0,0.0


In [5]:
# Machine learning with the position datasets:
# randomized hyper-parameter search for a WR random-forest regressor.
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Set random seed
SEED = 0

# Prepare data
predictors = ["target_share", "redzone", "offense_pct", "receiving_epa"]
response = ["fantasy_points_ppr"]

X = wr_df[predictors].values
# Flatten the single-column target to 1-D: estimators expect shape (n,) and
# otherwise emit a DataConversionWarning on every fit.
y = wr_df[response].values.ravel()

# Train/test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

# Pipelines
rand_params= {
    'bootstrap': [True, False],
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
    # 1.0 (= consider every feature) is what 'auto' meant for regressors;
    # the 'auto' spelling was deprecated and later removed from scikit-learn.
    'max_features': [1.0, 'sqrt'],
    'min_samples_leaf': [1, 2, 4],
    'min_samples_split': [2, 5, 10],
    'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]
}

# The imputer fills missing predictor values (SimpleImputer default strategy)
# before the search object fits its candidate forests.
steps = [
    ("imputer", SimpleImputer()),
    ("rfr", 
     RandomizedSearchCV(RandomForestRegressor(random_state=SEED), 
                        param_distributions=rand_params,
                        n_iter=10, 
                        cv=3,
                        verbose=3,
                        random_state=SEED))
]

pipe = Pipeline(steps=steps)
pipe.fit(X_train, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV 1/3] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=600;, score=0.683 total time=  10.0s
[CV 2/3] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=600;, score=0.696 total time=  10.1s
[CV 3/3] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=600;, score=0.674 total time=  10.0s
[CV 1/3] END bootstrap=False, max_depth=40, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1600;, score=0.736 total time=  23.4s
[CV 2/3] END bootstrap=False, max_depth=40, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1600;, score=0.748 total time=  23.5s
[CV 3/3] END bootstrap=False, max_depth=40, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1600;, score=0.735 total time=  23.3s
[CV 1/3] 

In [6]:
# Best hyper-parameters found by the search held in the pipeline's "rfr" step.
pipe.named_steps["rfr"].best_params_

{'n_estimators': 1200,
 'min_samples_split': 10,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'max_depth': 50,
 'bootstrap': True}

In [9]:
# Now do it again with a grid search

# Prepare data
predictors = ["target_share", "redzone", "offense_pct", "receiving_epa"]
response = ["fantasy_points_ppr"]

X = wr_df[predictors].values
# 1-D target: avoids the column-vector DataConversionWarning on every fit.
y = wr_df[response].values.ravel()

# Train/test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

# Pipelines
grid_params = {
    'n_estimators': [400, 800, 1200],
    'min_samples_split': [2, 5, 10],
    # min_samples_leaf must be a positive int or a float fraction; None is not
    # accepted, so every candidate using it failed to fit and scored NaN.
    # Using 2 keeps the grid at 81 candidates, all of them valid.
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt'],
    'max_depth': [10, 50, None],
    'bootstrap': [True]
}

steps = [
    ("imputer", SimpleImputer()),
    ("rfr", 
     GridSearchCV(RandomForestRegressor(random_state=SEED), 
                        param_grid=grid_params,
                        cv=5,
                        verbose=3))
]

pipe = Pipeline(steps=steps)
pipe.fit(X_train, y_train)

Fitting 5 folds for each of 81 candidates, totalling 405 fits
[CV 1/5] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400;, score=0.813 total time=   2.3s
[CV 2/5] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400;, score=0.816 total time=   2.3s
[CV 3/5] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400;, score=0.820 total time=   2.2s
[CV 4/5] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400;, score=0.810 total time=   2.2s
[CV 5/5] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400;, score=0.824 total time=   2.3s
[CV 1/5] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=800;, score=0.813 total time=   4.4s
[CV 2/5] END bootstr

In [15]:
# Display the object stored under the pipeline's "rfr" step.
pipe.named_steps["rfr"]

dict(max_depth=10, max_features='sqrt', min_samples_split=10,
                      n_estimators=800, random_state=0)

In [22]:
# Final fit: WR random forest with the tuned hyper-parameters.
from sklearn.metrics import r2_score

# Prepare data
# A missing redzone value is treated as zero (done on wr_df itself so that
# later cells predicting from wr_df see the same values).
wr_df["redzone"] = wr_df["redzone"].fillna(0)

predictors = ["target_share", "redzone", "offense_pct", "receiving_epa"]
response = ["fantasy_points_ppr"]

X = wr_df[predictors].values
# 1-D target: avoids the column-vector DataConversionWarning during fit.
y = wr_df[response].values.ravel()

# Train/test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

# Pipelines
best_grid_params = dict(max_depth=10, max_features='sqrt', min_samples_split=10,
                      n_estimators=800)

# Remaining missing predictor values are filled by SimpleImputer's default strategy.
steps = [
    ("imputer", SimpleImputer()),
    ("rfr", 
     RandomForestRegressor(random_state=SEED, **best_grid_params))
]

pipe_wr = Pipeline(steps=steps)
pipe_wr.fit(X_train, y_train)

# Hold-out R2 on the 30% test split.
y_pred = pipe_wr.predict(X_test)
score = r2_score(y_test, y_pred)
print(f"R2 Score: {score}")


R2 Score: 0.8233343491203113


In [24]:
# Feature importances of the final WR forest. They must be read from `pipe_wr`:
# `pipe` still holds the grid-search pipeline, whose "rfr" step is a search
# object and has no `feature_importances_` attribute of its own.
dict(zip(predictors, list(pipe_wr.named_steps["rfr"].feature_importances_)))

{'target_share': 0.35642074509878413,
 'redzone': 0.041821207365398516,
 'offense_pct': 0.07866878048156989,
 'receiving_epa': 0.5230892670542474}

In [24]:
# Score every 2023 WR row with the fitted model and keep the residual
# (actual minus predicted PPR points).
# .copy() makes this an independent frame, so the two column assignments below
# are well-defined instead of writing to a slice of wr_df (SettingWithCopy).
# NOTE(review): pipe_wr was fit on a random split of all seasons in wr_df, so
# part of these 2023 rows were in its training data -- these are not purely
# out-of-sample errors.
test_wr_df = wr_df[wr_df["season"] == 2023].copy()
test_wr_df["fantasy_points_ppr_predicted"] = pipe_wr.predict(test_wr_df[predictors].values)
test_wr_df["predicted_error"] = test_wr_df["fantasy_points_ppr"] - test_wr_df["fantasy_points_ppr_predicted"]

In [54]:
# ascending=True lists players due for positive regression;
# ascending=False lists players due for negative regression.
(
    test_wr_df
    .groupby("player_name", as_index=False)["predicted_error"]
    .sum()
    .sort_values("predicted_error", ascending=True)
    .head(40)
)

Unnamed: 0,player_name,predicted_error
12,A.Thielen,-42.751832
42,D.Adams,-29.167756
53,D.Hopkins,-27.031788
31,C.Kupp,-26.775194
14,B.Aiyuk,-26.134796
59,D.Moore,-25.765122
8,A.Pierce,-22.522169
153,O.Beckham,-22.023947
99,J.Mingo,-21.83236
173,R.Woods,-20.754383


In [55]:
# ascending=True lists players due for positive regression;
# ascending=False lists players due for negative regression.
(
    test_wr_df
    .groupby("player_name", as_index=False)["predicted_error"]
    .sum()
    .sort_values("predicted_error", ascending=False)
    .head(40)
)

Unnamed: 0,player_name,predicted_error
32,C.Lamb,46.432272
104,J.Reed,37.963941
187,T.Hill,35.486533
63,D.Samuel,30.871457
82,J.Addison,26.135558
98,J.Meyers,23.245166
115,K.Bourne,20.426865
185,T.Dell,20.371062
196,T.Palmer,19.190712
48,D.Davis,18.209458


In [10]:
# Time to do RBs: same random-forest setup as the WR model, RB predictors.
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score

SEED = 0

# Prepare data
# A missing redzone value is treated as zero; total_epa is the sum of the
# receiving and rushing EPA columns (NaN when either part is NaN).
rb_df["redzone"] = rb_df["redzone"].fillna(0)
rb_df["total_epa"] = rb_df["receiving_epa"] + rb_df["rushing_epa"]

predictors = ["offense_pct", "total_usage", "total_epa", "target_share", "redzone"]
response = ["fantasy_points_ppr"]

X = rb_df[predictors].values
# 1-D target: avoids the column-vector DataConversionWarning during fit.
y = rb_df[response].values.ravel()

# Train/test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

# Pipelines
best_grid_params = dict(max_depth=10, max_features='sqrt', min_samples_split=10,
                      n_estimators=800)

# Remaining missing predictor values are filled by SimpleImputer's default strategy.
steps = [
    ("imputer", SimpleImputer()),
    ("rfr", 
     RandomForestRegressor(random_state=SEED, **best_grid_params))
]

pipe_rb = Pipeline(steps=steps)
pipe_rb.fit(X_train, y_train)

# Hold-out R2 on the 30% test split.
y_pred = pipe_rb.predict(X_test)
score = r2_score(y_test, y_pred)
print(f"R2 Score: {score}")


R2 Score: 0.8296642573599169


In [11]:
# Pair each RB predictor with the importance reported by the fitted forest.
rb_forest = pipe_rb.named_steps["rfr"]
dict(zip(predictors, list(rb_forest.feature_importances_)))

{'offense_pct': 0.18024819469376357,
 'total_usage': 0.38381197444415105,
 'total_epa': 0.2374968292632086,
 'target_share': 0.08344217210846422,
 'redzone': 0.11500082949041235}

In [12]:
# Score every 2023 RB row with the fitted model and keep the residual
# (actual minus predicted PPR points).
# .copy() makes this an independent frame, so the two column assignments below
# are well-defined instead of writing to a slice of rb_df (SettingWithCopy).
# NOTE(review): pipe_rb was fit on a random split of all seasons in rb_df, so
# part of these 2023 rows were in its training data -- these are not purely
# out-of-sample errors.
test_rb_df = rb_df[rb_df["season"] == 2023].copy()
test_rb_df["fantasy_points_ppr_predicted"] = pipe_rb.predict(test_rb_df[predictors].values)
test_rb_df["predicted_error"] = test_rb_df["fantasy_points_ppr"] - test_rb_df["fantasy_points_ppr_predicted"]

In [21]:
# Season-total residual per RB, largest positive totals first (top 20).
(
    test_rb_df
    .groupby("player_name", as_index=False)["predicted_error"]
    .sum()
    .sort_values("predicted_error", ascending=False)
    .head(20)
)

Unnamed: 0,player_name,predicted_error
20,C.McCaffrey,38.82255
62,J.Gibbs,29.537946
106,R.Mostert,29.231915
24,D.Achane,26.386612
11,B.Robinson,23.36659
10,B.Hall,22.082836
73,J.Taylor,20.998775
60,J.Ford,19.496834
35,D.Montgomery,18.119553
119,T.Etienne,17.730348


In [25]:
# Candidate RB predictor list (defined first so the matrix below is the cell's
# last expression -- only the last expression of a cell is displayed).
predictors_rb = ["offense_pct", "total_usage", "rushing_epa", "rush_yards_over_expected_per_att", "target_share", "receiving_epa", "redzone"]

# Correlation matrix of the int64/float64 RB columns, leaving out any column
# whose name contains "season" or "week".
exclude_words = ["season", "week"]
numerical_cols = [col for col in rb_df.columns if rb_df[col].dtype in ['int64', 'float64']\
                    and not any(word in col.lower() for word in exclude_words)]
correlation_matrix = rb_df[numerical_cols].corr()
correlation_matrix

Unnamed: 0,offense_snaps,offense_pct,total_usage,rushing_epa,rush_yards_over_expected_per_att,rush_pct_over_expected,target_share,receiving_epa,redzone,fantasy_points,fantasy_points_ppr
offense_snaps,1.0,0.966869,0.859786,-0.072605,-0.040691,-0.059108,0.485312,0.067596,0.432156,0.642617,0.698619
offense_pct,0.966869,1.0,0.838101,-0.078422,-0.01118,-0.05826,0.519123,0.055762,0.396084,0.629417,0.681361
total_usage,0.859786,0.838101,1.0,-0.05085,0.048687,0.063777,0.495683,0.082004,0.568186,0.76111,0.78499
rushing_epa,-0.072605,-0.078422,-0.05085,1.0,0.585303,0.450763,-0.009777,0.022058,0.072874,0.373665,0.315623
rush_yards_over_expected_per_att,-0.040691,-0.01118,0.048687,0.585303,1.0,0.616455,0.009695,0.01901,0.066844,0.452237,0.391623
rush_pct_over_expected,-0.059108,-0.05826,0.063777,0.450763,0.616455,1.0,-0.026075,0.006713,0.101619,0.291832,0.244997
target_share,0.485312,0.519123,0.495683,-0.009777,0.009695,-0.026075,1.0,0.062956,0.1857,0.410711,0.564977
receiving_epa,0.067596,0.055762,0.082004,0.022058,0.01901,0.006713,0.062956,1.0,0.087832,0.3318,0.346662
redzone,0.432156,0.396084,0.568186,0.072874,0.066844,0.101619,0.1857,0.087832,1.0,0.531031,0.514113
fantasy_points,0.642617,0.629417,0.76111,0.373665,0.452237,0.291832,0.410711,0.3318,0.531031,1.0,0.974531


In [161]:
# NOTE(review): the saved output of the later `weekly[...]` cell shows the
# column list of `weekly_sql`, not of `weekly_wr` -- confirm which query
# `weekly` is meant to hold.
weekly = execute_statement(weekly_wr)
# Bind the result to its own name: assigning it back to `weekly_rb` replaced
# the SQL text with a DataFrame, so this cell failed when run a second time.
weekly_rb_df = execute_statement(weekly_rb)

#### ACTUAL ANALYSIS

In [10]:
from analysis import PositionReport as PR
import pandas as pd

In [40]:
from sklearn.mixture import GaussianMixture
import numpy as np

def tiering_players_all(self):
    """Assign a tier to every QB/RB/WR/TE row of ``self.projections``.

    For each position a 1-D Gaussian mixture is fitted on the ``FPTS`` column.
    Clusters are numbered 1, 2, ... in the order in which they first appear in
    the rows, so tier 1 is the top tier only if ``self.projections`` is already
    ordered best-first within a position.

    Returns
    -------
    pd.DataFrame
        Columns ``Player`` and ``Tier``, positions concatenated in the order
        QB, RB, WR, TE, with a fresh 0..n-1 index. ``self.projections`` itself
        is not modified.
    """
    # Number of mixture components (= maximum number of tiers) per position.
    tier_num_mapping = {
        "QB": 8,
        "RB": 11,
        "WR": 12,
        "TE": 9,
    }

    def tiering_players_pos(pos: str):
        """Return the ``pos`` rows of the projections with a ``Tier`` column added."""
        # Explicit copy: columns are added below and must not write to a slice.
        training = self.projections.loc[self.projections["Position"] == pos].copy()

        if training.empty:
            # Nothing to cluster; keep the output schema consistent.
            training["Tier"] = pd.Series(dtype="int64")
            return training.reset_index(drop=True)

        # A mixture cannot be fitted with more components than rows.
        n_components = min(tier_num_mapping[pos], len(training))
        gm = GaussianMixture(n_components=n_components, random_state=0)
        labels = list(gm.fit_predict(training[["FPTS"]]))

        # dict.fromkeys keeps first-appearance order, which defines the tier number.
        tier_map = {label: tier for tier, label in enumerate(dict.fromkeys(labels), start=1)}
        training["Tier"] = [tier_map[label] for label in labels]

        return training.reset_index(drop=True)

    df = pd.concat([tiering_players_pos(pos=position) for position in ["QB", "RB", "WR", "TE"]])
    df = df.reset_index(drop=True)

    return df[["Player", "Tier"]]


In [43]:
# One PositionReport per position, built in the order RB, WR, TE.
rb_report, wr_report, te_report = (PR(position) for position in ("RB", "WR", "TE"))

In [56]:
def tiering_players_pos(df: pd.DataFrame, pos: str):
    """Return the ``pos`` rows of ``df`` with a ``Tier`` column added.

    A 1-D Gaussian mixture is fitted on the ``score`` column. Clusters are
    numbered 1, 2, ... in the order in which they first appear in the rows, so
    tier 1 is the top tier only if ``df`` is already ordered best-first.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``position`` and ``score``.
    pos : str
        One of "QB", "RB", "WR", "TE".

    Returns
    -------
    pd.DataFrame
        Copy of the matching rows with a fresh 0..n-1 index and a ``Tier``
        column appended; ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If ``pos`` is not one of the supported positions.
    """
    # Number of mixture components (= maximum number of tiers) per position.
    tier_num_mapping = {
        "QB": 8,
        "RB": 16,
        "WR": 12,
        "TE": 12,
    }
    n_tiers = tier_num_mapping[pos]

    # Explicit copy: columns are added below and must not write to a slice of df.
    training = df.loc[df["position"] == pos].copy()

    if training.empty:
        # Nothing to cluster; keep the output schema consistent.
        training["Tier"] = pd.Series(dtype="int64")
        return training.reset_index(drop=True)

    # A mixture cannot be fitted with more components than rows (short week
    # windows can contain fewer players than the configured tier count).
    gm = GaussianMixture(n_components=min(n_tiers, len(training)), random_state=0)
    labels = list(gm.fit_predict(training[["score"]]))

    # dict.fromkeys keeps first-appearance order, which defines the tier number.
    tier_map = {label: tier for tier, label in enumerate(dict.fromkeys(labels), start=1)}
    training["Tier"] = [tier_map[label] for label in labels]

    return training.reset_index(drop=True)

In [52]:
# Tier the WR report for the (1, 18) window and preview the first 30 rows.
full_year = tiering_players_pos(wr_report.compare_reports(1, 18), "WR")
full_year.head(30)

Unnamed: 0,player_name,position,season,score,Tier
0,T.Hill,WR,2023,18.077757,1
1,C.Lamb,WR,2023,18.018627,1
2,K.Allen,WR,2023,17.745644,1
3,A.St. Brown,WR,2023,17.504041,2
4,A.Brown,WR,2023,17.329394,2
5,P.Nacua,WR,2023,17.155559,2
6,J.Jefferson,WR,2023,17.131128,2
7,D.Adams,WR,2023,16.975393,2
8,J.Chase,WR,2023,16.796612,3
9,S.Diggs,WR,2023,16.752274,3


In [61]:
# RB report: tier the scores over several week windows and merge them into one
# table keyed by player/position/season.
report_windows = {
    "full_year": (1, 18),
    "half_1": (1, 9),
    "half_2": (9, 18),
    "quarter_1": (1, 4),
    "quarter_2": (4, 8),
    "quarter_3": (8, 12),
    "final_6": (12, 18),
}
tiered_by_window = {
    label: tiering_players_pos(rb_report.compare_reports(first_week, last_week), "RB")
    for label, (first_week, last_week) in report_windows.items()
}
# Keep the per-window frames available under their individual names.
full_year, half_1, half_2, quarter_1, quarter_2, quarter_3, final_6 = tiered_by_window.values()

# Rename score/Tier per window BEFORE merging. Merging frames that all carry
# "score"/"Tier" makes pandas generate clashing _x/_y suffixes from the fourth
# frame on (a MergeError on pandas >= 2.0) and forced a positional rename.
key_cols = ["player_name", "position", "season"]
joined = None
for label, tiered in tiered_by_window.items():
    labelled = tiered.rename(columns={"score": f"{label}_score", "Tier": f"{label}_tier"})
    joined = labelled if joined is None else joined.merge(labelled, on=key_cols, how="left")

# Row-wise means over the windows a player appears in (NaNs are skipped).
joined["avg_score"] = joined[[f"{label}_score" for label in report_windows]].mean(axis=1)
joined["avg_tier"] = joined[[f"{label}_tier" for label in report_windows]].mean(axis=1)

ranked = joined.sort_values("avg_tier")
ranked.to_csv("rb_final_report_2023.csv")
# Last expression of the cell, so the preview is actually displayed.
ranked.head(20)

In [62]:
# WR report: tier the scores over several week windows and merge them into one
# table keyed by player/position/season.
report_windows = {
    "full_year": (1, 18),
    "half_1": (1, 9),
    "half_2": (9, 18),
    "quarter_1": (1, 4),
    "quarter_2": (4, 8),
    "quarter_3": (8, 12),
    "final_6": (12, 18),
}
tiered_by_window = {
    label: tiering_players_pos(wr_report.compare_reports(first_week, last_week), "WR")
    for label, (first_week, last_week) in report_windows.items()
}
# Keep the per-window frames available under their individual names.
full_year, half_1, half_2, quarter_1, quarter_2, quarter_3, final_6 = tiered_by_window.values()

# Rename score/Tier per window BEFORE merging. Merging frames that all carry
# "score"/"Tier" makes pandas generate clashing _x/_y suffixes from the fourth
# frame on (a MergeError on pandas >= 2.0) and forced a positional rename.
key_cols = ["player_name", "position", "season"]
joined = None
for label, tiered in tiered_by_window.items():
    labelled = tiered.rename(columns={"score": f"{label}_score", "Tier": f"{label}_tier"})
    joined = labelled if joined is None else joined.merge(labelled, on=key_cols, how="left")

# Row-wise means over the windows a player appears in (NaNs are skipped).
joined["avg_score"] = joined[[f"{label}_score" for label in report_windows]].mean(axis=1)
joined["avg_tier"] = joined[[f"{label}_tier" for label in report_windows]].mean(axis=1)

ranked = joined.sort_values("avg_tier")
ranked.to_csv("wr_final_report_2023.csv")
# Last expression of the cell, so the preview is actually displayed.
ranked.head(20)

In [60]:
# First 30 rows of the tiered frame currently bound to `final_6`.
final_6.head(30)

Unnamed: 0,player_name,position,season,score,Tier
0,C.Lamb,WR,2023,18.666107,1
1,T.Hill,WR,2023,18.230649,1
2,C.Watson,WR,2023,17.946312,1
3,M.Hardman,WR,2023,17.306591,2
4,A.St. Brown,WR,2023,17.290407,2
5,R.Rice,WR,2023,17.254041,2
6,D.Adams,WR,2023,17.218789,2
7,J.Waddle,WR,2023,17.177034,2
8,N.Collins,WR,2023,17.167555,2
9,M.Pittman,WR,2023,16.976181,2


In [63]:
# TE report: tier the scores over several week windows and merge them into one
# table keyed by player/position/season.
report_windows = {
    "full_year": (1, 18),
    "half_1": (1, 9),
    "half_2": (9, 18),
    "quarter_1": (1, 4),
    "quarter_2": (4, 8),
    "quarter_3": (8, 12),
    "final_6": (12, 18),
}
tiered_by_window = {
    label: tiering_players_pos(te_report.compare_reports(first_week, last_week), "TE")
    for label, (first_week, last_week) in report_windows.items()
}
# Keep the per-window frames available under their individual names.
full_year, half_1, half_2, quarter_1, quarter_2, quarter_3, final_6 = tiered_by_window.values()

# Rename score/Tier per window BEFORE merging. Merging frames that all carry
# "score"/"Tier" makes pandas generate clashing _x/_y suffixes from the fourth
# frame on (a MergeError on pandas >= 2.0) and forced a positional rename.
key_cols = ["player_name", "position", "season"]
joined = None
for label, tiered in tiered_by_window.items():
    labelled = tiered.rename(columns={"score": f"{label}_score", "Tier": f"{label}_tier"})
    joined = labelled if joined is None else joined.merge(labelled, on=key_cols, how="left")

# Row-wise means over the windows a player appears in (NaNs are skipped).
joined["avg_score"] = joined[[f"{label}_score" for label in report_windows]].mean(axis=1)
joined["avg_tier"] = joined[[f"{label}_tier" for label in report_windows]].mean(axis=1)

ranked = joined.sort_values("avg_tier")
ranked.to_csv("te_final_report_2023.csv")
# Last expression of the cell, so the preview is actually displayed.
ranked.head(20)

In [45]:
# Display the merged report currently bound to `joined`.
# NOTE(review): the saved output shows RB rows although the cell directly above
# builds the TE report -- the output predates that cell; re-run to refresh.
joined

Unnamed: 0,player_name,position,season,full_year_score,full_year_tier,half_1_score,half_1_tier,half_2_score,half_2_tier,quarter_1_score,quarter_1_tier,quarter_2_score,quarter_2_tier,quarter_3_score,quarter_3_tier,final_6_score,final_6_tier
0,C.McCaffrey,RB,2023,18.610934,1,18.137937,1,19.083932,1,19.473737,1,17.761963,2,19.464160,2,18.742652,2
1,K.Williams,RB,2023,18.517038,1,17.652758,2,19.381317,1,17.693874,3,17.479704,2,20.832980,1,19.381317,1
2,A.Kamara,RB,2023,17.253960,2,18.076050,1,16.435582,4,17.435797,4,18.563366,1,16.559516,5,16.579898,5
3,S.Barkley,RB,2023,17.174117,2,17.952345,1,16.726767,3,17.200401,4,18.498644,1,17.399246,3,16.449613,5
4,J.Jacobs,RB,2023,17.112200,2,17.467483,2,16.746940,3,17.253555,4,17.750095,2,17.000434,4,16.599784,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,B.Scott,RB,2023,11.684969,15,11.730872,15,11.654367,15,11.913044,15,11.548700,15,11.395483,16,11.746693,15
71,P.Strong,RB,2023,11.662788,15,11.771196,15,11.557959,15,11.536510,16,11.984760,14,11.684515,15,11.675784,15
72,D.Dallas,RB,2023,11.626453,15,11.523605,16,11.780724,15,11.560085,16,11.324968,16,11.582208,15,11.946779,15
73,H.Luepke,RB,2023,11.623974,15,11.523806,16,11.615580,15,11.761303,15,11.682124,15,11.286308,16,11.824312,15


In [17]:
# Stack the (1, 1) window reports of RB, WR and TE into a single frame.
combined = pd.concat(
    [PR(position).compare_reports(1, 1) for position in ("RB", "WR", "TE")]
)

In [14]:
# First 60 rows of the RB report for the (9, 18) window.
rb_report.compare_reports(9, 18).head(60)

Unnamed: 0,player_name,position,season,score
77,K.Williams,RB,2023,19.381317
18,C.McCaffrey,RB,2023,19.083932
45,I.Pacheco,RB,2023,17.61965
97,R.White,RB,2023,17.308671
61,J.Taylor,RB,2023,16.953156
47,J.Conner,RB,2023,16.931854
59,J.Mixon,RB,2023,16.888229
9,B.Hall,RB,2023,16.817062
96,R.Stevenson,RB,2023,16.764434
53,J.Jacobs,RB,2023,16.74694


In [6]:
# First 50 rows of the RB report for week 1.
rb_report.get_report_by_week(1).head(50)

Unnamed: 0,player_name,position,season,week,score
1357,C.McCaffrey,RB,2023,1,20.205158
627,A.Ekeler,RB,2023,1,18.887076
152,T.Pollard,RB,2023,1,18.558381
479,J.Jacobs,RB,2023,1,18.222568
426,T.Etienne,RB,2023,1,18.026707
385,N.Chubb,RB,2023,1,17.617855
768,K.Williams,RB,2023,1,17.5572
448,J.Conner,RB,2023,1,17.522147
755,D.Montgomery,RB,2023,1,17.51988
1297,B.Robinson,RB,2023,1,17.510458


In [7]:
# First 50 rows of the RB report for the (1, 1) window.
# NOTE(review): in the saved outputs the 10th row differs from the week-1
# report in the cell above (T.Allgeier vs B.Robinson) -- confirm whether the
# two methods are expected to agree for a single week.
rb_report.compare_reports(1, 1).head(50)

Unnamed: 0,player_name,position,season,score
15,C.McCaffrey,RB,2023,20.205158
2,A.Ekeler,RB,2023,18.887076
76,T.Pollard,RB,2023,18.558381
42,J.Jacobs,RB,2023,18.222568
74,T.Etienne,RB,2023,18.026707
59,N.Chubb,RB,2023,17.617855
53,K.Williams,RB,2023,17.5572
35,J.Conner,RB,2023,17.522147
24,D.Montgomery,RB,2023,17.51988
71,T.Allgeier,RB,2023,17.416769


In [5]:
# First 36 rows of the WR report for week 18.
wr_report.get_report_by_week(18).head(36)

Unnamed: 0,player_name,position,season,week,score
665,N.Collins,WR,2023,18,20.124781
1573,C.Lamb,WR,2023,18,19.661264
502,J.Jefferson,WR,2023,18,19.130467
975,T.Hill,WR,2023,18,18.540879
584,A.St. Brown,WR,2023,18,17.826759
1717,D.Moore,WR,2023,18,17.609691
736,M.Hardman,WR,2023,18,17.306591
793,C.Ridley,WR,2023,18,17.006087
327,D.Adams,WR,2023,18,16.811423
1653,M.Wilson,WR,2023,18,16.69771


In [57]:
# First 60 rows of the WR report for the (9, 18) window.
wr_report.compare_reports(9,18).head(60)

Unnamed: 0,player_name,position,season,score
29,C.Lamb,WR,2023,18.644343
176,T.Hill,WR,2023,17.888611
108,K.Allen,WR,2023,17.856827
8,A.St. Brown,WR,2023,17.820558
174,T.Dell,WR,2023,17.441119
38,D.Adams,WR,2023,17.005776
133,M.Pittman,WR,2023,16.986714
141,N.Collins,WR,2023,16.976107
88,J.Jefferson,WR,2023,16.951222
0,A.Brown,WR,2023,16.727772


In [5]:
import pandas as pd


# First 60 rows of the TE report for week 18.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'float' object has no attribute 'min'", presumably raised
# inside PositionReport -- fix it there or remove this cell before sharing.
te_report.get_report_by_week(18).head(60)

AttributeError: 'float' object has no attribute 'min'

In [11]:
# First 24 rows of the TE report for the (9, 18) window.
te_report.compare_reports(9, 18).head(24)

Unnamed: 0,player_name,position,season,score
96,T.Hockenson,TE,2023,17.843634
39,G.Kittle,TE,2023,17.359544
12,C.Kmet,TE,2023,17.193917
100,T.McBride,TE,2023,16.789711
33,E.Engram,TE,2023,16.754042
90,S.LaPorta,TE,2023,16.736937
50,J.Ferguson,TE,2023,16.634764
98,T.Kelce,TE,2023,16.301173
29,D.Schultz,TE,2023,16.212254
74,M.Andrews,TE,2023,16.161107


In [286]:
def get_corr_heatmap(df: pd.DataFrame, ax = None, show_plot = True):
    """Draw a lower-triangle correlation heatmap of the numeric columns of ``df``.

    This definition replaces any earlier ``get_corr_heatmap`` in the notebook,
    so it accepts the same optional ``ax`` / ``show_plot`` arguments; called
    with ``df`` alone it draws and shows the plot.

    Columns whose name contains "season" or "week" (case-insensitive) are left
    out of the correlation matrix.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose numeric columns are correlated.
    ax : optional
        Forwarded to ``sns.heatmap`` as its ``ax`` argument.
    show_plot : bool, default True
        When True the figure is shown with ``plt.show()`` and ``None`` is
        returned; when False the value returned by ``sns.heatmap`` is returned.
    """
    exclude_words = ("season", "week")
    # select_dtypes("number") picks up every numeric dtype (int32, float32,
    # nullable ints, ...) instead of only the literal int64/float64 pair.
    numerical_cols = [
        col for col in df.select_dtypes(include="number").columns
        if not any(word in str(col).lower() for word in exclude_words)
    ]
    correlation_matrix = df[numerical_cols].corr()
    # Hide the upper triangle (diagonal included): it mirrors the lower one.
    mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
    heatmap_ax = sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm',
                             linewidths=0.5, mask=mask, ax=ax)
    if show_plot:
        plt.show()
        return None
    return heatmap_ax

In [4]:
# Rushing- and usage-related column names of the weekly frame.
rushing_cols = [
    "rushing_yards", "rushing_tds", "rushing_fumbles", "rushing_fumbles_lost",
    "rushing_first_downs", "rushing_epa", "efficiency",
    "percent_attempts_gte_eight_defenders", "avg_time_to_los",
    "rush_yards_over_expected", "avg_rush_yards",
    "rush_yards_over_expected_per_att", "rush_pct_over_expected",
    "targets", "offense_snaps", "offense_pct", "redzone", "total_usage",
]

# Rows of `weekly` that have no player name.
missing_name_rows = weekly["player_name"].isna()
weekly.loc[missing_name_rows]

Unnamed: 0,player_id,player_name,position,recent_team,season,week,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_yards_over_expected,avg_rush_yards,rush_yards_over_expected_per_att,rush_pct_over_expected,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,racr,target_share,air_yards_share,wopr,offense_snaps,offense_pct,redzone,total_usage,fantasy_points,fantasy_points_ppr,roof,surface,weather_hazards,temp,humidity,wind_speed
0,00-0020337,,WR,BAL,2016,1,0,0.0,0,0.0,0.0,0.0,,,,,,,,,5,8,19.0,0,0.0,0.0,41.0,12.0,0.0,-1.190828,0.463415,0.250000,0.166667,0.491667,45.0,0.66,1.0,8,1.9,6.9,,,,,,
1,00-0020337,,WR,BAL,2016,2,0,0.0,0,0.0,0.0,0.0,,,,,,,,,3,6,64.0,0,0.0,0.0,80.0,5.0,3.0,0.058890,0.800000,0.133333,0.164948,0.315464,53.0,0.71,,6,6.4,9.4,,,,,,
2,00-0020337,,WR,BAL,2016,3,0,0.0,0,0.0,0.0,0.0,,,,,,,,,8,11,87.0,0,0.0,0.0,66.0,39.0,5.0,3.585929,1.318182,0.275000,0.246269,0.584888,40.0,0.60,,11,8.7,16.7,,,,,,
3,00-0020337,,WR,BAL,2016,4,0,0.0,0,0.0,0.0,0.0,,,,,,,,,8,11,111.0,1,0.0,0.0,88.0,60.0,4.0,4.417709,1.261364,0.220000,0.270769,0.519538,73.0,0.82,2.0,11,17.1,25.1,,,,,,
4,00-0020337,,WR,BAL,2016,5,0,0.0,0,0.0,0.0,0.0,,,,,,,,,3,3,29.0,0,0.0,0.0,27.0,2.0,2.0,0.981787,1.074074,0.063830,0.079882,0.151662,13.0,0.18,,3,2.9,5.9,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15040,00-0032482,,WR,CHI,2016,17,1,4.0,0,0.0,0.0,0.0,-0.529760,,,,,,,,1,1,11.0,0,0.0,0.0,3.0,8.0,1.0,0.881909,3.666667,0.050000,0.017442,0.087209,2.0,0.03,,2,1.5,2.5,dome,sportturf,NONE,,,
15251,00-0032636,,RB,GB,2016,7,2,6.0,0,0.0,0.0,0.0,0.127916,,,,,,,,0,1,0.0,0,0.0,0.0,10.0,0.0,0.0,-0.739813,0.000000,0.017857,0.032154,0.049294,,,,3,0.6,0.6,outdoors,grass,NONE,47.0,0.63,10.0
15252,00-0032636,,RB,GB,2016,8,4,10.0,0,0.0,0.0,1.0,-0.476560,,,,,,,,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,,,,,,,1.0,4,1.0,1.0,dome,fieldturf,NONE,,,
15253,00-0032636,,RB,GB,2016,9,4,16.0,0,0.0,0.0,0.0,-0.326300,,,,,,,,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,4,1.6,1.6,outdoors,grass,NONE,68.0,0.55,7.0
