In [1]:
import pandas as pd

# Three consecutive seasons of tight-end stats.
# Training pairs 2022 stats with 2023 outcomes; testing pairs 2023 stats with 2024 outcomes.
df_22, df_23, df_24 = map(
    pd.read_csv,
    (
        'data/2022playerstats_TE.csv',
        'data/2023playerstats_TE.csv',
        'data/2024playerstats_TE.csv',
    ),
)

# Preview the first rows of each season, oldest first.
print(df_22.head(), df_23.head(), df_24.head(), sep="\n")

           Player FantPos  Age  GamesPlayed  GamesStarted  PassCmp  PassAtt  \
0    Travis Kelce      TE   33           17            17        0        0   
1   George Kittle      TE   29           15            15        0        0   
2     Taysom Hill      TE   32           16             8       13       19   
3  T.J. Hockenson      TE   25           17            14        0        0   
4    Mark Andrews      TE   27           15            15        0        0   

   PassYds  PassTD  PassInt  ...  Team_NWE  Team_NYG  Team_NYJ  Team_PHI  \
0        0       0        0  ...     False     False     False     False   
1        0       0        0  ...     False     False     False     False   
2      240       2        0  ...     False     False     False     False   
3        0       0        0  ...     False     False     False     False   
4        0       0        0  ...     False     False     False     False   

   Team_PIT  Team_SEA  Team_SFO  Team_TAM  Team_TEN  Team_WAS  
0   

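# TE Model

Input features (per-game receiving rates from the earlier season of each pair):
- targets per game
- receptions per game
- receiving yards per game
- receiving touchdowns per game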

In [2]:
def compute_features(df):
    """Add per-game receiving rate columns to ``df`` in place.

    For each season-total column the value is divided by ``GamesPlayed`` and
    stored under a new column:

    * ``Targets``    -> ``TargetsPerGame``
    * ``Receptions`` -> ``RecsPerGame``
    * ``RecYds``     -> ``RecYdsPerGame``
    * ``RecTD``      -> ``RecTDsPerGame``

    Rows with ``GamesPlayed == 0`` (or any otherwise undefined quotient) get
    a rate of 0. The new columns are always ``float64``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``GamesPlayed``, ``Targets``, ``Receptions``, ``RecYds``
        and ``RecTD``. The frame is modified in place.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    games = df['GamesPlayed']
    # Mask zero-game rows with a float NaN rather than pd.NA: substituting
    # pd.NA into an integer column yields an object-dtype divisor, which makes
    # the quotient object dtype and leaves the final dtype dependent on
    # fillna's (deprecated) object down-casting.
    games_nonzero = games.where(games != 0)

    per_game_columns = {
        'TargetsPerGame': 'Targets',
        'RecsPerGame': 'Receptions',
        'RecYdsPerGame': 'RecYds',
        'RecTDsPerGame': 'RecTD',
    }
    for feature_name, total_column in per_game_columns.items():
        rate = (df[total_column] / games_nonzero).astype('float64')
        # Undefined rates (no games played / missing totals) count as zero.
        df[feature_name] = rate.fillna(0.0)

In [3]:
# Add the per-game rate columns (in place) to the two seasons that supply
# model inputs: 2022 for training and 2023 for testing.
for season_df in (df_22, df_23):
    compute_features(season_df)

# Preview both frames now that the new columns are attached.
print(df_22.head(), df_23.head(), sep="\n")

           Player FantPos  Age  GamesPlayed  GamesStarted  PassCmp  PassAtt  \
0    Travis Kelce      TE   33           17            17        0        0   
1   George Kittle      TE   29           15            15        0        0   
2     Taysom Hill      TE   32           16             8       13       19   
3  T.J. Hockenson      TE   25           17            14        0        0   
4    Mark Andrews      TE   27           15            15        0        0   

   PassYds  PassTD  PassInt  ...  Team_PIT  Team_SEA  Team_SFO  Team_TAM  \
0        0       0        0  ...     False     False     False     False   
1        0       0        0  ...     False     False      True     False   
2      240       2        0  ...     False     False     False     False   
3        0       0        0  ...     False     False     False     False   
4        0       0        0  ...     False     False     False     False   

   Team_TEN  Team_WAS  TargetsPerGame  RecsPerGame  RecYdsPerGame  \

In [4]:
# Per-game receiving rates used as the model inputs.
features = ['TargetsPerGame', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame']

# Inner-join each season with the following one so only players present in
# both remain. Columns of the earlier season keep their names; colliding
# columns from the later season get the year suffix.
# NOTE(review): the join key is the player name alone - assumes names are
# unique within a season; confirm against the data.
df_22_23_merged = pd.merge(df_22, df_23, how='inner', on='Player', suffixes=('', '_23'))
df_23_24_merged = pd.merge(df_23, df_24, how='inner', on='Player', suffixes=('', '_24'))

# Inputs are the earlier season's rates; the target is the later season's PPR points.
X_train, y_train = df_22_23_merged[features], df_22_23_merged['FantasyPtsPPR_23']
X_test, y_test = df_23_24_merged[features], df_23_24_merged['FantasyPtsPPR_24']

print(y_test.head(), y_train.head(), X_test.head(), X_train.head(), sep="\n")

0    174.6
1    236.6
2    195.4
3     86.5
4    148.5
Name: FantasyPtsPPR_24, dtype: float64
0    219.4
1    203.2
2    143.5
3    219.0
4    135.4
Name: FantasyPtsPPR_23, dtype: float64
   TargetsPerGame  RecsPerGame  RecYdsPerGame  RecTDsPerGame
0        7.058824     5.058824      52.294118       0.588235
1        5.625000     4.062500      63.750000       0.375000
2        8.066667     6.200000      65.600000       0.333333
3        8.466667     6.333333      64.000000       0.333333
4        7.687500     5.062500      55.125000       0.375000
   TargetsPerGame  RecsPerGame  RecYdsPerGame  RecTDsPerGame
0        8.941176     6.470588      78.705882       0.705882
1        5.733333     4.000000      51.000000       0.733333
2        0.812500     0.562500       4.812500       0.125000
3        7.588235     5.058824      53.764706       0.352941
4        7.533333     4.866667      56.466667       0.333333


In [5]:
# Standardize the features. The scaler's statistics come from the training
# set only and are then reused unchanged on the test set.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# First five standardized rows of each split.
print(X_train[:5], X_test[:5], sep="\n")

[[ 3.16115479  3.38702956  3.85328678  4.01634945]
 [ 1.63053945  1.67014935  2.12988718  4.20525995]
 [-0.71742541 -0.71866463 -0.74313156  0.01886833]
 [ 2.51560186  2.40595516  2.30186123  1.58750016]
 [ 2.48940551  2.27242003  2.46993233  1.45256409]]
[[2.2629942  2.40595516 2.21038567 3.20673302]
 [1.57884844 1.71358234 2.9229803  1.73930324]
 [2.74388435 3.1989903  3.03805656 1.45256409]
 [2.93474347 3.29164733 2.93853115 1.45256409]
 [2.5629658  2.40851004 2.38647613 1.73930324]]


In [6]:
# Sanity-check the modelling inputs before fitting.
# `print_diagnostics` is a project-local helper not shown here; per the
# recorded output it reports train/test sample counts, the feature count,
# samples per feature, and the mean/std of each target.
from helpers import print_diagnostics
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 104
Testing samples: 111
Features: 4
Samples per feature: 26.0
Training target stats: mean=54.0, std=59.8
Testing target stats: mean=54.2, std=61.0


In [7]:
# Fit a linear regression on the standardized training features, then score
# it on both the training split and the held-out test split.
from helpers import evaluate_model
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

# Predict for the training split first, then the test split.
train_preds, preds = (model.predict(split) for split in (X_train, X_test))

# Project-local helper; per the recorded output it prints RMSE, R^2 and MAE
# for the training and testing predictions.
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 39.525061890766885
Training R^2: 0.5586226584247503
Training MAE: 28.305765761588525
Testing RMSE: 40.00707160283916
Testing R^2: 0.5663739914900374
Testing MAE: 28.6164957112682


In [8]:
# Spot-check: first five test predictions next to the first five actual targets.
print(preds[:5], y_test.head(), sep="\n")

[164.81634751 160.3475305  186.85645898 186.14796474 159.96842831]
0    174.6
1    236.6
2    195.4
3     86.5
4    148.5
Name: FantasyPtsPPR_24, dtype: float64


In [9]:
# Fitted parameters. The inputs were standardized before fitting, so each
# coefficient is expressed per one standard deviation of its feature.
print("Intercept:", model.intercept_)
print(
    *(f"{feature}: {coef:.4f}" for feature, coef in zip(features, model.coef_)),
    sep="\n",
)

Intercept: 53.97980769230769
TargetsPerGame: -0.3560
RecsPerGame: 17.5197
RecYdsPerGame: 22.7467
RecTDsPerGame: 5.9910


In [10]:
# Build a per-player comparison of actual vs. predicted points for the test
# split and score how well the predicted ordering matches the actual one.
# NOTE(review): both helpers are project-local and not shown here. Per the
# recorded output, `results` has the columns Player, Actual, Predicted,
# ActualRank, PredictedRank and RankError; how the rank squared error is
# aggregated should be confirmed in `helpers`.
from helpers import compute_rank_squared_error, build_results_df

results = build_results_df(y_test, preds, df_23_24_merged['Player'])
rank_squared_error = compute_rank_squared_error(results)

# to_string() renders every row instead of pandas' truncated default repr.
print(results.to_string())
print("Rank Squared Error:", rank_squared_error)

                 Player  Actual   Predicted  ActualRank  PredictedRank  RankError
0           Sam LaPorta   174.6  164.816348           7              4         -3
1         George Kittle   236.6  160.347531           2              6          4
2          Travis Kelce   195.4  186.856459           4              1         -3
3        T.J. Hockenson    86.5  186.147965          33              2        -31
4           David Njoku   148.5  159.968428          10              7         -3
5           Evan Engram    89.5  176.247207          32              3        -29
6           Taysom Hill   102.3   61.753995          28             34          6
7             Cole Kmet   120.4  131.955823          18              9         -9
8         Jake Ferguson   104.4  131.474059          24             10        -14
9          Trey McBride   249.8  139.072170           1              8          7
10       Dalton Schultz   118.2  126.713129          19             11         -8
11         Mark 

In [11]:
# Re-order the comparison table by the model's ranking (ascending
# PredictedRank) and print every row.
results_sorted_by_projected_rank = results.sort_values("PredictedRank", ascending=True)
print(results_sorted_by_projected_rank.to_string())

                 Player  Actual   Predicted  ActualRank  PredictedRank  RankError
2          Travis Kelce   195.4  186.856459           4              1         -3
3        T.J. Hockenson    86.5  186.147965          33              2        -31
5           Evan Engram    89.5  176.247207          32              3        -29
0           Sam LaPorta   174.6  164.816348           7              4         -3
11         Mark Andrews   188.8  161.640274           5              5          0
1         George Kittle   236.6  160.347531           2              6          4
4           David Njoku   148.5  159.968428          10              7         -3
9          Trey McBride   249.8  139.072170           1              8          7
7             Cole Kmet   120.4  131.955823          18              9         -9
8         Jake Ferguson   104.4  131.474059          24             10        -14
10       Dalton Schultz   118.2  126.713129          19             11         -8
15       Dalton 

In [12]:
# Persist the rank-ordered projections to the working directory. The frame's
# index is written as the first (unnamed) column, as with the default to_csv.
results_sorted_by_projected_rank.to_csv(path_or_buf="projections_TE.csv")