In [1]:
import os
from kaggle.api.kaggle_api_extended import KaggleApi

os.environ['KAGGLE_CONFIG_DIR'] = r'C:\Users\omend\.kaggle'

api = KaggleApi()
api.authenticate()

api.dataset_download_files("philiphyde1/nfl-stats-1999-2022", path=r"./data", unzip=True)
print("Download Complete!")

Dataset URL: https://www.kaggle.com/datasets/philiphyde1/nfl-stats-1999-2022
Download Complete!


In [2]:
#Data load and ETL
import pandas as pd
import numpy as np

# Feature engineering and preprocessing
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Visualizations
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#4 REgression & Machine learning Models
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import  RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
from xgboost import XGBRegressor

#Time series Forecasting
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA


# Recommendation & Optimization
from sklearn.metrics.pairwise import cosine_similarity
from scipy.optimize import linprog
import pulp

In [None]:
yearly_data_off = pd.read_csv('./fantasy-football-recommendations/data/yearly_player_stats_offense.csv', encoding='utf-8')
yearly_data_def = pd.read_csv('./fantasy-football-recommendations/data/yearly_player_stats_defense.csv', encoding='utf-8')

weekly_data_off = pd.read_csv('./fantasy-football-recommendations/data/weekly_player_stats_offense.csv')
print(yearly_data_off.head(10))
print(weekly_data_off.head(5))

    player_id      player_name position  birth_year  draft_year  draft_round  \
0  00-0000865    Charlie Batch       QB        1974      1998.0          2.0   
1  00-0004541    Donald Driver       WR        1975      1999.0          7.0   
2  00-0006101    Tony Gonzalez       TE        1976      1997.0          1.0   
3  00-0006101    Tony Gonzalez       TE        1976      1997.0          1.0   
4  00-0007091  Matt Hasselbeck       QB        1975      1998.0          6.0   
5  00-0010346   Peyton Manning       QB        1976      1998.0          1.0   
6  00-0010346   Peyton Manning       QB        1976      1998.0          1.0   
7  00-0011754       Randy Moss       WR        1977      1998.0          1.0   
8  00-0011754       Randy Moss       WR        1977      1998.0          1.0   
9  00-0015754  Brandon Stokley       WR        1976      1999.0          4.0   

   draft_pick  draft_ovr  height  weight  ... delta_comp_pct  delta_int_pct  \
0        30.0       60.0    74.0   216.0

In [4]:
yearly_data_off.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7133 entries, 0 to 7132
Columns: 660 entries, player_id to delta_yptouch
dtypes: float64(647), int64(5), object(8)
memory usage: 35.9+ MB


In [5]:
#Filter only 2021-2024 seasons
yearly_data_off = yearly_data_off[yearly_data_off['season'].isin([2021,2022,2023, 2024])]
weekly_data_off = weekly_data_off[weekly_data_off['season'].isin([2021, 2022, 2023, 2024])]

In [None]:
#drop players with no games played

yearly_data_off = yearly_data_off[yearly_data_off['games_played_season'] > 0]
weekly_data_off = weekly_data_off[weekly_data_off['games_played_season'] > 0]

4688    2021
4689    2021
4690    2021
4691    2021
4692    2021
4693    2021
4694    2021
4695    2021
4696    2021
4697    2021
Name: season, dtype: int64

# Feature Engineering

In [12]:
#Keep some aggregate features
yearly_features = yearly_data_off[['player_id', 'season', 'passing_yards','pass_touchdown',
                                   'rushing_yards','rush_touchdown','receptions','receiving_yards',
                                   'receiving_touchdown','fantasy_points_ppr']]

In [17]:
# rolling averages: last 3 weeks players
weekly_data_off = weekly_data_off.sort_values(['player_id','season','week'])

weekly_data_off['fp_last3'] = (
    weekly_data_off.groupby('player_id')['fantasy_points_ppr'].transform(lambda x: x.rolling(3, min_periods=1).mean())
)

weekly_features = weekly_data_off[['player_id','season','week','passing_yards','pass_touchdown',
                                   'rushing_yards','rush_touchdown','receptions','receiving_yards',
                                   'receiving_touchdown','fantasy_points_ppr']]

In [19]:
#Merge yearly summary into weekly data
data_merged = weekly_features.merge(yearly_features, on=['player_id','season'],
                                    suffixes=('_weekly','_yearly'))

data_merged

Unnamed: 0,player_id,season,week,passing_yards_weekly,pass_touchdown_weekly,rushing_yards_weekly,rush_touchdown_weekly,receptions_weekly,receiving_yards_weekly,receiving_touchdown_weekly,fantasy_points_ppr_weekly,passing_yards_yearly,pass_touchdown_yearly,rushing_yards_yearly,rush_touchdown_yearly,receptions_yearly,receiving_yards_yearly,receiving_touchdown_yearly,fantasy_points_ppr_yearly
0,00-0019596,2021,1,379.0,4.0,0.0,0.0,0.0,0.0,0.0,35.16,329.0,1.0,0.0,0.0,0.0,0.0,0.0,14.16
1,00-0019596,2021,1,379.0,4.0,0.0,0.0,0.0,0.0,0.0,35.16,5316.0,43.0,81.0,2.0,0.0,0.0,0.0,457.74
2,00-0019596,2021,2,276.0,5.0,6.0,0.0,0.0,0.0,0.0,38.64,329.0,1.0,0.0,0.0,0.0,0.0,0.0,14.16
3,00-0019596,2021,2,276.0,5.0,6.0,0.0,0.0,0.0,0.0,38.64,5316.0,43.0,81.0,2.0,0.0,0.0,0.0,457.74
4,00-0019596,2021,3,432.0,1.0,14.0,1.0,0.0,0.0,0.0,30.68,329.0,1.0,0.0,0.0,0.0,0.0,0.0,14.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25198,00-0039921,2024,10,0.0,0.0,62.0,0.0,2.0,25.0,0.0,10.70,0.0,0.0,291.0,1.0,6.0,59.0,0.0,47.00
25199,00-0039921,2024,12,0.0,0.0,18.0,0.0,0.0,0.0,0.0,1.80,0.0,0.0,291.0,1.0,6.0,59.0,0.0,47.00
25200,00-0039921,2024,13,0.0,0.0,20.0,0.0,0.0,0.0,0.0,2.00,0.0,0.0,291.0,1.0,6.0,59.0,0.0,47.00
25201,00-0039921,2024,14,0.0,0.0,15.0,0.0,1.0,4.0,0.0,2.90,0.0,0.0,291.0,1.0,6.0,59.0,0.0,47.00


In [None]:
features = ['passing_yards_weekly', 'passing_touchdown_weekly','rushing_touchdown_weekly','rushing_touchdown_weekly',
            'receptions_weekly','receiving_yards_weekly', 'receiving_touchdown_weekly',
            'fp_last3','passing_yards_yearly', 'rushing_yards_yearly']

target = 'fantasy_points_ppr_weekly'