In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utils_data
from torch.autograd import Variable

from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score

import pandas as pd
import numpy as np

import pickle
import json
import os
import warnings
import datetime

In [2]:
# Choose the compute device once: CUDA when PyTorch can see a GPU, otherwise the CPU.
_cuda_ready = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_ready else "cpu")

# Report what was selected so the run log shows which hardware was used.
if not _cuda_ready:
    print("GPU not available, using CPU.")
else:
    gpu_total = torch.cuda.device_count()
    gpu_names = [torch.cuda.get_device_name(idx) for idx in range(gpu_total)]
    print(f"{gpu_total} GPU(s) available and will be used.\nGPU name(s): {gpu_names}")

1 GPU(s) available and will be used.
GPU name(s): ['Tesla V100-PCIE-16GB']


In [3]:
# Presumably the shared random seed (the name suggests it); no cell visible in this
# section reads it — TODO confirm it is actually applied (e.g. torch.manual_seed).
RANDOM_STATE = 42

In [4]:
class tanh_MLP(nn.Module):
    """Fully connected network with two tanh-activated hidden layers.

    Layout: input -> fc1 -> tanh -> fc2 -> tanh -> fc3 -> output.
    The final layer is linear (no activation is applied to its result).

    The class name and the attribute names fc1/fc2/fc3 are part of the
    contract: models pickled from this class are loaded again with
    pickle.load, which resolves the class by name.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        """Create the three linear layers.

        Args:
            input_size: number of input features.
            hidden_size1: width of the first hidden layer.
            hidden_size2: width of the second hidden layer.
            output_size: number of output values.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        """Run the forward pass and return the output of the last linear layer."""
        first_hidden = torch.tanh(self.fc1(x))
        second_hidden = torch.tanh(self.fc2(first_hidden))
        return self.fc3(second_hidden)

In [5]:
# Derive the target and feature column names from the first fold's training CSV.
# Only the header row is needed, so nrows=0 avoids parsing every data row just
# to read the column names.
nfl_df = pd.read_csv('../dataset_generators/datasets/0_train_normalized.csv', nrows=0)

# Target column
y = ['fantasy_points']

# Columns that must not be fed to the model: identifiers / metadata, the target
# itself, and the unnamed index column written into the CSV.
NON_FEATURE_COLUMNS = (
    'player_id',
    'player_name',
    'player_display_name',
    'position',
    'position_group',
    'headshot_url',
    'season',
    'week',
    'season_type',
    'team',
    'opponent_team',
    'fantasy_points',
    'Unnamed: 0',
)

# Feature columns, kept in the CSV's own order because this list is later used
# to index data_df[X] before scaling.
X = [col for col in nfl_df.columns if col not in NON_FEATURE_COLUMNS]
del nfl_df

In [6]:
# Display the selected feature names; the list order is the column order used for data_df[X] below.
X

['age',
 'years_exp',
 'passing_yards_r_avg_1_lag_1',
 'passing_yards_r_avg_3_lag_1',
 'passing_yards_r_avg_5_lag_1',
 'passing_yards_r_avg_8_lag_1',
 'passing_tds_r_avg_1_lag_1',
 'passing_tds_r_avg_3_lag_1',
 'passing_tds_r_avg_5_lag_1',
 'passing_tds_r_avg_8_lag_1',
 'passing_interceptions_r_avg_1_lag_1',
 'passing_interceptions_r_avg_3_lag_1',
 'passing_interceptions_r_avg_5_lag_1',
 'passing_interceptions_r_avg_8_lag_1',
 'passing_2pt_conversions_r_avg_1_lag_1',
 'passing_2pt_conversions_r_avg_3_lag_1',
 'passing_2pt_conversions_r_avg_5_lag_1',
 'passing_2pt_conversions_r_avg_8_lag_1',
 'sack_fumbles_lost_r_avg_1_lag_1',
 'sack_fumbles_lost_r_avg_3_lag_1',
 'sack_fumbles_lost_r_avg_5_lag_1',
 'sack_fumbles_lost_r_avg_8_lag_1',
 'rushing_yards_r_avg_1_lag_1',
 'rushing_yards_r_avg_3_lag_1',
 'rushing_yards_r_avg_5_lag_1',
 'rushing_yards_r_avg_8_lag_1',
 'rushing_tds_r_avg_1_lag_1',
 'rushing_tds_r_avg_3_lag_1',
 'rushing_tds_r_avg_5_lag_1',
 'rushing_tds_r_avg_8_lag_1',
 'rushing_

In [30]:
# Inspect the lagged rushing features for one RB game
# (player_id 00-0039040, season 2023, week 3) next to the points actually scored.
nfl_df = pd.read_csv('../dataset_generators/datasets/final_1_lag_ffa_dataset.csv')

# Columns to show: the week, every rushing rolling average, and the target.
rushing_view_cols = [
    'week',
    'rushing_yards_r_avg_1_lag_1',
    'rushing_yards_r_avg_3_lag_1',
    'rushing_yards_r_avg_5_lag_1',
    'rushing_yards_r_avg_8_lag_1',
    'rushing_tds_r_avg_1_lag_1',
    'rushing_tds_r_avg_3_lag_1',
    'rushing_tds_r_avg_5_lag_1',
    'rushing_tds_r_avg_8_lag_1',
    'fantasy_points',
]

# Row filter: all four conditions must hold.
single_game_mask = (
    nfl_df['position'].eq('RB')
    & nfl_df['player_id'].eq('00-0039040')
    & nfl_df['season'].eq(2023)
    & nfl_df['week'].eq(3)
)

nfl_df.loc[single_game_mask, rushing_view_cols].sort_values('week', ascending=False)


Unnamed: 0,week,rushing_yards_r_avg_1_lag_1,rushing_yards_r_avg_3_lag_1,rushing_yards_r_avg_5_lag_1,rushing_yards_r_avg_8_lag_1,rushing_tds_r_avg_1_lag_1,rushing_tds_r_avg_3_lag_1,rushing_tds_r_avg_5_lag_1,rushing_tds_r_avg_8_lag_1,fantasy_points
59754,3,5.0,5.0,5.0,5.0,0.0,0.0,0.0,0.0,47.3


In [8]:
# Build a one-row synthetic input: the first feature is 21 and every other
# feature is 0. (The X listing shows 'age' as the first feature, so this looks
# like a 21-year-old with no recorded history — confirm if X changes.)
# The row length follows len(X) instead of a hard-coded 66, so the frame still
# builds if the feature list grows or shrinks.
stats = [[21 if position == 0 else 0 for position in range(len(X))]]
stats_df = pd.DataFrame(stats, columns=X)

In [9]:
# The best model is the one below (file-name suffix shared by the five fold models)
model = '_100-100_tanh_100_0.0001_norm.pickle'

# Get the dataset
dataset_name = '../dataset_generators/datasets/final_1_lag_ffa_dataset.csv'
data_df = pd.read_csv(dataset_name)

# Identifier / target columns shown next to the predictions. They do not depend
# on the fold, so they are selected once instead of on every loop iteration.
new_df_y = data_df[['player_id','player_display_name','season','week','fantasy_points']]

# One prediction array per fold model, appended in fold order
predictions = []

# Iterate over all 5 versions of the model that were trained on the KFold cross validation sets
for n in range(5):
    # Load the model.
    # NOTE(security): pickle.load can execute arbitrary code; only load model and
    # scaler files produced by this project.
    model_name = f'../model_generators/mlp/models/{n}{model}'
    with open(model_name, 'rb') as f:
        mlp = pickle.load(f)
    # The input tensor is moved to `device` below, so the model has to live on
    # the same device (assumes the pickled object is an nn.Module — it is
    # called on a tensor and exposes .eval()).
    mlp = mlp.to(device)
    mlp.eval()

    # Get the scaler belonging to the same fold
    scaler_name = f'../dataset_generators/datasets/{n}_scaler.pickle'
    with open(scaler_name, 'rb') as f:
        scaler = pickle.load(f)

    # Scale the features (to score the synthetic stats_df row instead, transform
    # stats_df here in place of data_df[X]).
    new_df_x = scaler.transform(data_df[X])
    X_test_tensor = torch.tensor(new_df_x, dtype=torch.float32).to(device)

    # Get the predictions for y
    with torch.no_grad():
        try:
            y_pred = mlp(X_test_tensor).cpu().numpy()
        except Exception as e:
            # Fail loudly: printing and continuing would append the previous
            # fold's y_pred again (or hit a NameError on the first fold), and
            # the ensemble average built later would be silently wrong.
            raise RuntimeError(f"Model name: {model_name}\nError: {str(e)}") from e
    predictions.append(y_pred)

In [22]:
# Create the dataset for analysis that includes predictions
if not predictions:
    raise ValueError("predictions is empty; run the prediction cell first")

# One column per fold model: '_0', '_1', ...
fold_cols = [f'_{n}' for n in range(len(predictions))]

# Reuse new_df_y's index so the index-based join below pairs each prediction
# with the row it was computed from, even if data_df was filtered and no
# longer has a 0..N-1 index. column_stack accepts (N,) and (N, 1) arrays alike.
df = pd.DataFrame(np.column_stack(predictions), columns=fold_cols, index=new_df_y.index)

# Ensemble prediction = plain average over the fold models. The folds are
# added left to right so the arithmetic matches a chained '+' exactly.
running_total = df[fold_cols[0]]
for col in fold_cols[1:]:
    running_total = running_total + df[col]
df['sum_cols'] = running_total
df['prediction'] = df['sum_cols'] / len(fold_cols)

# Keep only the averaged prediction next to the identifier / target columns
fin_df = new_df_y.join(df[['prediction']])
fin_df

Unnamed: 0,player_id,player_display_name,season,week,fantasy_points,prediction
0,00-0007091,Matt Hasselbeck,2014,4,-0.20,1.959582
1,00-0007091,Matt Hasselbeck,2014,16,7.04,1.227479
2,00-0007091,Matt Hasselbeck,2014,17,9.40,5.009847
3,00-0007091,Matt Hasselbeck,2015,4,15.38,6.941920
4,00-0007091,Matt Hasselbeck,2015,5,16.22,16.310097
...,...,...,...,...,...,...
60964,00-0039921,Trey Benson,2024,10,8.70,4.843265
60965,00-0039921,Trey Benson,2024,12,1.80,6.160219
60966,00-0039921,Trey Benson,2024,13,2.00,5.015711
60967,00-0039921,Trey Benson,2024,14,1.90,5.021544


In [32]:
# Absolute error |actual - predicted| per row: an easy-to-read measure of how far off each prediction was
fin_df['abs_error'] = (fin_df['fantasy_points'] - fin_df['prediction']).abs()
# Worst predictions first
fin_df.sort_values(by='abs_error', ascending=False)

Unnamed: 0,player_id,player_display_name,season,week,fantasy_points,prediction,error,abs_error
59754,00-0039040,De'Von Achane,2023,3,47.3,2.562397,44.737603,44.737603
39100,00-0033906,Alvin Kamara,2020,16,53.2,11.279590,41.920410,41.920410
38988,00-0033897,Joe Mixon,2022,9,51.1,9.531981,41.568019,41.568019
31118,00-0032764,Derrick Henry,2018,14,47.8,7.713289,40.086711,40.086711
11691,00-0029104,Jonas Gray,2014,11,44.1,5.828145,38.271855,38.271855
...,...,...,...,...,...,...,...,...
42511,00-0034418,Cedrick Wilson Jr.,2022,15,2.1,2.099679,0.000321,0.000321
13399,00-0029601,Brandon Bostick,2016,5,0.9,0.899679,0.000321,0.000321
40868,00-0034270,Tyler Conklin,2021,1,4.1,4.100308,0.000308,0.000308
17343,00-0030432,Christine Michael,2015,8,2.0,2.000081,0.000081,0.000081


In [33]:
# Look into De'Von Achane's stats to see if there are any other outliers.
# Observations from the rows shown:
# - He missed about 6 weeks in 2023, came back in week 11, and scored 0.5 points
#   while being predicted to score about 16.
# - He was coming off an injury, so he should not be assumed to play at the same level.
# - The cause: the rolling averages do not track when players miss games.
achane_rows = fin_df['player_id'].eq('00-0039040')
fin_df.loc[achane_rows].sort_values(by=['season', 'week'])

Unnamed: 0,player_id,player_display_name,season,week,fantasy_points,prediction,error,abs_error
59754,00-0039040,De'Von Achane,2023,3,47.3,2.562397,44.737603,44.737603
59755,00-0039040,De'Von Achane,2023,4,24.0,15.598704,8.401296,8.401296
59756,00-0039040,De'Von Achane,2023,5,20.5,15.690457,4.809543,4.809543
59757,00-0039040,De'Von Achane,2023,11,0.5,16.210865,15.710865,15.710865
59758,00-0039040,De'Von Achane,2023,13,22.3,12.645218,9.654782,9.654782
59759,00-0039040,De'Von Achane,2023,14,7.1,14.24177,7.14177,7.14177
59760,00-0039040,De'Von Achane,2023,15,6.2,11.761456,5.561456,5.561456
59761,00-0039040,De'Von Achane,2023,16,3.1,11.228066,8.128066,8.128066
59762,00-0039040,De'Von Achane,2023,17,19.7,10.069305,9.630695,9.630695
59763,00-0039040,De'Von Achane,2023,18,12.1,11.214622,0.885378,0.885378
