In [16]:
import os
import pandas as pd

# Define paths
pro_football_focus_data = 'data/pro_football_ref.xlsx'
results_path = 'results'
model_path = 'models/trained_model.pth'
predictions_path = os.path.join(results_path, 'predictions_2025.xlsx')

# Load and preprocess the pro football focus data.
df = pd.read_excel(pro_football_focus_data)
# Filter to only include wide receivers (WR)
df = df[df['FantPos'] == 'WR'].copy()
df.replace([float('inf'), -float('inf')], 0, inplace=True)
df.fillna(0, inplace=True)

# Calculate points per game.
df.loc[:, 'PPR/G'] = df['PPR'] / df['G']

print(df.head(10))

    YEAR  Rk             Player   Tm FantPos  Age   G  GS  Cmp  Att  ...  \
4   2024   5      Ja'Marr Chase  CIN      WR   24  17  16    0    0  ...   
11  2024  12   Justin Jefferson  MIN      WR   25  17  17    1    1  ...   
17  2024  18  Amon-Ra St. Brown  DET      WR   25  17  17    1    1  ...   
18  2024  19       Brian Thomas  JAX      WR   22  17  16    0    0  ...   
22  2024  23     Terry McLaurin  WAS      WR   29  17  17    0    0  ...   
25  2024  26       Drake London  ATL      WR   23  17  17    0    0  ...   
34  2024  35         Mike Evans  TAM      WR   31  14  14    0    0  ...   
36  2024  37       Malik Nabers  NYG      WR   21  15  13    0    1  ...   
39  2024  40        CeeDee Lamb  DAL      WR   25  15  15    0    0  ...   
41  2024  42   Courtland Sutton  DEN      WR   29  17  13    2    2  ...   

    Yds.2    Y/R  TD.2  Fmb  FL  TD.3  14:00:00  2PP    PPR      PPR/G  
4    1708  13.45    17    0   0    17       0.0  0.0  403.0  23.705882  
11   1533  14.88 

In [17]:
# Copy the 2024 data into separate dataframe.
df_2024 = df[df['YEAR'] == 2024].copy()
player_names_2024 = df_2024['Player'].reset_index(drop=True)
df_2024 = df_2024.drop(columns=['Player'])

print(df_2024.head(10))

    YEAR  Rk   Tm FantPos  Age   G  GS  Cmp  Att  Yds  ...  Yds.2    Y/R  \
4   2024   5  CIN      WR   24  17  16    0    0    0  ...   1708  13.45   
11  2024  12  MIN      WR   25  17  17    1    1   22  ...   1533  14.88   
17  2024  18  DET      WR   25  17  17    1    1    7  ...   1263  10.98   
18  2024  19  JAX      WR   22  17  16    0    0    0  ...   1282  14.74   
22  2024  23  WAS      WR   29  17  17    0    0    0  ...   1096  13.37   
25  2024  26  ATL      WR   23  17  17    0    0    0  ...   1271  12.71   
34  2024  35  TAM      WR   31  14  14    0    0    0  ...   1004  13.57   
36  2024  37  NYG      WR   21  15  13    0    1    0  ...   1204  11.05   
39  2024  40  DAL      WR   25  15  15    0    0    0  ...   1194  11.82   
41  2024  42  DEN      WR   29  17  13    2    2   30  ...   1081  13.35   

    TD.2  Fmb  FL  TD.3  14:00:00  2PP    PPR      PPR/G  
4     17    0   0    17       0.0  0.0  403.0  23.705882  
11    10    1   0    10       0.0  0.0  317.5

In [None]:
# Shift to represent the following year's points per game
df['NextYearPPR/G'] = df.groupby('Player')['PPR/G'].shift(-1)

# Remove rows where the target is NaN (i.e., no following year data)
df = df[df['NextYearPPR/G'].notna()]

print(df.head(20))

    YEAR  Rk              Player   Tm FantPos  Age   G  GS  Cmp  Att  ...  \
4   2024   5       Ja'Marr Chase  CIN      WR   24  17  16    0    0  ...   
11  2024  12    Justin Jefferson  MIN      WR   25  17  17    1    1  ...   
17  2024  18   Amon-Ra St. Brown  DET      WR   25  17  17    1    1  ...   
22  2024  23      Terry McLaurin  WAS      WR   29  17  17    0    0  ...   
25  2024  26        Drake London  ATL      WR   23  17  17    0    0  ...   
34  2024  35          Mike Evans  TAM      WR   31  14  14    0    0  ...   
39  2024  40         CeeDee Lamb  DAL      WR   25  15  15    0    0  ...   
41  2024  42    Courtland Sutton  DEN      WR   29  17  13    2    2  ...   
45  2024  46       Davante Adams  2TM      WR   32  14  14    0    0  ...   
47  2024  48    Jameson Williams  DET      WR   23  15  11    0    0  ...   
49  2024  50  Jaxon Smith-Njigba  SEA      WR   22  17  16    1    1  ...   
50  2024  51         Jerry Jeudy  CLE      WR   25  17  16    0    0  ...   

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define features and target.
feature_names = ['Rk', 'Age', 'TD.3', 'Tgt', 'Rec', 'Yds.2', 'PPR/G']
target = 'NextYearPPR/G'

# Split the data into training, validation, and test sets
X = df[feature_names]
y = df[target]
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
X_2024 = df_2024[feature_names]

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_2024 = scaler.transform(X_2024)

# Ensure X_train, X_val, X_test, and X_2024 are correctly shaped for LSTM
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
X_2024 = X_2024.reshape(X_2024.shape[0], 1, X_2024.shape[1])

# Check to see standardized data.
print(X_train.view())

[[[ 0.07291581  0.8277448   0.07323932 -0.53034047 -0.4587905
   -0.52396337]]

 [[-0.0438196   0.11049521  0.41197116 -0.6620888  -0.57168286
   -0.25288135]]

 [[-0.0438196  -0.60675439 -0.26549252  0.12840122  0.33145605
    0.08668455]]

 ...

 [[ 1.97959417  0.8277448  -1.28168804 -1.18908215 -1.39956019
   -1.30582099]]

 [[ 1.51265253  0.11049521 -0.9429562  -1.50527816 -1.39956019
   -1.14602527]]

 [[-1.35385031 -0.60675439  0.750703    0.41824756  0.67013314
    1.71888366]]]


In [38]:
from nnclass.simple_nn import SimpleLSTM
from torch.utils.data import TensorDataset, DataLoader
import torch

# Use SimpleLTSM and run a pass.
input_size = X_train.shape[2]
hidden_size = 32
output_size = 1
lstm = SimpleLSTM(input_size, hidden_size, output_size)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Train the model.
lstm.train()
for inputs, targets in train_loader:
    outputs = lstm(inputs)

# Evaluate the model.
for inputs, targets in val_loader:
    outputs = lstm(inputs)
    print(f"target: {targets}, input: {inputs}")

target: tensor([[14.6375],
        [ 7.1727],
        [10.5813],
        [21.1875],
        [ 5.7125],
        [19.7188],
        [14.1857],
        [ 8.4933],
        [10.9875],
        [ 9.4333],
        [11.8875],
        [14.2200],
        [17.0333],
        [ 8.3929],
        [ 8.9167],
        [11.1000],
        [ 7.4250],
        [14.8692],
        [ 7.9375],
        [ 8.5000],
        [ 7.4750],
        [ 9.2600],
        [11.6938],
        [ 8.1909],
        [ 9.1250],
        [ 8.9600],
        [ 7.0687],
        [13.8462],
        [19.4353],
        [10.2467],
        [ 3.4176],
        [10.6267]]), input: tensor([[[ 0.4620,  2.2622, -0.2655, -0.7148, -0.5717, -0.5554]],

        [[ 1.5256,  0.4691, -0.9430, -1.2945, -1.2490, -1.3229]],

        [[ 0.5788, -0.9654, -0.9430, -0.7411, -0.7222, -0.4526]],

        [[-0.2384, -0.2481,  0.0732,  0.1284, -0.0449, -0.3071]],

        [[-0.1995,  0.1105,  0.0732,  0.0230, -0.3835,  0.1010]],

        [[-1.5873, -0.2481,  1.4282,  2.