In [None]:
import os
import pandas as pd

# Define paths
pro_football_focus_data = 'data/pro_football_ref.xlsx'
model_path = 'model_path/trained_model.pth'

# Load and preprocess the pro football focus data.
df = pd.read_excel(pro_football_focus_data)
# Filter to only include wide receivers (WR)
df = df[df['FantPos'].isin(['WR', 'TE'])].copy()
df.replace([float('inf'), -float('inf')], 0, inplace=True)
df.fillna(0, inplace=True)

# Calculate points per game.
df.loc[:, 'PPR/G'] = df['PPR'] / df['G']

print(df.head(10))

  machar = _get_machar(dtype)


    YEAR  Rk             Player   Tm FantPos  Age   G  GS  QBCmp  QBAtt  ...  \
4   2024   5      Ja'Marr Chase  CIN      WR   24  17  16      0      0  ...   
11  2024  12   Justin Jefferson  MIN      WR   25  17  17      1      1  ...   
14  2024  15      George Kittle  SFO      TE   31  15  15      0      0  ...   
17  2024  18  Amon-Ra St. Brown  DET      WR   25  17  17      1      1  ...   
18  2024  19       Brian Thomas  JAX      WR   22  17  16      0      0  ...   
19  2024  20       Brock Bowers  LVR      TE   22  17  16      0      0  ...   
22  2024  23     Terry McLaurin  WAS      WR   29  17  17      0      0  ...   
23  2024  24       Trey McBride  ARI      TE   25  16  16      0      0  ...   
25  2024  26       Drake London  ATL      WR   23  17  17      0      0  ...   
27  2024  28        Jonnu Smith  MIA      TE   29  17   6      0      0  ...   

    RecYds    Y/R  RecTD  Fmb  FL  TotTD  14:00:00  2PP    PPR      PPR/G  
4     1708  13.45     17    0   0     17   

In [2]:
# Copy the 2024 data into separate dataframe.
df_2024 = df[df['YEAR'] == 2024].copy()
player_names_2024 = df_2024['Player'].reset_index(drop=True)
df_2024 = df_2024.drop(columns=['Player'])

print(df_2024.head(10))

    YEAR  Rk   Tm FantPos  Age   G  GS  QBCmp  QBAtt  QBYds  ...  RecYds  \
4   2024   5  CIN      WR   24  17  16      0      0      0  ...    1708   
11  2024  12  MIN      WR   25  17  17      1      1     22  ...    1533   
14  2024  15  SFO      TE   31  15  15      0      0      0  ...    1106   
17  2024  18  DET      WR   25  17  17      1      1      7  ...    1263   
18  2024  19  JAX      WR   22  17  16      0      0      0  ...    1282   
19  2024  20  LVR      TE   22  17  16      0      0      0  ...    1194   
22  2024  23  WAS      WR   29  17  17      0      0      0  ...    1096   
23  2024  24  ARI      TE   25  16  16      0      0      0  ...    1146   
25  2024  26  ATL      WR   23  17  17      0      0      0  ...    1271   
27  2024  28  MIA      TE   29  17   6      0      0      0  ...     884   

      Y/R  RecTD  Fmb  FL  TotTD  14:00:00  2PP    PPR      PPR/G  
4   13.45     17    0   0     17       0.0  0.0  403.0  23.705882  
11  14.88     10    1   0  

In [3]:
# Shift to represent the following year's points per game
df['NextYearPPR/G'] = df.groupby('Player')['PPR/G'].shift(-1)

# Remove rows where the target is NaN (i.e., no following year data)
df = df[df['NextYearPPR/G'].notna()]

print(df.size)

25290


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define features and target.
feature_names = ['Age', 'Tgt', 'Rec', 'RecYds', 'PPR/G']
target = 'NextYearPPR/G'

# Split the data into training, validation, and test sets
X = df[feature_names]
y = df[target]
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
print(f'{X_train}')
print(f'{y_train}')
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
X_2024 = df_2024[feature_names]

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_2024 = scaler.transform(X_2024)

# Ensure X_train, X_val, X_test, and X_2024 are correctly shaped for LSTM
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
X_2024 = X_2024.reshape(X_2024.shape[0], 1, X_2024.shape[1])

# Check to see standardized data.
#print(f'Size {y_train.size}')
#print(f'Size {X_val.size}')
#print(f'Size {y_val.size}')

      Age  Tgt  Rec  RecYds      PPR/G
423    27   87   51     717  11.746154
1666   34   82   52     597   8.835714
1537   27  149  104    1199  16.343750
420    22  110   69     905  10.900000
1381   27   60   30     505   8.258333
...   ...  ...  ...     ...        ...
167    24   71   47     548   7.781250
233    25   86   59     494   7.457143
626    23  146  106    1161  16.725000
1034   29   69   45     556  15.244444
222    32   58   40     390   5.352941

[505 rows x 5 columns]
423     10.629412
1666     8.900000
1537    16.256250
420     10.505882
1381     9.864286
          ...    
167      8.807692
233     10.417647
626     13.370588
1034    15.318750
222      7.766667
Name: NextYearPPR/G, Length: 505, dtype: float64


In [5]:
from nnclass.simple_nn import SimpleLSTM
from torch.utils.data import TensorDataset, DataLoader
import torch

# Create SimpleLTSM.
input_size = X_train.shape[2]
hidden_size = 32
output_size = 1
lstm = SimpleLSTM(input_size, hidden_size, output_size)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [6]:
import torch.optim as optim
import torch.nn as nn

# Train the model.
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm.parameters())

num_epochs = 750
count = 0
for epoch in range(num_epochs):
    lstm.train() # Set model to training mode
    for inputs, labels in train_loader:
        # Forward pass
        outputs = lstm(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        count += 1
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    print(f'Count: {count}')

Epoch [1/750], Loss: 114.9391
Count: 16
Epoch [2/750], Loss: 133.8235
Count: 32
Epoch [3/750], Loss: 137.2437
Count: 48
Epoch [4/750], Loss: 131.5339
Count: 64
Epoch [5/750], Loss: 118.4695
Count: 80
Epoch [6/750], Loss: 158.0957
Count: 96
Epoch [7/750], Loss: 119.2900
Count: 112
Epoch [8/750], Loss: 122.2508
Count: 128
Epoch [9/750], Loss: 109.5540
Count: 144
Epoch [10/750], Loss: 100.6923
Count: 160
Epoch [11/750], Loss: 83.9892
Count: 176
Epoch [12/750], Loss: 111.0381
Count: 192
Epoch [13/750], Loss: 86.5325
Count: 208
Epoch [14/750], Loss: 75.0769
Count: 224
Epoch [15/750], Loss: 77.4620
Count: 240
Epoch [16/750], Loss: 52.8025
Count: 256
Epoch [17/750], Loss: 45.0062
Count: 272
Epoch [18/750], Loss: 69.3775
Count: 288
Epoch [19/750], Loss: 50.9552
Count: 304
Epoch [20/750], Loss: 39.3974
Count: 320
Epoch [21/750], Loss: 44.7584
Count: 336
Epoch [22/750], Loss: 29.7782
Count: 352
Epoch [23/750], Loss: 27.5728
Count: 368
Epoch [24/750], Loss: 33.4538
Count: 384
Epoch [25/750], Loss

In [7]:
# Evaluate the model.
lstm.eval() # Set model to eval mode
total_loss = 0
for inputs, labels in val_loader:
    # Forward pass
    outputs = lstm(inputs)
    loss = criterion(outputs, labels)
    total_loss += loss
print(f'Evaluation, Total Loss: {total_loss.item():.4f}')

Evaluation, Total Loss: 56.1223


In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

y_train_pred = lstm(X_train_tensor).detach().numpy()
y_test_pred = lstm(X_test_tensor).detach().numpy()

# Get the diff values for evaluation.
rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
mae_train = mean_absolute_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)

rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
mae_test = mean_absolute_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

print(f'Training RMSE: {rmse_train:.4f}, MAE: {mae_train:.4f}, R²: {r2_train:.4f}')
print(f'Testing RMSE: {rmse_test:.4f}, MAE: {mae_test:.4f}, R²: {r2_test:.4f}')

Training RMSE: 2.6535, MAE: 2.0850, R²: 0.6190
Testing RMSE: 3.1983, MAE: 2.5173, R²: 0.3950


In [9]:
# Return results using model.
predictions = lstm(torch.tensor(X_2024, dtype=torch.float32)).detach().numpy()
predictions_df = pd.DataFrame(predictions, columns=['Prediction'])
predictions_df['Player'] = player_names_2024
print(f'{predictions_df}')

     Prediction              Player
0     17.062727       Ja'Marr Chase
1     15.240273    Justin Jefferson
2     11.612712       George Kittle
3     15.051228   Amon-Ra St. Brown
4     12.320720        Brian Thomas
..          ...                 ...
163    4.739200      Elijah Higgins
164    5.135512    Malik Washington
165    5.101123        Derius Davis
166    5.679075        Johnny Mundt
167    4.327011  Darnell Washington

[168 rows x 2 columns]


In [10]:
# Save the new DataFrame to an Excel file
file_path = 'results/simple_lstm_wr.xlsx'
predictions_df.to_excel(file_path, index=False)