## WR_FFPG_NN
An Artificial Neural Network (ANN) that predicts Fantasy Football Points per Game (FFPG) from a set of Wide Receiver (WR) metrics.
The ANN is trained on WR data from the 2018-2022 NFL seasons, and the same model is then tested on the 2023 season.
The ANN's performance is compared against a polynomial regression (NumPy `polyfit`).

Made by: Nikhil Gupta

### Libraries

In [12]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
import numpy as np
import plotly.graph_objects as go

### Set-up

In [13]:
# Run everything on the CPU; simple and efficient for this use case
device = torch.device("cpu")

# Dataset CSVs (training and validation)
train_filename = "wr_data_18-22.csv"
valid_filename = "wr_data_23.csv"

# File name used for the saved model parameters
model_filename = "ffpg_model.pt"

# Input feature columns and the output target column
input_metrics = [
    'targets_per_route_run',
    'yards_per_route_run',
    'air_yards_share',
    'target_share',
    'yards_after_catch_per_reception',
]
output_metrics = ['fantasy_points_per_game']

# Hyperparameters
n_hidden = [2]  # Width of each hidden layer

### Define custom dataset

In [14]:
class CustomDataset(Dataset):
    """Tensor-backed dataset of wide receiver metrics read from a CSV file.

    Rows are ordered by the output metric(s), highest first. Player names
    and seasons are kept alongside the feature/target tensors.
    """
    def __init__(self, filename, input_metrics, output_metrics):
        """Load the CSV, rescale two rate columns and build the tensors.

        Args:
            filename (str): Path to the CSV file containing the dataset.
            input_metrics (list): Column names used as input features.
            output_metrics (list): Column names used as targets.
        """
        # Load the rows and order them by target value, descending
        frame = pd.read_csv(filename)
        frame = frame.sort_values(by=output_metrics, ascending=False)

        # Rescale the per-route rate columns by fixed factors
        scale_factors = {
            'targets_per_route_run': 100,
            'yards_per_route_run': 10,
        }
        for column, factor in scale_factors.items():
            frame[column] = frame[column] * factor

        # Identifying columns are stored as strings
        self.names = frame['name'].astype('string').values
        self.seasons = frame['season'].astype('string').values

        # Features and targets as float32 tensors
        features = frame[input_metrics].values
        targets = frame[output_metrics].values
        self.X = torch.tensor(features, dtype=torch.float32)
        self.Y = torch.tensor(targets, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (rows) in the dataset."""
        return len(self.X)

    def __getitem__(self, index):
        """Fetch one sample.

        Args:
            index (int): Position of the sample to retrieve.
        Returns:
            tuple: ``(features, target)`` tensors for that sample.
        """
        features = self.X[index]
        target = self.Y[index]
        return features, target

### Define ANN

In [15]:
class SimpleNN(nn.Module):
    """Feedforward regression network with one hidden layer.

    Layout: Linear -> LeakyReLU -> Linear(1) -> ReLU, so the single output
    value is never negative.
    """
    def __init__(self, num_features, n_hidden):
        """Build the layers.

        Args:
            num_features (int): Number of input features.
            n_hidden (list): Hidden layer widths. Only the first entry,
                ``n_hidden[0]``, is used by this network.
        """
        super().__init__()

        hidden_width = n_hidden[0]
        self.hidden1 = nn.Linear(num_features, hidden_width)
        self.act1 = nn.LeakyReLU()
        self.output = nn.Linear(hidden_width, 1)
        self.act_output = nn.ReLU()

    def forward(self, x):
        """Run the network on a batch of inputs.

        Args:
            x (torch.Tensor): Input tensor.
        Returns:
            torch.Tensor: Network output after the final ReLU.
        """
        hidden = self.hidden1(x)
        hidden = self.act1(hidden)
        raw_output = self.output(hidden)
        return self.act_output(raw_output)

### Define training loop

In [17]:
def train_model(filename, input_metrics, output_metrics, n_hidden, epsilon=1, batch_size=32, epochs=500):
    """Train a neural network model on the provided dataset.

    Training runs for at most ``epochs`` full passes over the data. Before
    each pass the loss of the most recent mini-batch is compared with
    ``epsilon``; training stops as soon as that loss is no longer greater
    than ``epsilon``.

    Args:
        filename (str): Path to the CSV file containing the dataset.
        input_metrics (list): List of input feature names.
        output_metrics (list): List of output target names.
        n_hidden (list): Hidden layer widths, passed through to ``SimpleNN``.
        epsilon (float): Threshold for stopping training based on loss.
        batch_size (int): Size of each batch for training.
        epochs (int): Maximum number of epochs to train.
    Returns:
        tuple: ``(model, dataset, loss)`` where ``model`` has been moved to
        the CPU and ``loss`` is the detached loss of the last processed
        mini-batch (``torch.inf`` if no batch was ever processed).
    """
    # Create dataset and dataloader
    dataset = CustomDataset(filename, input_metrics, output_metrics)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Model, loss function, and optimizer
    num_features = len(input_metrics)
    model = SimpleNN(num_features, n_hidden)
    model.to(device)  # move parameters to the module-level `device`
    loss_func = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    # Training loop.
    # The epoch counter starts at 0 so that at most `epochs` passes are run
    # (starting at -1 with the same guard would run `epochs + 1` passes).
    # NOTE: the stopping test uses the loss of the last mini-batch only,
    # which is a noisy estimate when batches are shuffled.
    loss = torch.inf
    epoch = 0
    while loss > epsilon and epoch < epochs:
        for X_batch, Y_batch in dataloader:
            # Move the batch to the same device as the model
            X_batch, Y_batch = X_batch.to(device), Y_batch.to(device)

            # Forward pass
            outputs = model(X_batch)
            loss = loss_func(outputs, Y_batch)

            # Backward pass and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Finished epoch {epoch}, latest loss {loss}')
        epoch += 1

    # Detach so the returned loss does not keep the autograd graph alive
    final_loss = loss.detach() if torch.is_tensor(loss) else loss
    return model.cpu(), dataset, final_loss

### Train model

In [20]:
# Check if saved model exists
if os.path.exists(model_filename):
    print(f"Loading existing model from {model_filename}")
    
    # Create model architecture (must match saved model)
    num_features = len(input_metrics)
    model = SimpleNN(num_features, n_hidden)
    
    # Load the saved parameters
    model.load_state_dict(torch.load(model_filename, map_location='cpu'))
    model.eval()  # Set to evaluation mode
    
    # Load the training dataset for consistency
    train_dataset = CustomDataset(train_filename, input_metrics, output_metrics)
    
    print("✅ Model loaded successfully!")
    
else:
    print(f"No existing model found. Training new model...")
    
    # Train new model (your existing training code)
    model, train_dataset, loss = train_model(train_filename, input_metrics, output_metrics, n_hidden)
    
    # Save the trained model parameters
    torch.save(model.state_dict(), model_filename)
    print(f"✅ Model trained and saved to {model_filename}")
    print(f"Final training loss: {loss:.4f}")

No existing model found. Training new model...
Finished epoch 0, latest loss 95.07695770263672
Finished epoch 1, latest loss 96.96527099609375
Finished epoch 2, latest loss 118.69868469238281
Finished epoch 3, latest loss 44.416744232177734
Finished epoch 4, latest loss 61.55133819580078
Finished epoch 5, latest loss 16.892311096191406
Finished epoch 6, latest loss 16.864465713500977
Finished epoch 7, latest loss 17.58455467224121
Finished epoch 8, latest loss 12.345048904418945
Finished epoch 9, latest loss 19.581405639648438
Finished epoch 10, latest loss 11.38807201385498
Finished epoch 11, latest loss 11.037644386291504
Finished epoch 12, latest loss 19.58669090270996
Finished epoch 13, latest loss 14.14226245880127
Finished epoch 14, latest loss 9.368013381958008
Finished epoch 15, latest loss 13.987322807312012
Finished epoch 16, latest loss 7.082657337188721
Finished epoch 17, latest loss 9.047636032104492
Finished epoch 18, latest loss 14.125322341918945
Finished epoch 19, late

### Visualize training performance

In [26]:
# Collect the training inputs, targets and labels, then predict without gradients
train_names = train_dataset.names
train_seasons = train_dataset.seasons
train_x = train_dataset.X
train_y = train_dataset.Y
train_samples = range(len(train_dataset.X))
with torch.no_grad():
    train_y_pred = model(train_x)


def _train_hover_labels(values):
    """Build one 'name (season): value' hover label per training sample."""
    return [
        f'{name} ({season}): {val:.2f}'
        for name, season, val in zip(train_names, train_seasons, values)
    ]


# Settings shared by the truth and prediction scatter traces
_train_point_style = {
    'mode': 'markers',
    'hovertemplate': '%{text}<extra></extra>',
    'marker': {'size': 6},
}

fig = go.Figure()

# Truth points
fig.add_trace(go.Scatter(
    x=list(train_samples),
    y=train_y.flatten(),
    name='Truth FFPG',
    text=_train_hover_labels(train_y.flatten()),
    **_train_point_style,
))

# Predicted points (semi-transparent)
fig.add_trace(go.Scatter(
    x=list(train_samples),
    y=train_y_pred.flatten(),
    name='Pred FFPG',
    opacity=0.3,
    text=_train_hover_labels(train_y_pred.flatten()),
    **_train_point_style,
))

# Cubic polynomial fitted to the predictions as a function of sample index
coeffs = np.polyfit(train_samples, train_y_pred.flatten(), 3)
poly = np.poly1d(coeffs)
fig.add_trace(go.Scatter(
    x=list(train_samples),
    y=poly(train_samples),
    mode='lines',
    name='Regression',
    line={'color': 'black', 'width': 2},
    hovertemplate='Regression line<extra></extra>',
))

# Titles, legend placement and figure size
fig.update_layout(
    title='Training | WR data: 2018-2022',
    xaxis_title='WR samples',
    yaxis_title='Fantasy Points per Game',
    legend={'x': 1, 'y': 1, 'xanchor': 'right', 'yanchor': 'top'},
    showlegend=True,
    hovermode='closest',
    width=1000,
    height=800,
)

# Light gray grid on both axes
_train_grid_style = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'lightgray'}
fig.update_xaxes(**_train_grid_style)
fig.update_yaxes(**_train_grid_style)

fig.show()

### Validate

In [27]:
# Create validation dataset and dataloader
valid_dataset = CustomDataset(valid_filename, input_metrics, output_metrics)
valid_x = valid_dataset.X
valid_y = valid_dataset.Y

# Get validation predictions
with torch.no_grad():
    valid_y_pred = model(valid_x)
valid_samples = range(len(valid_dataset.X))
valid_names = valid_dataset.names
valid_seasons = valid_dataset.seasons

### Visualize validation performance

In [29]:
def _valid_hover_labels(values):
    """Build one 'name (season): value' hover label per validation sample."""
    return [
        f'{name} ({season}): {val:.2f}'
        for name, season, val in zip(valid_names, valid_seasons, values)
    ]


# Settings shared by the truth and prediction scatter traces
_valid_point_style = {
    'mode': 'markers',
    'hovertemplate': '%{text}<extra></extra>',
    'marker': {'size': 6},
}

fig = go.Figure()

# Truth points
fig.add_trace(go.Scatter(
    x=list(valid_samples),
    y=valid_y.flatten(),
    name='Truth FFPG',
    text=_valid_hover_labels(valid_y.flatten()),
    **_valid_point_style,
))

# Predicted points (semi-transparent)
fig.add_trace(go.Scatter(
    x=list(valid_samples),
    y=valid_y_pred.flatten(),
    name='Pred FFPG',
    opacity=0.5,
    text=_valid_hover_labels(valid_y_pred.flatten()),
    **_valid_point_style,
))

# Cubic polynomial fitted to the predictions as a function of sample index
coeffs = np.polyfit(valid_samples, valid_y_pred.flatten(), 3)
poly = np.poly1d(coeffs)
fig.add_trace(go.Scatter(
    x=list(valid_samples),
    y=poly(valid_samples),
    mode='lines',
    name='Regression',
    line={'color': 'black', 'width': 2},
    hovertemplate='Regression line<extra></extra>',
))

# Titles, legend placement and figure size (1000 x 800)
fig.update_layout(
    title='Validation | WR data: 2023',
    xaxis_title='WR samples',
    yaxis_title='Fantasy Points per Game',
    legend={'x': 1, 'y': 1, 'xanchor': 'right', 'yanchor': 'top'},
    showlegend=True,
    hovermode='closest',
    width=1000,
    height=800,
)

# Light gray grid on both axes
_valid_grid_style = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'lightgray'}
fig.update_xaxes(**_valid_grid_style)
fig.update_yaxes(**_valid_grid_style)

fig.show()

### Conclusion:
Results from training and testing show that the 5 chosen metrics correlate promisingly with FFPG. Simple polynomial regression does not adequately represent higher- and lower-end performers.