In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import seaborn as sns
import numpy as np
import pandas as pd
import nfl_data_py as nfl

# Import Data

In [None]:
# Seasons to cache: 2002 through 2023 (the range end is exclusive).
years = range(2002, 2024)
# Directory handed to nfl_data_py as `alt_path` when caching and loading play-by-play data.
cache_path = '/storage1/fs1/chien-ju.ho/Active/nfl'

In [None]:
%%time
# Cache play-by-play data one season per call, pausing 5 seconds after each call
# (presumably to go easy on the upstream data host — TODO confirm).
for year in years:
    nfl.cache_pbp([year], downcast=False, alt_path=cache_path)
    time.sleep(5)

In [None]:
# Shell out to `du` to report the total on-disk size of the cache directory.
!du -sh $cache_path

In [None]:
def process(df):
    """Select the modelling columns and derive cleaned / boolean features.

    Relies on the notebook-level ``columns`` list, which must be defined
    before this function is called.

    Steps, in order:
      * keep only ``columns``;
      * drop rows where ``posteam_type`` or ``score_differential`` is missing;
      * cast ``down`` to string;
      * turn ``posteam_type`` into a boolean (True when it equals 'home');
      * add ``yardline_na`` flagging missing ``yardline_100`` values, then
        fill those gaps with 100;
      * turn ``result`` into a boolean (True when strictly positive).

    Returns:
        A new DataFrame; the frame passed in is not modified.
    """
    selected = df[columns]
    has_context = selected.posteam_type.notna() & selected.score_differential.notna()
    # Keyword callables are evaluated in order, each seeing the columns set by
    # the ones before it.
    return selected.loc[has_context].assign(
        down=lambda d: d.down.astype(str),
        posteam_type=lambda d: d.posteam_type == 'home',
        # Flag the gaps before the next entry fills them in.
        yardline_na=lambda d: d.yardline_100.isna(),
        yardline_100=lambda d: d.yardline_100.fillna(100),
        result=lambda d: d['result'] > 0,
    )

In [None]:
%%time

# Only these play-by-play fields are loaded; `process` also reads this list
# to select the columns it keeps.
columns = ['posteam_type', 'game_seconds_remaining', 'score_differential', 'yardline_100', 'down', 'ydstogo', 'result', 'drive_ended_with_score']

# Derive the splits from `years` (the seasons that were cached) instead of
# repeating the season boundaries here, so caching and loading cannot drift
# apart: the last season is the test set, the one before it is validation,
# and everything earlier is training.
train_years = years[:-2]
valid_years = [years[-2]]
test_years = [years[-1]]

train = nfl.import_pbp_data(train_years, columns, downcast=False, cache=True, alt_path=cache_path)
valid = nfl.import_pbp_data(valid_years, columns, downcast=False, cache=True, alt_path=cache_path)
test = nfl.import_pbp_data(test_years, columns, downcast=False, cache=True, alt_path=cache_path)

In [None]:
# Clean each split with process().
# NOTE: each name is rebound to its processed frame, so this cell is not safe
# to re-run on its own: process() would then compare the already-boolean
# posteam_type against 'home' and recompute yardline_na from the already-filled
# yardline_100. Re-run the loading cell first.
train = process(train)
valid = process(valid)
test = process(test)

# Analyze Data

In [None]:
# Plot drive_ended_with_score against yardline_100, one line per down
# (seaborn aggregates repeated x values with its default estimator, the mean).
# Rows whose down became the string 'nan' in process() are left out.
has_down = train.down != 'nan'
ax = sns.lineplot(data=train[has_down], x='yardline_100', y='drive_ended_with_score', hue='down')
# Label the figure so it stands on its own; the trailing ';' hides the repr.
ax.set(
    title='drive_ended_with_score by yardline_100 and down',
    xlabel='yardline_100',
    ylabel='Mean drive_ended_with_score',
);

# Train Torch Model

In [None]:
def torch_process(df, feature_columns=None):
    """Turn a processed play-by-play frame into float32 feature/label tensors.

    ``down`` is one-hot encoded; ``result`` becomes the label and is removed
    from the features together with ``drive_ended_with_score``.

    Args:
        df: frame containing at least ``down``, ``result`` and
            ``drive_ended_with_score``; every remaining column must be
            convertible to float32.
        feature_columns: optional ordered list of feature column names. When
            given, the feature matrix is reindexed to exactly these columns
            (missing ones are filled with 0, extra ones are dropped), so
            splits that lack a ``down`` category still get the same width and
            column order. When omitted, the columns produced by
            ``pd.get_dummies`` are used as-is.

    Returns:
        ``(x, y)``: ``x`` has shape (n_rows, n_features) and ``y`` has shape
        (n_rows, 1), both ``torch.float32``.
    """
    encoded = pd.get_dummies(df, columns=['down'], dtype=np.float32)
    features = encoded.drop(columns=['result', 'drive_ended_with_score'])
    if feature_columns is not None:
        features = features.reindex(columns=list(feature_columns), fill_value=0)
    # Cast straight to float32 rather than via `.values`, which can build an
    # intermediate object array when bool and float columns are mixed.
    x = torch.tensor(features.to_numpy(dtype=np.float32))
    y = torch.tensor(encoded[['result']].to_numpy(dtype=np.float32))
    return x, y
    

In [None]:
x_train, y_train = torch_process(train)
x_valid, y_valid = torch_process(valid)
x_test, y_test = torch_process(test)

# `down` is one-hot encoded separately for each split, so a value that is
# absent from one split would silently change that split's feature width.
# Fail fast here rather than inside the model.
assert x_train.shape[1] == x_valid.shape[1] == x_test.shape[1], (
    f"feature width differs across splits: "
    f"train={x_train.shape[1]}, valid={x_valid.shape[1]}, test={x_test.shape[1]}"
)

In [None]:
class Net(nn.Module):
    """Feed-forward binary classifier: three ReLU hidden layers, sigmoid output.

    Args:
        input_size: number of features per input row.
        hidden_size: width shared by all three hidden layers.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # Layers are created in this order so parameter names and
        # initialisation order stay fc1, fc2, fc3, output.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.output = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a batch of feature rows to one sigmoid activation per row."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        return torch.sigmoid(self.output(hidden))


In [None]:
# Set hyperparameters
input_size = x_train.shape[1]  # number of feature columns in the training tensor
hidden_size = 64  # width of each hidden layer in Net
learning_rate = 0.001  # step size passed to the Adam optimizer
num_epochs = 100  # number of iterations of the training loop

In [None]:
# Initialize the model
model = Net(input_size, hidden_size)

# Define the loss function and optimizer.
# BCELoss expects probabilities, which Net's sigmoid output provides.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# How often (in epochs) to report training loss and validation accuracy.
# 1 means every epoch; raise it to log less often.
log_every = 1

# Training loop: full-batch, i.e. each epoch is one optimizer step on all of x_train.
for epoch in range(num_epochs):
    # Net has no dropout/batch-norm, so train()/eval() change nothing today;
    # toggling them keeps the loop correct if such layers are added later.
    model.train()

    # Forward pass
    outputs = model(x_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the training loss and validation accuracy every `log_every` epochs
    if (epoch + 1) % log_every == 0:
        model.eval()
        with torch.no_grad():
            outputs = model(x_valid)
            # Threshold the predicted probability at 0.5.
            predicted = outputs.round()
            accuracy = (predicted == y_valid).float().mean()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f}")