In [1]:
import pandas as pd
import numpy as np
import math
from tqdm import tqdm
import os
from os.path import join
from utils import get_repo_dir, datetime_str
from config.config import *
from typing import Union, Tuple
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import nn
from torch.nn import functional as F

Using personal config for user: suvansh


## Some initial variables and data loading

In [2]:
max_num_frames = 30  # truncate if snap-to-throw is > this. units: ds
out_dir = join(get_repo_dir(), 'output')
os.makedirs(out_dir, exist_ok=True)
models_dir = join(out_dir, 'models')
os.makedirs(models_dir, exist_ok=True)

In [3]:
# load data
week1_tracking = pd.read_csv(join(data_dir, 'week1_norm.csv'))
week1_coverage = pd.read_csv(join(data_dir, 'coverages_week1.csv'), dtype={'coverage': 'category'})

In [4]:
# merge data
week1_data = pd.merge(week1_tracking, week1_coverage, how='right', on=['gameId', 'playId'])
week1_data['coverage_code'] = week1_data.coverage.cat.codes
num_classes = len(week1_data.coverage.cat.categories)

TODO data aug. this is very slightly complicated by the fact that we have precomputed the vx, vy, ax, ay. will have to ask udit about how this is computed since it seems to be inconsistent with v_theta and v_mag?

for now we don't have orientation relative to qb, so I don't think it makes sense to highlight the qb in the way the bdb winner does for rusher, since it would just be distance from qb and qb-relative speed/accel data right now, which I doubt is _extra_ useful for predicting coverage (?). so for now, we'll do 11x11 of off x def with features:
* relative: x, y, vx, vy, ax, ay
* absolute: vx, vy, ax, ay

## Dataset generation

First, we generate the dataset as numpy arrays.

In [5]:
num_features = 10

grouped = week1_data.groupby(['gameId', 'playId'])
# TODO can change 11s to max number of off and def players in data
data_X = np.empty((len(grouped), max_num_frames, num_features, 11, 11), dtype=np.float32)  # (P, T, F, D, O): play, frame, feature, def, off
data_dims = np.empty((len(grouped), 3), dtype=np.int32) # (P, 3) contains (t, d, o): num frames, num def, num off on each play
data_Y = np.empty(len(grouped), dtype=np.int32)  # (P,)

valid_plays = 0
for game_play, play_df in tqdm(grouped):
    if 'pass_forward' not in play_df.event.unique():
        continue
    first_frame = play_df.loc[(play_df.nflId == 0) & (play_df.event == 'ball_snap')].frameId.iloc[0]
    play_end_frame = play_df.loc[(play_df.nflId == 0) & (play_df.event == 'pass_forward')].frameId.iloc[0]
    last_frame = min(first_frame + max_num_frames - 1, play_end_frame)
    play_df = play_df.loc[play_df.frameId.between(first_frame, last_frame)]
    
    num_def, num_off = 0, 0
    for frame_idx, (frame_id, frame_df) in enumerate(play_df.groupby('frameId')):
        def_ids = frame_df[frame_df.team_pos == 'DEF'].index
        num_def = len(def_ids)
        off_ids = frame_df[frame_df.team_pos == 'OFF'].index
        num_off = len(off_ids)
    
        outer_sub = np.subtract.outer(
            frame_df.loc[off_ids, ['x', 'y', 'v_x', 'v_y', 'a_x', 'a_y']].values,
            frame_df.loc[def_ids, ['x', 'y', 'v_x', 'v_y', 'a_x', 'a_y']].values
        )
        # einsum explanation: the two i's get rid of subtraction across cols
        # k before j reorders def before off since output dims in alph. order
        data_X[valid_plays, frame_idx, :6, :num_def, :num_off] = np.einsum('kiji->ijk', outer_sub)
        data_X[valid_plays, frame_idx, -4:, :num_def, :num_off] = frame_df.loc[def_ids, ['v_x', 'v_y', 'a_x', 'a_y']].values.T[...,None]
    data_Y[valid_plays] = play_df.coverage_code.iloc[0]
    data_dims[valid_plays] = last_frame - first_frame, num_def, num_off
    valid_plays += 1
data_X = data_X[:valid_plays]
data_dims = data_dims[:valid_plays]
data_Y = data_Y[:valid_plays]
    

100%|██████████| 1028/1028 [01:57<00:00,  8.78it/s]


In [6]:
data_save_path = join(out_dir, 'week1_data.npz')
# NOTE: uncomment to save
np.savez(data_save_path, x=data_X, dims=data_dims, y=data_Y)
# NOTE: uncomment to load from save
# saved_data = np.load(data_save_path)
# data_X, data_dims, data_Y = saved_data['x'], saved_data['dims'], saved_data['y']

Next, we make a [TensorDataset](https://pytorch.org/docs/stable/_modules/torch/utils/data/dataset.html#TensorDataset) out of it.

In [7]:
train_X, test_X, train_dims, test_dims, train_Y, test_Y = train_test_split(
    data_X, data_dims, data_Y, test_size=0.2, random_state=12
)
train_dataset = TensorDataset(*map(torch.tensor, [train_X, train_dims, train_Y]))
test_dataset = TensorDataset(*map(torch.tensor, [test_X, test_dims, test_Y]))

## Model construction

In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.backends.cudnn.benchmark = True

In [9]:
class DeepCoverInner(nn.Module):
    def __init__(self,
                 input_channels: int,
                 output_dim: int,
                 conv_h: Union[int, Tuple[int, int]]=[128, 96],
                 linear_h: Union[int, Tuple[int, int]]=[96, 256]):
        """
        :param input_channels: number of input features
        :param output_dim: dimension of output embedding
        :param conv_h: number of conv channels for each conv block.
            int or tuple of 2 ints.
        :param linear_h: number of hidden units for each linear layer.
            int or tuple of 2 ints.    
        """
        super().__init__()
        if type(conv_h) is int:
            conv_h = (conv_h, conv_h)
        if type(linear_h) is int:
            linear_h = (linear_h, linear_h)
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(input_channels, conv_h[0], kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(conv_h[0], conv_h[0], kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(conv_h[0], conv_h[0], kernel_size=1),
            nn.ReLU()
        )
        self.bn1 = nn.BatchNorm1d(conv_h[0])
        self.conv_block_2 = nn.Sequential(
            nn.Conv1d(conv_h[0], conv_h[1], kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(conv_h[1]),
            nn.Conv1d(conv_h[1], conv_h[1], kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(conv_h[1]),
            nn.Conv1d(conv_h[1], conv_h[1], kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(conv_h[1]),
        )
        self.conv_h = conv_h
        self.linear_block = nn.Sequential(
            nn.Linear(conv_h[1], linear_h[0]),
            nn.ReLU(),
            nn.BatchNorm1d(linear_h[0]),
            nn.Linear(*linear_h),
            nn.ReLU(),
            nn.LayerNorm(linear_h[1]),
            nn.Linear(linear_h[1], output_dim)
        )
        
        
        
    def forward(self, x, dims):
        # let (F', F") and (..., F*) be conv_h and linear_h args to __init__
        orig_shape = x.shape  # (B, T, F, D, O)
        x = x.view(-1, *orig_shape[2:])  # (B*T, F, D, O)
        
        x = self.conv_block_1(x)  # (B*T, F', D, O)
        x = x.view(*orig_shape[:2], *x.shape[1:])  # (B, T, F', D, O)
        # this block of code handles variable number of offensive players
        x_max = torch.stack([
            F.max_pool2d(each[...,:dim[2]], kernel_size=(1, dim[2])).squeeze() for each, dim in zip(x, dims)
        ])  # (B, T, F', D)
        x_avg = torch.stack([
            F.avg_pool2d(each[...,:dim[2]], kernel_size=(1, dim[2])).squeeze() for each, dim in zip(x, dims)
        ])  # (B, T, F', D)
        x = (x_max * 0.3 + x_avg * 0.7).view(-1, *x_max.shape[2:])  # (B*T, F', D)
        x = self.bn1(x)
        
        x = self.conv_block_2(x)
        x = x.view(*orig_shape[:2], *x.shape[1:])  # (B, T, F", D)
        # this block of code handles variable number of defensive players
        x_max = torch.stack([
            F.max_pool1d(each[...,:dim[1]], kernel_size=dim[1].item()).squeeze() for each, dim in zip(x, dims)
        ])  # (B, T, F")
        x_avg = torch.stack([
            F.avg_pool1d(each[...,:dim[1]], kernel_size=dim[1].item()).squeeze() for each, dim in zip(x, dims)
        ])  # (B, T, F")
        x = (x_max * 0.3 + x_avg * 0.7).view(-1, *x_max.shape[2:])  # (B*T, F")
        
        x = self.linear_block(x)  # (B*T, F*)
        
        # restore shape
        x = x.view(*orig_shape[:2], -1)  # (B, T, F*)
        return x


class DeepCoverOuterLSTM(nn.Module):
    def __init__(self,
                 input_dim: int,
                 hidden_size: int,
                 num_classes: int,
                 num_layers: int=1,
                 bidirectional: bool=True):
        """
        :param input_dim: input embedding dimension (per frame). same as Inner's output_dim
        :param hidden_size: dimension of LSTM hidden state
        :param num_classes: number of coverage classes
        :param num_layers: number of LSTM layers
        :param bidirectional: whether RNN is bidirectional
        """
        super().__init__()
        self.rnn = nn.LSTM(input_dim, hidden_size, batch_first=True,
                           num_layers=num_layers, bidirectional=bidirectional)
        self.num_layers = num_layers
        self.num_directions = 1 + int(bidirectional)
        self.linear = nn.Sequential(
            nn.Linear(self.num_layers * self.num_directions * hidden_size, num_classes),
            nn.LogSoftmax(dim=1)
        )
    
    def forward(self, x, dims):
        # x is (B, T, F*)
        batch_size = x.shape[0]
        x = nn.utils.rnn.pack_padded_sequence(x, dims[:,0], batch_first=True, enforce_sorted=False)
        x = self.rnn(x)[1][0]  # last hidden state
        x = x.view(self.num_layers, self.num_directions, batch_size, -1)
        x = x.permute(2, 0, 1, 3).reshape(batch_size, -1)  # (B, F^)
        # TODO may have to contiguous() this.
        x = self.linear(x)
        return x
    

class DeepCover(nn.Module):
    type_class_map = {
        'LSTM': DeepCoverOuterLSTM
    }
    
    def __init__(self,
                 inner_args: dict,
                 outer_args: dict,
                 outer_type: str='rnn'
                ):
        assert inner_args['output_dim'] == outer_args['input_dim']
        assert (DeepCoverOuter := self.type_class_map.get(outer_type))
        
        super().__init__()
        self.outer = DeepCoverOuter(**outer_args)
        self.inner = DeepCoverInner(**inner_args)
    
    def forward(self, x, dims):
        x = self.inner(x, dims)
        x = self.outer(x, dims)
        return x

In [10]:
# model parameters (TODO sweep?)
embedding_dim = 96
hidden_size = 64
bidirectional = True

model = DeepCover(
    inner_args={
        'input_channels': num_features,
        'output_dim': embedding_dim
    },
    outer_type='LSTM',
    outer_args={
        'input_dim': embedding_dim,
        'hidden_size': hidden_size,
        'num_classes': num_classes,
        'bidirectional': bidirectional
    }
).to(device)

## Training

In [11]:
num_epochs = 10
batch_size = 64
loss_fn = nn.NLLLoss()
save_freq = 1

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

In [12]:
stats = {'train_loss': [], 'test_loss': [], 'train_acc': [], 'test_acc': []}
exp_dir = join(models_dir, datetime_str())
os.makedirs(exp_dir)

try:
    best_test_acc = 0
    for epoch in tqdm(range(num_epochs)):
        """ training """
        model.train()
        train_loss = train_correct = 0
        for xb, dimb, yb in train_loader:
            xb, dimb, yb = xb.to(device), dimb.to(device), yb.to(device)
            y_pred = model(xb, dimb)
            loss = loss_fn(y_pred, yb.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * xb.shape[0]
            train_correct += (torch.max(y_pred, 1)[1] == yb).sum().item()
        train_acc = train_correct / len(train_dataset)
        train_loss /= len(train_dataset)

        """ evaluation """
        model.eval()
        test_loss = test_correct = 0
        for xb, dimb, yb in test_loader:
            xb, dimb, yb = xb.to(device), dimb.to(device), yb.to(device)
            y_pred = model(xb, dimb)
            loss = loss_fn(y_pred, yb.long())

            test_loss += loss.item() * xb.shape[0]
            test_correct += (torch.max(y_pred, 1)[1] == yb).sum().item()
        test_acc = test_correct / len(test_dataset)
        test_loss /= len(test_dataset)

        stats['train_loss'].append(train_loss)
        stats['test_loss'].append(test_loss)
        stats['train_acc'].append(train_acc)
        stats['test_acc'].append(test_acc)
        print(f'Epoch {epoch+1:>2}/{num_epochs} | TrLoss {train_loss:>8.5f} | TrAcc {train_acc:>5.2f} | TeLoss {test_loss:>8.5f} | TeAcc {test_acc:>5.2f}')
        if (best_test_acc := max(test_acc, best_test_acc)) == test_acc or \
            (epoch+1) % save_freq == 0 or epoch == num_epochs - 1:  # new best, or save period, or last epoch
            filename = datetime_str() + '.pt'
            torch.save(model.state_dict(), join(exp_dir, filename))
            print(f'Model saved @ {join(exp_dir, filename)}')
            
except KeyboardInterrupt as e:
    if input('Do you want to save the model? [y/N]: ').lower()[0] == 'y':
        filename = datetime_str() + '.pt'
        torch.save(model.state_dict(), join(exp_dir, filename))
        print(f'Model saved @ {join(exp_dir, filename)}')
    raise e

 10%|█         | 1/10 [00:42<06:19, 42.13s/it]

Epoch  1/10 | TrLoss  1.84209 | TrAcc  0.38 | TeLoss  2.06525 | TeAcc  0.18
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055537.pt


 20%|██        | 2/10 [01:23<05:34, 41.81s/it]

Epoch  2/10 | TrLoss  1.54910 | TrAcc  0.50 | TeLoss  1.91125 | TeAcc  0.28
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055618.pt


 30%|███       | 3/10 [02:04<04:52, 41.72s/it]

Epoch  3/10 | TrLoss  1.31429 | TrAcc  0.55 | TeLoss  1.72785 | TeAcc  0.29
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055700.pt


 40%|████      | 4/10 [02:46<04:09, 41.65s/it]

Epoch  4/10 | TrLoss  1.09852 | TrAcc  0.63 | TeLoss  1.40184 | TeAcc  0.48
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055741.pt


 50%|█████     | 5/10 [03:29<03:29, 42.00s/it]

Epoch  5/10 | TrLoss  0.90931 | TrAcc  0.72 | TeLoss  1.17864 | TeAcc  0.59
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055824.pt


 60%|██████    | 6/10 [04:11<02:48, 42.17s/it]

Epoch  6/10 | TrLoss  0.76259 | TrAcc  0.77 | TeLoss  1.14370 | TeAcc  0.58
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055907.pt


 70%|███████   | 7/10 [05:04<02:16, 45.36s/it]

Epoch  7/10 | TrLoss  0.64297 | TrAcc  0.80 | TeLoss  1.11585 | TeAcc  0.58
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_055959.pt


 80%|████████  | 8/10 [05:56<01:34, 47.24s/it]

Epoch  8/10 | TrLoss  0.54006 | TrAcc  0.84 | TeLoss  0.98854 | TeAcc  0.64
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_060051.pt


 90%|█████████ | 9/10 [06:50<00:49, 49.35s/it]

Epoch  9/10 | TrLoss  0.47338 | TrAcc  0.86 | TeLoss  1.06574 | TeAcc  0.59
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_060145.pt


100%|██████████| 10/10 [07:45<00:00, 46.56s/it]

Epoch 10/10 | TrLoss  0.42482 | TrAcc  0.87 | TeLoss  1.13720 | TeAcc  0.58
Model saved @ /Users/sanjeev/GoogleDrive/Personal/Football/DeepCover/output/models/03022021_055455/03022021_060241.pt





# Testing playground (Junk below here)

In [None]:
a = np.random.randn(20, 5) + 5
b = np.random.randn(20, 5)
# result1 = np.empty((1, 20, 20, 5))
# result2 = np.empty((1, 20, 20, 5))
path = np.einsum_path('ikjk->', np.empty((20,5,20,5)), optimize='optimal')[0]

In [None]:
%%timeit -n100 -r10
subout = np.subtract.outer(a, b)
result1 = np.empty((1, 20, 20, 5))
# path = np.einsum_path('ikjk->', subout, optimize='optimal')[0]
result1[0] = np.einsum('ikjk->', subout)

In [None]:
%%timeit -n100 -r10
result2 = np.empty((1, 20, 20, 5))
for i in range(a.shape[0]):
    result2[0, i] = a[i] - b

In [None]:
frame_df = week1_data[(week1_data.gameId == 2018090600) & (week1_data.playId == 75) & (week1_data.frameId == 1)]
# frame_df.values.T[...,None].shape

# test = frame_df[frame_df.team_pos == 'DEF'].index
# len(test)
# frame_df