In [8]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from torch.autograd import Variable
import torch.nn.functional as functional
from torch.utils.data import Dataset, DataLoader
from skimage import io
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder

In [2]:
class FaceoffsDataset(Dataset):
    """Map-style dataset serving rows of an annotation CSV as ``(-1, 2)`` arrays."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations
                (passed straight to ``pd.read_csv``).
            root_dir (string): Directory with all the images. It is stored
                on the instance but not read by the methods defined here.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows loaded from the CSV."""
        # The table lives in `self.data`; the previous `self.landmarks_frame`
        # was never assigned, so len() raised AttributeError.
        return len(self.data)

    def __getitem__(self, idx):
        """Return row(s) ``idx`` without the first column, reshaped to ``(-1, 2)``.

        Args:
            idx: Integer position, list of positions, or a tensor of either.

        Returns:
            The reshaped NumPy array, or whatever ``transform`` returns for it
            when a transform was supplied.
        """
        # Samplers may hand over tensor indices; pandas needs plain Python ints/lists.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Column 0 is skipped; the remaining values are paired up two per row,
        # so the number of remaining values must be even.
        row = self.data.iloc[idx, 1:]
        sample = np.array([row]).reshape(-1, 2)

        if self.transform:
            sample = self.transform(sample)

        return sample

In [3]:
# General data directory.
# Set the FACEOFFS_DATA_DIR environment variable to point this notebook at the
# data on the current machine (local laptop, Google Colab, AWS EC2 instance)
# instead of editing the cell; when it is unset the original local path is used.
# Locations that were previously swapped in by hand:
#   "/content/gdrive/MyDrive/HW6_data (1)/HW6_data"
#   "/home/ubuntu/unsupervised-and-transfer-learning/hw6_data_copy/HW6_data"
#   "C:/Users/Tad/Documents/unsupervised-and-transfer-learning/hw6_data_copy/HW6_data"
#   "C:/Users/Tad/Documents/unsupervised-and-transfer-learning/HW6_data"
data_dir = os.environ.get("FACEOFFS_DATA_DIR", "/Users/Tad/Documents/faceoffs")

# Sub-directories derived from the root; both keep a trailing slash.
segmentation_data_dir = f"{data_dir}/segmentation/"
colorization_data_dir = f"{data_dir}/colorization/"



In [26]:
# Read the raw training table; the relative path resolves against the
# notebook's current working directory.
data_initial = pd.read_csv(filepath_or_buffer="training_data_all_offensive_offensive.csv")

In [28]:
# Drop incomplete rows (expected to be a no-op: NAs should already have been
# removed by numerical imputation), then keep only the numeric columns.
data_no_na = data_initial.dropna()
data = data_no_na.select_dtypes(['number'])
print(data.head(100))

# Features are every numeric column except the regression target `net_xg`.
is_feature_column = data.columns != 'net_xg'
x = data.loc[:, is_feature_column]
y = data['net_xg']

# Two chained splits: 65% train, then the 35% hold-out is divided with
# test_size=0.571 (about 0.20 / 0.35), giving roughly
# 65% train / 15% validation / 20% test.
X_train, X_intermediate, y_train, y_intermediate = train_test_split(
    x, y, train_size=0.65, test_size=0.35, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_intermediate, y_intermediate, test_size=0.571, random_state=42
)

# Convert every split from pandas to a torch tensor.
# NOTE(review): from_numpy keeps the NumPy dtype, which is presumably float64
# here, while nn.Linear defaults to float32 -- confirm a cast happens before
# these tensors are fed to a model.
X_train, X_val, X_test = (
    torch.from_numpy(part.to_numpy()) for part in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.from_numpy(part.to_numpy()) for part in (y_train, y_val, y_test)
)

print(X_train.shape)

    game_id_x  season_x  game_seconds  event_index  game_period  coords_x  \
0   -1.798257 -1.797913     -0.715929    -0.712210    -0.606279 -0.531820   
1   -1.798257 -1.797913     -0.697547    -0.677103    -0.606279 -0.531820   
2   -1.798257 -1.797913      0.039564     0.060154    -0.606279 -0.531820   
3   -1.798257 -1.797913      0.210515     0.255753    -0.606279  1.880391   
4   -1.798257 -1.797913      0.445803     0.471414     0.764344 -0.531820   
..        ...       ...           ...          ...          ...       ...   
95  -1.798252 -1.797913     -0.357483    -0.326028    -0.606279  1.880391   
96  -1.798252 -1.797913      0.458670     0.476429     0.764344  1.880391   
97  -1.798252 -1.797913      0.554256     0.556675     0.764344  1.880391   
98  -1.798252 -1.797913      0.604806     0.596797     0.764344 -0.531820   
99  -1.798252 -1.797913      0.816197     0.772335     0.764344  1.880391   

    coords_y  home_score  away_score  event_distance  ...  OL_team_Lose_D2 

torch.Size([60387, 30])


In [4]:
class VanillaNeuralNet(nn.Module):
    """Ten-layer fully connected network that emits one value per input row.

    Layer widths are in_features -> 1000 -> 750 -> 500 -> 500 -> 500 -> 500
    -> 250 -> 100 -> 50 -> 1, with a ReLU between consecutive linear layers
    and no activation on the output.
    """

    def __init__(self, n_classes, in_features=1500):
        """
        Args:
            n_classes: Accepted for interface compatibility; it is not used,
                the network always ends in a single output unit.
            in_features (int, optional): Number of input features per row.
                Defaults to 1500, the width that was previously hard-coded;
                pass the feature count of the data (e.g. ``X_train.shape[1]``).
        """
        super(VanillaNeuralNet, self).__init__()
        self.fc1 = nn.Linear(in_features, 1000)
        self.fc2 = nn.Linear(1000, 750)
        self.fc3 = nn.Linear(750, 500)
        self.fc4 = nn.Linear(500, 500)
        self.fc5 = nn.Linear(500, 500)
        self.fc6 = nn.Linear(500, 500)
        self.fc7 = nn.Linear(500, 250)
        self.fc8 = nn.Linear(250, 100)
        self.fc9 = nn.Linear(100, 50)
        self.fc10 = nn.Linear(50, 1) # output singular prediction

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension equals ``in_features`` and whose
                dtype matches the layer weights.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        layers = (
            self.fc1, self.fc2, self.fc3, self.fc4, self.fc5,
            self.fc6, self.fc7, self.fc8, self.fc9, self.fc10,
        )
        # The raw input goes into fc1 untouched: applying ReLU to it (as the
        # previous version did) zeroes every negative feature value before
        # the network sees it.
        out = layers[0](x)
        # ReLU sits between consecutive linear layers; the final layer's
        # output is returned without an activation.
        for layer in layers[1:]:
            out = layer(torch.nn.functional.relu(out))
        return out