In [48]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
import ignite
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import keras
pd.set_option("display.max_columns", 101)

np.random.seed(123)
torch.manual_seed(123)
#https://www.analyticsvidhya.com/blog/2019/01/guide-pytorch-neural-networks-case-studies/

<torch._C.Generator at 0x10db786f0>

In [2]:
from sklearn.neural_network import MLPClassifier

df = pd.read_csv('PBP - 2016 - Week 1.csv')

In [3]:
def getWinner(gameId, df):
    final = df.query('gameId == @gameId').iloc[[-1]]
    if(int(final.homeScore) > int(final.awayScore)):
        return 'home'
    else:
        return 'away'

In [447]:
x_train = bigdf['description']
for index,row in bigdf.iterrows():
    bigdf.at[index,'winner'] = getWinner(row['gameId'], bigdf)
y_train = bigdf['winner']

In [448]:
x_train = x_train.values
y_train = y_train.values

In [449]:
x_train

array(['Drew Galitz kickoff for 61 yds , Shakeir Ryan return for 10 yds to the NWSt 14',
       'Timeout NORTHWESTERN ST, clock 14:55',
       "De'Mard Llorens run for a loss of 1 yard to the NWSt 13", ...,
       'Gunnar Raborn on-side kick recovered by UL LAFAYETTE at the McNSt 45',
       'TEAM run for a loss of 6 yards to the LaLaf 49',
       'End of 4th Quarter'], dtype=object)

## Preprocessing

In [450]:
## create tokens 
tokenizer = keras.preprocessing.text.Tokenizer(num_words = 1000)
tokenizer.fit_on_texts(x_train)
word_index = tokenizer.word_index

In [451]:
## ensure that all the data is actually text
new_x_train = []
for i in x_train:
    new_x_train.append(str(i))
    
x_train = new_x_train

In [452]:
## convert texts to padded sequences 
x_train = tokenizer.texts_to_sequences(x_train)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen = 70)

In [10]:
EMBEDDING_FILE = 'glove.840B.300d.txt'

embeddings_index = {}
for i, line in enumerate(open(EMBEDDING_FILE, encoding='utf8')):
    val = line.split()
    embeddings_index[val[0]] = np.asarray(val[1:], dtype='float32')
    


ValueError: could not convert string to float: '.'

In [453]:
embedding_matrix = np.zeros((len(word_index) + 1, 300))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [454]:
embedding_matrix.size

1876200

In [455]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        ## Embedding Layer, Add parameter 
        self.embedding = nn.Embedding(2000, 1415100) 
        et = torch.tensor(embedding_matrix, dtype=torch.float32)
        self.embedding.weight = nn.Parameter(et)
        self.embedding.weight.requires_grad = False
        self.embedding_dropout = nn.Dropout2d(0.1)
        self.lstm = nn.LSTM(300, 40)        
        self.linear = nn.Linear(40, 16)
        self.out = nn.Linear(16, 1)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        h_embedding = self.embedding(x)        
        h_lstm, _ = self.lstm(h_embedding)
        max_pool, _ = torch.max(h_lstm, 1)        
        linear = self.relu(self.linear(max_pool))
        out = self.out(linear)
        return out
    
model = Model()

In [456]:
## create training and validation split 
split_size = int(0.8 * len(bigdf))
index_list = list(range(len(bigdf)))
train_idx, valid_idx = index_list[:split_size], index_list[split_size:]

In [457]:
y_train = bigdf['winner']
y_train = y_train.values

In [458]:
y_train = np.where(y_train=='home', 1, y_train) 
y_train = np.where(y_train=='away', 0, y_train) 
y_train = y_train.astype(np.int32)

In [459]:
x_train[train_idx]

array([[  0,   0,   0, ...,   3, 450,  47],
       [  0,   0,   0, ...,  39,  47, 435],
       [  0,   0,   0, ...,   3, 450,  50],
       ...,
       [  0,   0,   0, ...,   7,  36,  32],
       [  0,   0,   0, ...,   1, 442,   4],
       [  0,   0,   0, ...,   3, 190,  52]], dtype=int32)

In [460]:
## create iterator objects for train and valid datasets
x_tr = torch.tensor(x_train[train_idx], dtype=torch.long)
y_tr = torch.tensor(y_train[train_idx], dtype=torch.float32)
train = TensorDataset(x_tr, y_tr)

## Just trying to get a good weight
test = torch.tensor(y_train[train_idx], dtype=torch.int)
print(test)
class_sample_count = np.unique(test, return_counts=True)[1]
print(class_sample_count)
weight = 1. / class_sample_count

print(weight)
samples_weight = weight[test]
print(len(samples_weight))

trainloader = torch.utils.data.DataLoader(train, batch_size=1,sampler=torch.utils.data.sampler.WeightedRandomSampler(samples_weight, len(samples_weight)))


tensor([1, 1, 1,  ..., 1, 1, 1], dtype=torch.int32)
[ 4338 19413]
[2.30520977e-04 5.15118735e-05]
23751


In [335]:
x_val = torch.tensor(x_train[valid_idx], dtype=torch.long)
y_val = torch.tensor(y_train[valid_idx], dtype=torch.float32)
valid = TensorDataset(x_val, y_val)
validloader = torch.utils.data.DataLoader(valid, batch_size=1)

In [461]:
loss_function = nn.BCEWithLogitsLoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.9)

In [472]:
## run for 10 Epochs
for epoch in range(10):
    train_loss, valid_loss = [], []
    ## training part 
    model.train()
    i=0
    running_loss = 0
    for data, target in trainloader:
        i+=1
        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target.view(-1,1))
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

[1,  2000] loss: 0.425
[1,  4000] loss: 0.413
[1,  6000] loss: 0.410
[1,  8000] loss: 0.396
[1, 10000] loss: 0.421
[1, 12000] loss: 0.397
[1, 14000] loss: 0.411
[1, 16000] loss: 0.417
[1, 18000] loss: 0.399
[1, 20000] loss: 0.409
[1, 22000] loss: 0.425
[2,  2000] loss: 0.422
[2,  4000] loss: 0.422
[2,  6000] loss: 0.402
[2,  8000] loss: 0.419
[2, 10000] loss: 0.410
[2, 12000] loss: 0.401
[2, 14000] loss: 0.386
[2, 16000] loss: 0.395
[2, 18000] loss: 0.415
[2, 20000] loss: 0.409
[2, 22000] loss: 0.411
[3,  2000] loss: 0.399
[3,  4000] loss: 0.412
[3,  6000] loss: 0.396
[3,  8000] loss: 0.410
[3, 10000] loss: 0.399
[3, 12000] loss: 0.404
[3, 14000] loss: 0.383
[3, 16000] loss: 0.401
[3, 18000] loss: 0.408
[3, 20000] loss: 0.406
[3, 22000] loss: 0.414
[4,  2000] loss: 0.396
[4,  4000] loss: 0.393
[4,  6000] loss: 0.394
[4,  8000] loss: 0.390
[4, 10000] loss: 0.388
[4, 12000] loss: 0.387
[4, 14000] loss: 0.395
[4, 16000] loss: 0.407
[4, 18000] loss: 0.403
[4, 20000] loss: 0.402
[4, 22000] 

In [473]:
total_correct = 0
total_images = 0
confusion_matrix = np.zeros([2,2], int)
preds = np.empty((0,1))
lebs = np.empty((0,1))
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        outputs = model(images)
        bleh = outputs
        predicted = torch.clamp(bleh, min=0, max=1).float()
        total_images += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        preds = np.append(preds,predicted)
        lebs = np.append(lebs,labels)

print(total_correct)
model_accuracy = total_correct / total_images * 100
print('Model accuracy on {0} test images: {1:.2f}%'.format(total_images, model_accuracy))

16230
Model accuracy on 23751 test images: 68.33%


In [474]:
from sklearn.metrics import roc_auc_score
roc_auc_score(lebs, preds)

0.8555695352567209

In [292]:
softmax = nn.Softmax(dim=1)

In [310]:
total_correct = 0
total_images = 0
confusion_matrix = np.zeros([2,2], int)
preds = np.empty((0,1))
lebs = np.empty((0,1))
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        print(images)
        outputs = model(images)
        print(outputs)
        bleh = outputs
        predicted = torch.clamp(bleh, min=0, max=1).float()
        total_images += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        preds = np.append(preds,predicted)
        lebs = np.append(lebs,labels)

tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   5,   1,  20,   4,   2,   3, 237,  93]])
tensor([[0.4914]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         112, 146,   5,   1,  91,   4,   2,   3, 335,  31,   1,   7,   8,   9]])
tensor([[2.2992]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          

tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 348, 301,
          22,   1,  52,   4,  99, 102,  14, 244,  56, 450,  32,   3, 323,  82]])
tensor([[-2.2179]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 328,  22,   1,  58,   4,  98,   1,  17,   4,   2,   3, 288,  70]])
tensor([[3.2796]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         

tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,  43,  67,   5,   1,  18,  21,   2,   3, 190,  55]])
tensor([[-5.0796]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 103, 728,
           6,  10,   2,   1,  35,   4,   2,   3, 431,  70,   1,   7,   8,   9]])
tensor([[3.8561]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         

tensor([[-0.3349]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 313, 515,   6, 150, 337,  27,   1,  20,   4,   2,   3, 296,  58]])
tensor([[-0.7913]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0, 654, 307,   6,  11,   2,  79]])
tensor([[0.4683]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,

tensor([[-2.8248]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0, 108, 545,   5,   1,  24,   4,   2,   3, 316,  82]])
tensor([[-2.0827]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   5,   1,  18,
          21,   2,   3, 303,  80, 104, 182,  14, 393, 107,  14, 303, 153, 133]])
tensor([[0.6709]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,

tensor([[-4.6017]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 340,   6,  11,   2]])
tensor([[-4.8289]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 450,   5,   1,  36,   4,   2,   3, 171,  61,   1,   7,   8,   9]])
tensor([[8.4038]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,

tensor([[4.8353]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0, 802, 602,   6,  11,   2, 263,
          19,   6, 159, 712, 897,  33,  12,   2,   3,   8,   9, 212,  23,  89]])
tensor([[5.3921]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0, 263,   5,   1,  31,   4,   2,   3, 421,  30]])
tensor([[0.3098]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  

tensor([[0.0606]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 145,
         436,  22,   1,  51,   4,  99, 102,  14, 636, 636,  32,   3, 286,  81]])
tensor([[-2.5536]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0, 269,   5,   1,  18,  21,   2,   3, 149,  55]])
tensor([[-1.6700]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,

tensor([[-0.3209]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 480,   6,  10,   2, 930,  96,   1,  15,   4,   2,   3, 315,  93]])
tensor([[0.9393]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 486,
         485,   6,  11,  19, 161, 162,  33,  12,   2,   3,  23,  89,   8,   9]])
tensor([[1.7601]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 

tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,  72, 264,  25,   1, 595,   4,  46,  27,   1,  33,   4,   2,   3,
         117,  41, 143, 868, 839,  19, 186, 115,  29,  12,   2,   3, 231,  29]])
tensor([[5.3212]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 257,   5,   1,  17,   4,   2,   3, 270,  71,   1,   7,   8,   9]])
tensor([[0.6613]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          

tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 649,  25,   1,  40,   4,  27,   1,  55,   4,   2,   3, 370,  55]])
tensor([[0.8553]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0, 760,  22,   1, 596,   4, 123,  32,   3, 321,  88]])
tensor([[0.0544]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          

tensor([[2.6296]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0, 169,  44,   5,   1,  15,   4,   2,   3, 395,  45]])
tensor([[-4.0243]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0, 165,  19, 105, 106,  15,  12,   2,   3, 190,  63]])
tensor([[-6.1990]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,

tensor([[1.3917]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0, 192,   5,   1,  24,   4,   2,   3, 218,  63]])
tensor([[0.7267]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0, 196, 795,   5,   1,  26,   4,   2,   3, 294,  70]])
tensor([[0.1698]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  

tensor([[-1.0882]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 881,
          25,   1, 378,   4, 885, 379,  27,   1,  42,   4,   2,   3, 254,  33]])
tensor([[-0.8725]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0, 126, 405,   6,  11,   2, 113, 694]])
tensor([[-5.1657]])
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0

KeyboardInterrupt: 

In [258]:

model(bleh2)

tensor([[1.7086]], grad_fn=<AddmmBackward>)

In [325]:
def get_prediction(string):
    #need to embed the string
    string = tokenizer.texts_to_sequences(string)
    string = keras.preprocessing.sequence.pad_sequences(string, maxlen = 70)
    print(string)
    x_val = torch.tensor(string, dtype=torch.long)
    print(x_val)
    return (model(x_val))

In [327]:
get_prediction('Drew Galitz kickoff for 61 yds , Shakeir Ryan return for 10 yds to the NWSt 14')

[[  0   0   0 ...   0   0 181]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0  18]
 [  0   0   0 ...   0   0  24]]
tensor([[  0,   0,   0,  ...,   0,   0, 181],
        [  0,   0,   0,  ...,   0,   0,   0],
        [  0,   0,   0,  ...,   0,   0,   0],
        ...,
        [  0,   0,   0,  ...,   0,   0,   0],
        [  0,   0,   0,  ...,   0,   0,  18],
        [  0,   0,   0,  ...,   0,   0,  24]])


tensor([[ 0.2643],
        [-1.0255],
        [-1.1859],
        [-1.1869],
        [-1.1790],
        [-1.1686],
        [-0.0049],
        [-0.9463],
        [-1.0786],
        [ 1.5034],
        [-0.3652],
        [-0.9083],
        [-1.0803],
        [-1.1429],
        [ 0.8450],
        [-0.7805],
        [-1.0007],
        [-1.0817],
        [-1.1110],
        [-1.1238],
        [-1.1299],
        [-1.1338],
        [-1.1356],
        [-1.1362],
        [-2.0255],
        [-1.6550],
        [-1.1539],
        [-1.1431],
        [ 1.9009],
        [ 1.4642],
        [-0.4474],
        [-0.9243],
        [-1.0514],
        [ 1.1385],
        [-0.8352],
        [ 0.0217],
        [-0.9051],
        [-1.0356],
        [-1.1110],
        [-1.1357],
        [-1.1396],
        [-1.1374],
        [-1.1363],
        [ 0.0444],
        [-0.9209],
        [-1.0654],
        [-1.1187],
        [-1.1373],
        [ 1.5381],
        [-0.3900],
        [-0.9029],
        [-1.0760],
        [-1.

In [355]:
week2 = pd.read_csv('PBP - 2016 - Week 2.csv')

In [477]:
def get_trainloader(df):
    x_train = df['description']
    for index,row in df.iterrows():
        df.at[index,'winner'] = getWinner(row['gameId'], df)
    y_train = df['winner']
    
    x_train = x_train.values
    y_train = y_train.values

    ## create tokens 
    tokenizer = keras.preprocessing.text.Tokenizer(num_words = 1000)
    tokenizer.fit_on_texts(x_train)
    word_index = tokenizer.word_index

    ## ensure that all the data is actually text
    new_x_train = []
    for i in x_train:
        new_x_train.append(str(i))

    x_train = new_x_train

    ## convert texts to padded sequences 
    x_train = tokenizer.texts_to_sequences(x_train)
    x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen = 70)


    ## create training and validation split 
    split_size = int(0.8 * len(df))
    index_list = list(range(len(df)))
    train_idx, valid_idx = index_list[:split_size], index_list[split_size:]

    y_train = df['winner']
    y_train = y_train.values

    y_train = np.where(y_train=='home', 1, y_train) 
    y_train = np.where(y_train=='away', 0, y_train) 
    y_train = y_train.astype(np.int32)


    ## create iterator objects for train and valid datasets
    x_tr = torch.tensor(x_train[train_idx], dtype=torch.long)
    y_tr = torch.tensor(y_train[train_idx], dtype=torch.float32)
    train = TensorDataset(x_tr, y_tr)

    ## Just trying to get a good weight
    test = torch.tensor(y_train[train_idx], dtype=torch.int)
    print(test)
    class_sample_count = np.unique(test, return_counts=True)[1]
    print(class_sample_count)
    weight = 1. / class_sample_count

    print(weight)
    samples_weight = weight[test]
    print(len(samples_weight))

    trainloader = torch.utils.data.DataLoader(train, batch_size=1,sampler=torch.utils.data.sampler.WeightedRandomSampler(samples_weight, len(samples_weight)))

    return trainloader

In [478]:
train2 = get_trainloader(week2)

tensor([1, 1, 1,  ..., 1, 1, 1], dtype=torch.int32)
[1526 9312]
[0.00065531 0.00010739]
10838


In [485]:
total_correct = 0
total_images = 0
confusion_matrix = np.zeros([2,2], int)
preds = np.empty((0,1))
lebs = np.empty((0,1))
with torch.no_grad():
    for data in train2:
        images, labels = data
        outputs = model(images)
        bleh = outputs
        predicted = torch.clamp(bleh, min=0, max=1).float()
        total_images += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        preds = np.append(preds,predicted)
        lebs = np.append(lebs,labels)

print(total_correct)
model_accuracy = total_correct / total_images * 100
print('Model accuracy on {0} test images: {1:.2f}%'.format(total_images, model_accuracy))

4511
Model accuracy on 10838 test images: 41.62%


In [486]:
from sklearn.metrics import roc_auc_score
roc_auc_score(lebs, preds)

0.5086563388491979

adding training on a different week

In [422]:
## run for 10 Epochs
for epoch in range(10):
    train_loss, valid_loss = [], []
    ## training part 
    model.train()
    i=0
    running_loss = 0
    for data, target in train2:
        i+=1
        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target.view(-1,1))
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

[1,  2000] loss: 0.672
[1,  4000] loss: 0.520
[1,  6000] loss: 0.485
[1,  8000] loss: 0.477
[1, 10000] loss: 0.446
[1, 12000] loss: 0.415
[2,  2000] loss: 0.431
[2,  4000] loss: 0.378
[2,  6000] loss: 0.412
[2,  8000] loss: 0.379
[2, 10000] loss: 0.383
[2, 12000] loss: 0.359
[3,  2000] loss: 0.389
[3,  4000] loss: 0.359
[3,  6000] loss: 0.369
[3,  8000] loss: 0.372
[3, 10000] loss: 0.350
[3, 12000] loss: 0.347
[4,  2000] loss: 0.352
[4,  4000] loss: 0.337
[4,  6000] loss: 0.350
[4,  8000] loss: 0.358
[4, 10000] loss: 0.336
[4, 12000] loss: 0.318
[5,  2000] loss: 0.345
[5,  4000] loss: 0.363
[5,  6000] loss: 0.333
[5,  8000] loss: 0.345
[5, 10000] loss: 0.333
[5, 12000] loss: 0.335
[6,  2000] loss: 0.350
[6,  4000] loss: 0.340
[6,  6000] loss: 0.321
[6,  8000] loss: 0.345
[6, 10000] loss: 0.318
[6, 12000] loss: 0.318
[7,  2000] loss: 0.334
[7,  4000] loss: 0.317
[7,  6000] loss: 0.313
[7,  8000] loss: 0.317
[7, 10000] loss: 0.316
[7, 12000] loss: 0.319
[8,  2000] loss: 0.311
[8,  4000] 

In [481]:

week3 = pd.read_csv('PBP - 2016 - Week 3.csv')
train3 = get_trainloader(week3)

tensor([1, 1, 1,  ..., 1, 1, 1], dtype=torch.int32)
[3367 6725]
[0.000297  0.0001487]
10092


In [482]:
total_correct = 0
total_images = 0
confusion_matrix = np.zeros([2,2], int)
preds = np.empty((0,1))
lebs = np.empty((0,1))
with torch.no_grad():
    for data in train3:
        images, labels = data
        outputs = model(images)
        bleh = outputs
        predicted = torch.clamp(bleh, min=0, max=1).float()
        total_images += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        preds = np.append(preds,predicted)
        lebs = np.append(lebs,labels)

print(total_correct)
model_accuracy = total_correct / total_images * 100
print('Model accuracy on {0} test images: {1:.2f}%'.format(total_images, model_accuracy))

4458
Model accuracy on 10092 test images: 44.17%


In [483]:
from sklearn.metrics import roc_auc_score
roc_auc_score(lebs, preds)

0.5201682063922735

In [467]:
## run for 10 Epochs
for epoch in range(10):
    train_loss, valid_loss = [], []
    running_loss = 0.0
    ## training part 
    model.train()
    
    for i,data in enumerate(train3):
        # get the inputs
        inputs, labels = data
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

ValueError: Target size (torch.Size([1])) must be the same as input size (torch.Size([1, 1]))

In [436]:
bigdf = pd.concat([df, week2], ignore_index=True)

In [438]:
big_train = get_trainloader(bigdf)

In [440]:
## run for 10 Epochs
for epoch in range(10):
    train_loss, valid_loss = [], []
    ## training part 
    model.train()
    i=0
    running_loss = 0
    for data, target in big_train:
        i+=1
        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target.view(-1,1))
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

[1,  2000] loss: 0.695
[1,  4000] loss: 0.695
[1,  6000] loss: 0.697
[1,  8000] loss: 0.696
[1, 10000] loss: 0.698
[1, 12000] loss: 0.697
[1, 14000] loss: 0.696
[1, 16000] loss: 0.698
[1, 18000] loss: 0.694
[1, 20000] loss: 0.691
[1, 22000] loss: 0.694
[2,  2000] loss: 0.693
[2,  4000] loss: 0.696
[2,  6000] loss: 0.693
[2,  8000] loss: 0.695
[2, 10000] loss: 0.695
[2, 12000] loss: 0.693
[2, 14000] loss: 0.692
[2, 16000] loss: 0.699
[2, 18000] loss: 0.696
[2, 20000] loss: 0.694
[2, 22000] loss: 0.694
[3,  2000] loss: 0.699
[3,  4000] loss: 0.695
[3,  6000] loss: 0.694
[3,  8000] loss: 0.695
[3, 10000] loss: 0.699
[3, 12000] loss: 0.700
[3, 14000] loss: 0.695
[3, 16000] loss: 0.696
[3, 18000] loss: 0.695
[3, 20000] loss: 0.694
[3, 22000] loss: 0.695
[4,  2000] loss: 0.695
[4,  4000] loss: 0.698
[4,  6000] loss: 0.696
[4,  8000] loss: 0.696
[4, 10000] loss: 0.696
[4, 12000] loss: 0.699
[4, 14000] loss: 0.696
[4, 16000] loss: 0.698
[4, 18000] loss: 0.696
[4, 20000] loss: 0.698
[4, 22000] 