In [71]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsapi

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

%matplotlib inline

In [11]:
hits = pd.read_csv("player_stats_7_23_19.csv")

In [14]:
hits['player_got_hit'] = hits['player_got_hit'].apply(float)

In [18]:
data = np.array(hits.iloc[:, 3:-1])
data

array([[ 97.   ,  91.   ,  58.   , ...,   1.667,   1.   ,   1.   ],
       [ 18.   ,   5.   ,   1.   , ...,   0.   ,   0.   ,   0.   ],
       [ 11.   ,   2.   ,   3.   , ...,   0.   ,   0.   ,   1.   ],
       ..., 
       [ 95.   ,  57.   ,  78.   , ...,   0.773,   0.5  ,   0.   ],
       [ 64.   ,  67.   ,  33.   , ...,   1.4  ,   1.   ,   1.   ],
       [  7.   ,   1.   ,   1.   , ...,   0.   ,   0.   ,   0.   ]])

In [21]:
labels = np.array(hits.iloc[:, -1])
labels

array([ 1.,  0.,  1.,  1.,  1.,  1.,  0.,  1.,  0.,  1.,  0.,  1.,  1.,
        1.,  1.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,
        1.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  1.,  0.,
        0.,  0.,  1.,  1.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,
        0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  1.,
        0.,  1.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,  0.,
        0.,  0.,  1.,  1.,  0.,  0.,  1.,  0.,  1.,  1.,  0.,  1.,  0.,
        0.,  0.,  1.,  0.,  1.,  1.,  1.,  0.,  1.,  0.,  1.,  0.,  1.,
        0.,  1.,  1.,  1.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,
        0.,  0.,  1.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
        1.,  0.,  1.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,  1.,
        0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
        1.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  1.,  0.,  0.,  0

In [63]:
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.2)
data_train, data_val, labels_train, labels_val = train_test_split(data_train, labels_train, test_size=0.2)

In [64]:
np.mean(labels_train == 0)

0.54935622317596566

In [65]:
np.mean(labels_train == 1)

0.45064377682403434

In [66]:
logreg = LogisticRegression(solver='newton-cg', C=100).fit(data_train, labels_train)

In [67]:
np.mean(logreg.predict(data_train) == labels_train)

0.81974248927038629

In [69]:
np.mean(logreg.predict(data_val) == labels_val)

0.74576271186440679

## Now the fun begins

In [58]:
num_rows = len(data)
num_feats = len(np.transpose(data))
print(num_rows)
print(num_feats)

365
61


### Convert our arrays to torch tensors

In [92]:
data_train_t = torch.from_numpy(data_train).type(torch.FloatTensor)
data_val_t = torch.from_numpy(data_val).type(torch.FloatTensor)
data_test_t = torch.from_numpy(data_test).type(torch.FloatTensor)

labels_train_t = torch.from_numpy(labels_train).type(torch.LongTensor)
labels_val_t = torch.from_numpy(labels_val).type(torch.LongTensor)
labels_test_t = torch.from_numpy(labels_test).type(torch.LongTensor)

### Then begin training

In [108]:
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(num_feats, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, len(data_train_t))
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
    
        return x
    
    def predict(self, x):
        predictions = F.softmax(self.forward(x))
        return predictions
        

In [110]:
model = NeuralNet()

In [104]:
# Hyperparameters
learning_rate = .001
epochs = 100

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [105]:
losses = []
for i in range(epochs):
    outputs = model.forward(data_train_t)
    loss = criterion(outputs, labels_train_t)
    losses.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [106]:
losses

[0.1382243037223816,
 3.30961012840271,
 0.45492711663246155,
 2.5906989574432373,
 2.3309059143066406,
 0.6081636548042297,
 0.35924047231674194,
 1.2671750783920288,
 1.5102070569992065,
 0.9819403886795044,
 0.2631914019584656,
 0.39550989866256714,
 0.8843850493431091,
 0.8810398578643799,
 0.426871120929718,
 0.15440185368061066,
 0.37233003973960876,
 0.6030002236366272,
 0.5761265754699707,
 0.3525485396385193,
 0.17228670418262482,
 0.25729283690452576,
 0.4322543740272522,
 0.37130671739578247,
 0.190165713429451,
 0.15425419807434082,
 0.24627089500427246,
 0.3045112192630768,
 0.2563822269439697,
 0.16785204410552979,
 0.16283361613750458,
 0.23163925111293793,
 0.25197094678878784,
 0.19298873841762543,
 0.14706218242645264,
 0.16991952061653137,
 0.20614689588546753,
 0.19580954313278198,
 0.15613102912902832,
 0.1433539092540741,
 0.1693648248910904,
 0.18507878482341766,
 0.16345655918121338,
 0.14349928498268127,
 0.15358342230319977,
 0.16939890384674072,
 0.1633559316

In [116]:
_, predicted = torch.max(outputs, 1)
np.mean(predicted.numpy() == labels_train)

0.94849785407725318