# Making a Neural Network using MLB Batting Statcast Stats

# Importing needed libraries and defining the class which holds the model

In [26]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from sklearn.preprocessing import StandardScaler


class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize,hiddenSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, hiddenSize)
        self.linear2 = torch.nn.Linear(hiddenSize,hiddenSize)
        self.linear3= torch.nn.Linear(hiddenSize, outputSize)
        self.ReLU= torch.nn.ReLU()
        self.Sigmoid= torch.nn.Sigmoid()

    def forward(self, x):
        x = self.linear(x)
        # x = self.ReLU(x)
        # x = self.linear2(x)
        x= self.ReLU(x)
        x=self.linear3(x)
        return x

# Loading in data from CSV table

In [30]:
#Importing CSV with all stats (as of June 6 2021)
stats=pd.read_csv('stats.csv')


#Extracting variables of interest 
age=stats.loc[:,'player_age']

HR=stats.loc[:,'b_home_run']

b_avg=stats.loc[:,'batting_avg']

OPS=stats.loc[:,'on_base_plus_slg']

e_vel=stats.loc[:,'exit_velocity_avg']

#Importing CSV with all stats of the 2019 Season 
stats_2019=pd.read_csv('stats_2019.csv')

age_19=stats_2019.loc[:,'player_age']

HR_19=stats_2019.loc[:,'b_home_run']

b_avg_19=stats_2019.loc[:,'batting_avg']

OPS_19=stats_2019.loc[:,'on_base_plus_slg']

e_vel_19=stats_2019.loc[:,'exit_velocity_avg']



# Preping the data to feed into model

In [48]:
inputs=[OPS_19,e_vel_19]
outputs=[HR_19]
inputs_transformed=[]
outputs_transformed=[]

def data_preparer(inputs,transformed_list):  
    """
    Takes in a list in which each element is an input variable and then preps
    it accordingly to return it as one combined GPU pytorch tensor. 
    """
    

    #makes all inputs np arrays of np.float32
    #and makes scailing to mean 0 and std of 1
    for i in range (len(inputs)):
        transformed_list.append(np.array(inputs[i],dtype=np.float32).reshape(-1,1))
                                                                                                    
    reshape=np.column_stack(transformed_list)
    out=torch.from_numpy(reshape).to('cuda:0')
    return(out)




inputs_tensor=data_preparer(inputs,inputs_transformed)

outputs_tensor=data_preparer(outputs,outputs_transformed)

train_size=int(np.floor(0.6*len(outputs_tensor)))
test_size=int(np.floor(0.2*len(outputs_tensor)))
validate_size=int(len(outputs_tensor)-train_size-test_size)

#Create Tensor Datasets 
train_ds, test_ds, validate_ds=torch.utils.data.random_split(TensorDataset(inputs_tensor,outputs_tensor),[train_size,test_size,validate_size])  

#Create Data Loaders
train_dl=DataLoader(train_ds,batch_size=32,shuffle=True)
test_dl=DataLoader(test_ds,batch_size=32,shuffle=True)
validate_dl=DataLoader(validate_ds,batch_size=32,shuffle=True)





# Creating the model training loop

In [49]:

inputDim=len(inputs)
outputDim=1 
hiddenSize=50
learningRate=0.01

model = linearRegression(inputDim, outputDim,hiddenSize)
##### For GPU #######
if torch.cuda.is_available():
    model.cuda()

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

#Training Loop
epochs=2000

epoch_array=np.zeros(epochs)
loss_array=np.zeros(epochs)
for epoch in range(epochs): #Forward Pass and loss
    for xb,yb in train_dl:
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model(xb)

        # get loss for the predicted output
        loss = criterion(outputs, yb)
        # print(loss)
        # get gradients w.r.t to parameters, (backward pass)
        loss.backward()

        # update parameters
        optimizer.step()

        epoch_array[epoch]=epoch 
        loss_array[epoch]=loss.item()

        if epoch %100==0:
            print('epoch {}, loss {}'.format(epoch, loss.item()))

with torch.no_grad(): # we don't need gradients in the testing phase
    predicted = model(inputs_tensor)
    print(predicted)




57425689697266
epoch 1300, loss 36.3574104309082
epoch 1300, loss 29.137073516845703
epoch 1300, loss 83.60862731933594
epoch 1300, loss 54.5191650390625
epoch 1300, loss 51.180633544921875
epoch 1300, loss 28.888242721557617
epoch 1300, loss 38.59818649291992
epoch 1400, loss 65.62138366699219
epoch 1400, loss 69.75750732421875
epoch 1400, loss 45.25830841064453
epoch 1400, loss 72.03501892089844
epoch 1400, loss 43.04065704345703
epoch 1400, loss 51.0902099609375
epoch 1400, loss 64.57899475097656
epoch 1400, loss 71.1993179321289
epoch 1400, loss 87.78205871582031
epoch 1400, loss 40.97297668457031
epoch 1400, loss 26.890623092651367
epoch 1400, loss 73.57624816894531
epoch 1400, loss 66.93058013916016
epoch 1400, loss 95.06254577636719
epoch 1500, loss 29.267959594726562
epoch 1500, loss 63.503204345703125
epoch 1500, loss 48.71284103393555
epoch 1500, loss 41.07213592529297
epoch 1500, loss 32.018619537353516
epoch 1500, loss 52.30335998535156
epoch 1500, loss 31.990718841552734
e

# Plotting the Results of the training loop of the model

In [52]:
from mpl_toolkits.mplot3d import Axes3D

%matplotlib qt
fig=plt.figure()
ax= plt.axes(projection='3d')
ax.scatter3D(inputs_tensor.cpu().detach().numpy()[:,0].flatten(),inputs_tensor.cpu().detach().numpy()[:,1].flatten(),predicted.cpu().detach().numpy().flatten(),c='green',label='Model Predicitons')

ax.scatter3D(inputs_tensor.cpu().detach().numpy()[:,0],inputs_tensor.cpu().detach().numpy()[:,1],outputs_tensor.cpu().detach().numpy(), c='blue', alpha=0.5, label='Actual Values')

ax.set_title('OPS, Average Exit Velocity and Home Runs in the 2019 MLB Season')
ax.set_xlabel('OPS')
ax.set_ylabel('Exit Velocity (MPH)')
ax.set_zlabel('HR')
ax.legend()

<matplotlib.legend.Legend at 0x7f69252604d0>