In [3]:
import pandas as pd
import numpy as np


In [2]:
# Load the raw pitch records and discard rows that are exact duplicates.
df = pd.read_csv('../pitches.csv').drop_duplicates()

# Exclude the 'UN' and 'FA' pitch types: unknown prediction classes and
# prediction classes with no definition in the data source.
df = df.loc[(df['pitch_type'] != 'UN') & (df['pitch_type'] != 'FA')]

features = ['break_angle', 'break_length', 'end_speed', 'start_speed', 'spin_rate', 'pitch_type']

# Outlier removal (wild pitches, intentional walks, ...): for every measured
# feature keep only the rows lying within 12 standard deviations of that
# column's mean. The statistics are recomputed on the already-filtered frame
# at each step. Approach adapted from
# https://stackoverflow.com/questions/23199796/detect-and-exclude-outliers-in-pandas-data-frame
for feature in features:
    if feature == 'pitch_type':
        # The label column is not a measurement, so it is not filtered.
        continue
    values = df[feature]
    spread_limit = 12 * values.std()
    df = df[(values - values.mean()).abs() <= spread_limit]

# Keep only the data points whose prediction class is reliable.
df = df[df['type_confidence'] >= 1.0]

# Discard every record that still has a missing value.
df = df.dropna()

In [2]:
from scipy.special import expit
import sys
import pandas as pd
import numpy as np

class MultiLayerPerceptronBase(object):
    """Shared plumbing for a fully connected, sigmoid-activated perceptron.

    Stores the hyperparameters and provides the helpers that subclasses build
    on: one-hot label encoding, weight initialisation, the sigmoid activation,
    bias-unit insertion and a regularised mean-squared-error cost.

    Parameters
    ----------
    layers : int
        Accepted for interface compatibility; it is not read by this class.
    layer_width : sequence of int
        Number of units in each layer, in order. Adjacent entries determine
        the weight-matrix shapes, so the sequence must already contain the
        input width first and the output width last.
    C : float
        Regularisation strength that `_cost` passes to `_l2_reg`.
    epochs : int
        Stored as `self.epochs`; not used by this class (presumably the
        number of training passes -- confirm against the training code).
    eta : float
        Stored as `self.eta`; not used by this class (presumably the
        learning rate -- confirm against the training code).
    random_state : int or None
        Seed handed to `np.random.seed`. Note that this reseeds NumPy's
        *global* generator as a side effect of constructing the object.
    """

    def __init__(self, layers=2, layer_width=(4, 3, 3), C=0.0,
                 epochs=500, eta=0.001, random_state=None):
        np.random.seed(random_state)
        self.C = C
        self.epochs = epochs
        self.eta = eta
        # Store a private copy: instances must not share the default value
        # nor alias (and later be affected by changes to) the caller's list.
        self.layer_width = list(layer_width)

    @staticmethod
    def _encode_labels(y):
        """Return the one-hot encoding of `y`, transposed so that each row is
        one class and each column is one sample."""
        return pd.get_dummies(y).values.T

    def _initialize_weights(self):
        """Create one weight matrix per pair of adjacent layers.

        Each matrix has shape (units_out, units_in + 1) -- the extra column
        multiplies the bias unit -- and is filled with draws from the uniform
        distribution on [-1, 1). The list is stored in `self.weights`.
        """
        self.weights = [
            np.random.uniform(-1.0, 1.0, size=(n_out, n_in + 1))
            for n_in, n_out in zip(self.layer_width[:-1], self.layer_width[1:])
        ]

    @staticmethod
    def _sigmoid(z):
        """Element-wise logistic sigmoid (delegates to `scipy.special.expit`)."""
        return expit(z)

    @staticmethod
    def _add_bias_unit(X, how='column'):
        """Add bias unit (column or row of 1s) to array at index 0.

        Raises
        ------
        ValueError
            If `how` is neither 'column' nor 'row'.
        """
        if how == 'column':
            ones = np.ones((X.shape[0], 1))
            return np.hstack((ones, X))
        if how == 'row':
            ones = np.ones((1, X.shape[1]))
            return np.vstack((ones, X))
        raise ValueError("`how` must be 'column' or 'row', got %r" % (how,))

    @staticmethod
    def _l2_reg(lambda_, weights):
        """Regularisation penalty over the non-bias weights.

        Sums the mean squared value of every matrix with its first (bias)
        column excluded, takes the square root and scales the result by
        `lambda_ / len(weights)`. An empty `weights` sequence yields 0.0.
        """
        if len(weights) == 0:
            # Nothing to penalise; also avoids dividing by zero below.
            return 0.0
        total = sum(np.mean(w[:, 1:] ** 2) for w in weights)
        return np.sqrt(total) * (lambda_ / len(weights))

    def _cost(self, A_n, y_enc, weights):
        """Mean squared error between `y_enc` and the activations `A_n`,
        plus the `_l2_reg` penalty computed with strength `self.C`."""
        cost = np.mean((y_enc - A_n) ** 2)
        L2_term = self._l2_reg(self.C, weights)
        return cost + L2_term
    

In [75]:
class MultiLayerPerceptron(MultiLayerPerceptronBase):
    """Perceptron with a sigmoid forward pass and an output-layer gradient."""

    def _feedforward(self, X):
        """Propagate `X` through every weight matrix in `self.weights`.

        Parameters
        ----------
        X : array-like
            Either a 2-D batch of shape (n_samples, n_features), where
            n_features matches the first weight matrix, or a single sample
            of any other shape, which is flattened into one column.

        Returns
        -------
        A : list of ndarray
            A[i] is the input fed to `self.weights[i]`, with a bias row of
            ones prepended (one column per sample). The final element is the
            sigmoid of the last pre-activation and carries no bias row.
        Z : list of ndarray
            Z[i] = self.weights[i] @ A[i].
        """
        X = np.asarray(X)
        n_features = self.weights[0].shape[1] - 1
        if X.ndim == 2 and X.shape[1] == n_features:
            # Batch input: one column per sample after transposing.
            layer_input = X.T
        else:
            # Single sample: flatten into one column vector.
            layer_input = X.T.reshape(-1, 1)

        A = []
        Z = []
        for W_n in self.weights:
            A_n = self._add_bias_unit(layer_input, how="row")
            Z_n = W_n @ A_n
            A.append(A_n)
            Z.append(Z_n)
            # The activation of this layer is the input of the next one.
            layer_input = self._sigmoid(Z_n)
        A.append(layer_input)
        return A, Z

    def _get_gradient(self, A, Z, Y_enc):
        """Gradient of the squared-error objective w.r.t. the last weight matrix.

        Only the output layer is handled: the returned list holds a single
        array shaped like `self.weights[-1]`, accumulated over the samples.
        `Z` is accepted but not used.

        NOTE(review): `zip` stops at the shortest of its inputs, so a `Y_enc`
        with fewer entries along its sample axis than `A` has samples
        silently limits the sum -- confirm callers pass matching shapes.
        """
        output_grad = np.zeros(self.weights[-1].shape)
        # Transposing makes each iteration yield the values of one sample.
        for a, a2, y in zip(A[-1].T, A[-2].T, Y_enc.T):
            # d(y - a)^2/dz for a sigmoid unit: -2 (y - a) * a (1 - a).
            dJ_dzl = -2 * (y - a) * a * (1 - a)
            # Outer product with the previous layer's (bias-augmented) activation.
            output_grad += dJ_dzl[:, np.newaxis] @ a2[np.newaxis, :]
        return [output_grad]
                
            
            
        

In [77]:
# Smoke test: push one 8-feature sample through an 8-9-9 network, then print
# the activations and the output-layer gradient.
# `_feedforward` turns the 1-D sample into a column itself, so no reshape is
# needed here.
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
mc = MultiLayerPerceptron(layers=2, layer_width=[x.shape[0], 9, 9])
mc._initialize_weights()
A, Z = mc._feedforward(x)
# NOTE(review): placeholder target. With a single sample `_get_gradient`
# consumes only the first entry (2) and broadcasts it over all nine outputs;
# replace with a real one-hot column for a meaningful gradient.
y_enc = np.array([2, 1, 2])
print(A)
print(mc._get_gradient(A, Z, y_enc))


[array([[1.],
       [1.],
       [2.],
       [3.],
       [4.],
       [5.],
       [6.],
       [7.],
       [8.]]), array([[1.00000000e+00],
       [1.79690181e-06],
       [5.93682552e-03],
       [2.24345541e-07],
       [9.99989317e-01],
       [1.02513463e-01],
       [8.13121590e-02],
       [9.99955879e-01],
       [8.75149686e-01],
       [1.51239611e-07]]), array([[0.50023493],
       [0.32891478],
       [0.81055778],
       [0.29737335],
       [0.6977265 ],
       [0.54202016],
       [0.57902878],
       [0.71641033],
       [0.79036486]])]
[array([[-7.49882369e-01, -1.34746499e-06, -4.45192079e-03,
        -1.68232766e-07, -7.49874358e-01, -7.68730385e-02,
        -6.09745544e-02, -7.49849284e-01, -6.56259320e-01,
        -1.13411918e-07],
       [-7.37716770e-01, -1.32560460e-06, -4.37969575e-03,
        -1.65503468e-07, -7.37708888e-01, -7.56259007e-02,
        -5.99853433e-02, -7.37684221e-01, -6.45612599e-01,
        -1.11571997e-07],
       [-3.65286897e-01, -6.56