In [1]:
from funcs import *

In [2]:
df = pd.read_csv("housing/boston_fixed.csv")
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [3]:
X = df.drop("MEDV",axis=1)
y = df["MEDV"]

### Inital Class for 1 layer

In [21]:
# NN

class NeuralNetwork:
    def __init__(self, layers, nodes, activations, batchSize=50, activationFn="relu", lr=.1):
        
        self.layers = layers # total number of hidden layers
        
        self.nodes = nodes
        # an int array of size [0, ..., Layers + 1]
        # Nodes[0] shall represent the input size (typically 50)
        # Nodes[Layers + 1] shall represent the output size (typically 1)
        # all other Nodes represent the number of nodes (or width) in the hidden layer i
        
        self.nnodes = [nodes[0], nodes[1], nodes[2]]
        # alternative to nodes where each hidden layer of the nueral network is the same size
        
        self.activations = activations
        # activations[0] and activations[Layers + 1] are left unused
        # activations[i] values are labels indicating the activation function used in layer i
        
        self.batchSize = batchSize
        
        self.activationFn = activationFn
        
        self.lr = lr
      
    # loss over batch
    # x = B * 1 = vector of predictions
    # y = B * 1 = vector of labals
    # output L(x) has size 1
    def jacobian_loss_layer(self, x, y):
        J = {}
        B = len(x)
        for i in range(B):
            J[i] = d_L(x, y, B)
        return pd.series(J)
    
    # x has size B * M
    # output σ(x) has size B * M
    
    def jacobian_activation_layer(self, x, J, func):
        new_x = x.applymap(func).copy()
        J = J * new_x
        return J
    
    def jacobian_dense_layer_input(self, x):
        pass
    
    def jacobian_dense_layer_weights(self, x):
        pass
    
    def forward_pass(self, X_train, y_train):
        
        """ activations : list, length = n_layers - 1
             The ith element of the list holds the values of the ith layer.
        """
        self.activations = []
        self.weights = []
        
        # activations = [None] * 
        df_train = X_train.copy()
        #Do we need to copy the data here? probably can just keep it as X_train
        
        response = y_train.name
        df_train[response] = y_train
        
        df_train_batch = df_train.sample(self.batchSize) # get batch
        X_batch = df_train_batch.drop(response, axis=1)
        X_batch["ones"] = 1 # add ones for bias
        y_batch = df_train_batch[response]
        
        X_batch = X_batch.to_numpy()
        self.y_batch = y_batch.to_numpy()
        
        self.activations.append(X_batch)
        
        M = X_batch.shape[1] # M = number of features
        N = nodes[1] # N = number of nodes in hidden layer
        
        # weights = M * N
        w1 = np.random.normal(size=(M, N)) # initalize weights
        w1 = np.round(w1, 2)
        w1[M-1:] = 0 # initialize biases to 0
        self.weights.append(w1)
        
        h1 = X_batch.dot(w1) # first hidden layer
        h1_activation_function = ACTIVATIONS[self.activationFn]
        h1_activation_function(h1) # h1 is now "activated"
        
        self.activations.append(h1)
        
        w2 = np.random.normal(size=N) # initialize weights
        w2 = np.round(w2, 2)
        w2[N - 1] = 0 # initialize bias to 0
        
        self.z = h1.dot(w2) # z = predictions
        
        w2 = np.reshape(w2, (N, 1))
        self.weights.append(w2)
        
        loss = squared_loss(self.z, y_batch)
        
        self.activations.append(loss)
        
        return self.activations
    
    def backprop(self):
        # loss layer
        J = squared_loss_derivative(self.z, self.y_batch, self.batchSize)
        J = np.reshape(J, (len(J), 1))

        # output layer
        # jacobian w.r.t. weights
        x_t = self.activations[1].T
        J_w2 = x_t.dot(J)
        print(J_w2.shape)
        
        # update jacobian at output layer
        w2_t = self.weights[1].T
        print(w2_t.shape)
        J = np.dot(J, w2_t)
        
        # update jacobian at activation layer
        inplace_relu_derivative(activations[1], J)
        
        # hidden layer
        # jacobian w.r.t. weights
        x_t = activations[0].T
        J_w1 = x_t.dot(J)
        
        # update weights
        self.weights[1] = self.weights[1] - self.lr * J_w2
        self.weights[0] = self.weights[0] - self.lr * J_w1
        
        
        
    def predict(self):
        self.backprop()
        

### New Class for Multiple Layers

In [26]:
# NN with Multiple layers

class NeuralNetwork:
    def __init__(self, layers, nodes, activations, batchSize=50, activationFn="relu", lr=.1):
        
        self.layers = layers # total number of hidden layers
        
        self.nodes = nodes
        # an int array of size [0, ..., Layers + 1]
        # Nodes[0] shall represent the input size (typically 50)
        # Nodes[Layers + 1] shall represent the output size (typically 1)
        # all other Nodes represent the number of nodes (or width) in the hidden layer i
        
        self.nnodes = [nodes[i] for i in range(len(nodes))]
        # alternative to nodes where each hidden layer of the nueral network is the same size
        
        self.activations = activations
        # activations[0] and activations[Layers + 1] are left unused
        # activations[i] values are labels indicating the activation function used in layer i
        
        self.batchSize = batchSize
        
        self.activationFn = activationFn
        
        self.lr = lr
    
    def forward_pass(self, X_train, y_train):
        
        """ activations : list, length = n_layers - 1
             The ith element of the list holds the values of the ith layer.
        """
        self.activations = []
        self.weights = []
        
        # activations = [None] * 
        
        response = y_train.name
        X_train[response] = y_train
        
        X_train_batch = X_train.sample(self.batchSize) # get batch
        X_batch = X_train_batch.drop(response, axis=1)
        X_batch["ones"] = 1 # add ones for bias
        y_batch = X_train_batch[response]
        
        X_batch = X_batch.to_numpy()
        self.y_batch = y_batch.to_numpy()
        
        self.activations.append(X_batch)
        
        M = X_batch.shape[1] # M = number of features
        
        for i in range(layers):
            N = nodes[1 + i] # N = number of nodes in hidden layer

            # weights = M * N
            w1 = np.random.normal(size=(M, N)) # initalize weights
            w1 = np.round(w1, 2)
            w1[M-1:] = 0 # initialize biases to 0
            self.weights.append(w1)

            h1 = X_batch.dot(w1) # first hidden layer
            h1_activation_function = ACTIVATIONS[self.activationFn]
            h1_activation_function(h1) # h1 is now "activated"

            self.activations.append(h1)

        w2 = np.random.normal(size=N) # initialize weights
        w2 = np.round(w2, 2)
        w2[N - 1] = 0 # initialize bias to 0

        self.z = h1.dot(w2) # z = predictions

        w2 = np.reshape(w2, (N, 1))
        self.weights.append(w2)

        loss = squared_loss(self.z, y_batch)

        self.activations.append(loss)

        return self.activations
    
    def backprop(self):
        # loss layer
        J = squared_loss_derivative(self.z, self.y_batch, self.batchSize)
        J = np.reshape(J, (len(J), 1))

        # output layer
        # jacobian w.r.t. weights
        x_t = self.activations[len(activations)-2].T
        J_w2 = x_t.dot(J)
        
        # update jacobian at output layer
        w2_t = self.weights[len(self.weights)-1].T
        J = np.dot(J, w2_t)
        
        #update this weight
        self.weights[len(self.weights)-1] = self.weights[len(self.weights)-1] - self.lr * J_w2
        
        for i in range(layers, 0, -1):
            # update jacobian at activation layer
            inplace_relu_derivative(activations[i], J)

            # hidden layer
            # jacobian w.r.t. weights
            x_t = activations[0].T
            J_w1 = x_t.dot(J)
        
            # update weights
            self.weights[i-1] = self.weights[i-1] - self.lr * J_w1
        
        
        
    def predict(self):
        self.backprop()
        

In [27]:
layers = 1
nodes = [50, 4, 1]
activations = [None, "relu", None]

NN = NeuralNetwork(layers, nodes, activations)
print("layers:", NN.layers)
print("nodes:", NN.nodes)
print("activations:", NN.activations)
print("activationFn:", NN.activationFn)

activations = NN.forward_pass(X, y)
#print("dimensitons:", z.shape)
# for i in range(len(NN.weights)):
#     print(i, ":", NN.weights[i])
activations[2]

layers: 1
nodes: [50, 4, 1]
activations: [None, 'relu', None]
activationFn: relu


313677.0353812608

In [28]:
layers = 2
nodes = [50, 4, 4, 1]
activations = [None, "relu", None]

NN = NeuralNetwork(layers, nodes, activations)
print("layers:", NN.layers)
print("nodes:", NN.nodes)
print("activations:", NN.activations)
print("activationFn:", NN.activationFn)

activations = NN.forward_pass(X, y)
#print("dimensitons:", z.shape)
activations[3]

layers: 2
nodes: [50, 4, 4, 1]
activations: [None, 'relu', None]
activationFn: relu


109432.83908113603

In [29]:
for i in range(len(NN.weights)):
    print(i, ":", NN.weights[i])

0 : [[-1.35  0.64 -0.06  1.  ]
 [-0.69 -0.12  0.09  0.32]
 [ 0.16 -1.08 -0.1   0.21]
 [-0.64  0.65 -2.76 -1.39]
 [ 0.56 -0.4   1.49  0.68]
 [-0.4  -0.12  0.16  1.09]
 [-0.25  0.37  1.13  1.65]
 [ 0.77 -1.09  0.01 -0.5 ]
 [-0.14 -0.41 -0.75  0.28]
 [ 1.1   1.39  0.4   0.9 ]
 [ 0.63 -2.74 -1.33 -2.29]
 [ 0.18  0.01 -0.72 -0.22]
 [-0.86 -0.63  2.39  3.86]
 [ 0.    0.    0.    0.  ]]
1 : [[-0.65 -1.46 -0.35 -0.56]
 [ 1.56  0.52  0.13  0.36]
 [ 0.59  0.78 -0.93 -0.69]
 [-0.36  0.42 -0.03  0.35]
 [ 1.01 -2.05 -1.78 -0.36]
 [ 0.74  1.33  0.89  0.87]
 [ 0.62 -1.05 -1.11 -1.14]
 [ 1.04 -1.77 -0.06  0.83]
 [ 1.03 -1.74 -0.35 -0.16]
 [ 0.26  0.46  0.27 -2.18]
 [-0.53 -1.31 -0.31  0.99]
 [ 2.3  -0.94 -0.28  0.6 ]
 [ 2.15  0.76  1.38  0.13]
 [ 0.    0.    0.    0.  ]]
2 : [[-0.42]
 [-1.02]
 [-0.6 ]
 [ 0.  ]]


In [30]:
NN.backprop()

In [31]:
for i in range(len(NN.weights)):
    print(i, ":", NN.weights[i])

0 : [[-6.51366796e+01 -5.18322367e+01 -3.28156532e+01  1.00000000e+00]
 [-2.86463064e+02 -1.20000000e-01  9.00000000e-02  3.20000000e-01]
 [-2.07339985e+02 -4.50238864e+01 -5.12671531e+01  2.10000000e-01]
 [-1.44933282e+00  6.50000000e-01 -2.76000000e+00 -1.39000000e+00]
 [-1.00086252e+01 -2.04627665e+00 -1.78877994e-01  6.80000000e-01]
 [-1.22367362e+02 -1.53606301e+01 -1.48210611e+01  1.09000000e+00]
 [-1.30420032e+03 -2.26029793e+02 -2.12649446e+02  1.65000000e+00]
 [-7.98361369e+01 -6.02966060e+00 -5.61318651e+00 -5.00000000e-01]
 [-1.93947137e+02 -5.86781367e+01 -5.06861411e+01  2.80000000e-01]
 [-8.16634538e+03 -1.61555079e+03 -1.71312304e+03  9.00000000e-01]
 [-3.56393500e+02 -5.17823484e+01 -5.25014271e+01 -2.29000000e+00]
 [-7.12888614e+03 -1.57217656e+02 -4.54931346e+02 -2.20000000e-01]
 [-2.48386224e+02 -4.75948530e+01 -4.93849965e+01  3.86000000e+00]
 [-1.94883209e+01 -2.42783903e+00 -2.53594355e+00  0.00000000e+00]]
1 : [[-6.44366796e+01 -5.39322367e+01 -3.31056532e+01 -5.

We can see the weights updated

In [9]:
w2 = NN.weights[len(NN.weights)-1]
w2.shape

(4, 1)

In [10]:
w2 = np.reshape(w2, (4, 1))
w2.shape

(4, 1)