In [4]:
from funcs import *

## Sample Data to Test Nueral Network

In [5]:
df = pd.read_csv("housing/boston_fixed.csv")
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [6]:
X = df.drop("MEDV",axis=1)
y = df["MEDV"]

## Neural Network Class

In [79]:
# NN

class NeuralNetwork:
    def __init__(self, layers, nodes, activations, batchSize=50, activationFn="relu", lr=.01):
        
        self.layers = layers # total number of hidden layers
        
        self.nodes = nodes
        # an int array of size [0, ..., Layers + 1]
        # Nodes[0] shall represent the input size (typically 50)
        # Nodes[Layers + 1] shall represent the output size (typically 1)
        # all other Nodes represent the number of nodes (or width) in the hidden layer i
        
        self.nnodes = [nodes[0], nodes[1], nodes[2]]
        # alternative to nodes where each hidden layer of the nueral network is the same size
        
        self.activations = activations
        # activations[0] and activations[Layers + 1] are left unused
        # activations[i] values are labels indicating the activation function used in layer i
        
        self.batchSize = batchSize
        
        self.activationFn = activationFn
        
        self.lr = lr
        
        self.layer_values = [None] * (self.layers + 2)
        self.iters = 0
        
    def initialize_weights(self, M):
        weights = []
        
        for i in range(self.layers + 1):
            if i == 0:
                input_size = M # special case for w1
            else:
                input_size = self.nodes[i]
            output_size = self.nodes[i + 1]
            w_i = np.random.normal(size=(input_size, output_size))
            w_i = np.round(w_i, 2)
            w_i[input_size - 1:] = 0 # initialize bias to 0
            weights.append(w_i)
        return weights
        
    def forward_pass(self, X_train, y_train):
        
        # add ones for bias
        X_train["ones"] = 1 
        
        # get batch from the data
        batch_slice = np.random.choice(len(X_train), 
                                       replace = False, 
                                       size = self.batchSize) 
        X_batch = X_train.iloc[batch_slice]
        y_batch = y_train.iloc[batch_slice]
        
        # convert to numpy arrays
        X_batch = X_batch.to_numpy()
        self.y_batch = y_batch.to_numpy()
        
        
        if self.iters == 0:
            # initialize weights
            M = X_batch.shape[1] # M = number of features
            self.weights = self.initialize_weights(M)
            
        self.layer_values[0] = X_batch
        
        # calculate hidden layers
        for i in range(layers):
            X = self.layer_values[i]
            weights = self.weights[i]
            h_layer = X.dot(weights)
            
            # apply activation function
            activation_fn = ACTIVATIONS[self.activations[i]]
            activation_fn(h_layer)
            self.layer_values[i + 1] = h_layer
            
        
        # calculate predictions
        X = self.layer_values[self.layers] # values in last hidden layer
        weights = self.weights[self.layers]
        y_pred = X.dot(weights)
        self.y_pred = y_pred.flatten()
        
        # calculate the l2 loss
        l2_loss = squared_loss(self.y_pred, y_batch)
        self.layer_values[self.layers + 1] = l2_loss
        self.iters += 1
        
        return l2_loss
    
    def backprop(self):
        # loss layer
        J = squared_loss_derivative(self.y_pred, self.y_batch, self.batchSize)
        J = np.reshape(J, (len(J), 1))
        
        J_weights = [None] * (layers + 1)
        
        # output layer
        # jacobian w.r.t. weights
        x_t = self.layer_values[self.layers].T
        J_wi = x_t.dot(J)
        J_weights[self.layers] = J_wi
        
        # update jacobian at output layer
        w_t = self.weights[self.layers].T
        J = np.dot(J, w_t)
        
        # iterate through hidden layers backwards
        for i in range(layers, 0 , -1):
            # update jacobian at activation layer
            d_activation_fn = DERIVATIVES[self.activations[i - 1]]
            d_activation_fn(self.layer_values[i], J)
            
            # hidden layer
            # jacobian w.r.t. weights
            x_t = self.layer_values[i - 1].T
            J_wi = x_t.dot(J)
            J_weights[i - 1] = J_wi
        
        for i in range(len(J_weights)):
            self.weights[i] -= self.lr * J_weights[i]
        
    def predict(self, X_train, y_train):
        l2_loss = self.forward_pass(X_train, y_train)
        print("1st loss", l2_loss)
        
        self.backprop()
        l2_loss = self.forward_pass(X_train, y_train)
        print("2nd loss", l2_loss)
        
        

## Running Nueral Network on the Data

In [80]:
layers = 3
nodes = [50, 4, 4, 4, 1]
activations = ["relu", "relu", "relu"]

NN = NeuralNetwork(layers, nodes, activations)
NN.predict(X, y)

1st loss 107927.33513636138
2nd loss 802489855272.4691
