In [1]:
from funcs import *
from sklearn.datasets import load_boston
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import sys

## Sample Data to Test Nueral Network

In [2]:
X, y = load_boston(return_X_y=True)

scaler = StandardScaler()
X_std = scaler.fit_transform(X)

## Neural Network Class

In [3]:
class NeuralNetwork:
    
    def __init__(self, layers, nodes, activations=[], 
                 batchSize=50, activationFn="", lr=.01,
                 max_epochs=100):
        
        self.layers = layers # total number of hidden layers
        
        self.nodes = nodes
        # an int array of size [0, ..., Layers + 1]
        # Nodes[0] shall represent the input size (typically 50)
        # Nodes[Layers + 1] shall represent the output size (typically 1)
        # all other Nodes represent the number of nodes (or width) in the hidden layer i
        
        self.nnodes = [nodes[0], nodes[1], nodes[2]]
        # alternative to nodes where each hidden layer of the nueral network is the same size
        
        self.activations = activations
        # activations[0] and activations[Layers + 1] are left unused
        # activations[i] values are labels indicating the activation function used in layer i
        
        self.batchSize = batchSize
        
        self.activationFn = activationFn
        
        self.lr = lr
        
        self.layer_values = [None] * (self.layers + 2)
        self.iters = 0
        self.epochs = 0
        self.max_epochs = max_epochs
        
        activation_functions = list(ACTIVATIONS.keys())
        if activationFn != "":
            if activationFn not in activation_functions:
                print("User error: %s is not an activation function"
                      % activationFn)
                print("Available activation functions: " + 
                      "relu, leaky_relu, sigmoid, tanh")
                sys.exit(0)
            else:
                self.activations = [activationFn] * layers
    
    
    def initialize_weights(self, M):
        weights = []
        
        for i in range(self.layers + 1):
            if i == 0:
                input_size = M # special case for w1
            else:
                input_size = self.nodes[i]
            output_size = self.nodes[i + 1]
            w_i = np.random.normal(size=(input_size, output_size))
            w_i = np.round(w_i, 2)
            w_i[input_size - 1:] = 0 # initialize bias to 0
            weights.append(w_i)
        return weights
       
        
    def forward_pass(self, X_batch, y_batch):
        
        self.layer_values[0] = X_batch
        
        # calculate hidden layers
        for i in range(layers):
            X = self.layer_values[i]
            weights = self.weights[i]
            h_layer = X.dot(weights)
            
            # apply activation function
            activation_fn = ACTIVATIONS[self.activations[i]]
            activation_fn(h_layer)
            self.layer_values[i + 1] = h_layer
            
        
        # calculate predictions
        X = self.layer_values[self.layers] # values in last hidden layer
        weights = self.weights[self.layers]
        y_pred = X.dot(weights)
        y_pred = y_pred.flatten()
        
        # calculate the l2 loss
        l2_loss = 0
        # only need predictions once we have fit the data
        if isinstance(y_batch, np.ndarray): 
            l2_loss = squared_loss(y_pred, y_batch)
            self.layer_values[self.layers + 1] = l2_loss
        
        return l2_loss, y_pred
    
    
    def backward_pass(self, y_pred, y_batch):
        # loss layer
        J = squared_loss_derivative(y_pred, y_batch, self.batchSize)
        J = np.reshape(J, (len(J), 1))
        
        J_weights = [None] * (layers + 1)
        
        # output layer
        # jacobian w.r.t. weights
        x_t = self.layer_values[self.layers].T
        J_wi = x_t.dot(J)
        J_weights[self.layers] = J_wi
        
        # update jacobian at output layer
        w_t = self.weights[self.layers].T
        w_t = np.delete(w_t, w_t.shape[1] - 1, 1) # take out the bias
        J = np.dot(J, w_t)
        zeros = [0] * len(J)
        zeros = np.reshape(zeros, (len(J), 1))
        J = np.append(J, zeros, axis=1)
        
        # iterate through hidden layers backwards
        for i in range(layers, 0 , -1):
            # update jacobian at activation layer
            d_activation_fn = DERIVATIVES[self.activations[i - 1]]
            d_activation_fn(self.layer_values[i], J)
            
            # hidden layer
            # jacobian w.r.t. weights
            x_t = self.layer_values[i - 1].T
            J_wi = x_t.dot(J)
            J_weights[i - 1] = J_wi
        
        for i in range(len(J_weights)):
            self.weights[i] -= self.lr * J_weights[i]
      
    
    def fit(self, X_train, y_train):
        
        # convert to numpy arrays
        if isinstance(X_train, pd.DataFrame):
            X_train = X_train.to_numpy()
            
        if isinstance(y_train, pd.Series):
            y_train = y_train.to_numpy()
            
        # add ones for bias
        ones = [1] * len(X_train)
        ones = np.reshape(ones, (len(X_train), 1))
        X_train = np.append(X_train, ones, axis=1)
        
        # save 10% for validation
        val_rows = round(len(X_train) * .1)
        X_val = X_train[:val_rows, :]
        y_val = y_train[:val_rows]
        
        X_train = X_train[val_rows:, :]
        y_train = y_train[val_rows:]
        
        # initalize weights on first iteration
        M = X_train.shape[1] # M = number of features
        self.weights = self.initialize_weights(M)
        
        while (self.epochs < self.max_epochs):
            # ONE EPOCH 
            last_idx = 0
            while (last_idx < len(X_train)):
                first_idx = self.iters * self.batchSize
                remaining_rows = len(X_train) - first_idx
                last_idx = first_idx + min(self.batchSize, remaining_rows)
                X_batch = X_train[first_idx: last_idx, :]
                y_batch = y_train[first_idx: last_idx]

                loss, y_pred = self.forward_pass(X_batch, y_batch)
                self.backward_pass(y_pred, y_batch)
                self.iters += 1
            
            # trainig and validation loss after one epoch
            t_loss, y_pred = self.forward_pass(X_train, y_train)
            v_loss, y_pred = self.forward_pass(X_val, y_val)
            print("epoch:", self.epochs)
            print("training loss:", t_loss)
            print("validation loss:", v_loss)
            
            self.iters = 0 # start over, next epoch
            self.epochs += 1
       
    
    def predict(self, X_test):
        
        # convert to numpy array
        if isinstance(X_test, pd.DataFrame):
            X_test = X_test.to_numpy()
        
        # add ones for bias
        ones = [1] * len(X_test)
        ones = np.reshape(ones, (len(X_test), 1))
        X_test = np.append(X_test, ones, axis=1)
        
        loss, y_pred = self.forward_pass(X_test, None)
        return y_pred
        

## Running Nueral Network on the Data

In [11]:
layers = 3
nodes = [50, 100, 100, 100, 1]
activations = [] # use if you want a diff activationFn per layer

nn = NeuralNetwork(layers, nodes, 
                   batchSize=50, activationFn="tanh", lr=.01, 
                   max_epochs=20, 
                  )
nn.fit(X_std, y)

epoch: 0
training loss: 314.6014476169217
validation loss: 431.62803604519985
epoch: 1
training loss: 205.71224458494908
validation loss: 359.83267979508645
epoch: 2
training loss: 151.89873097924288
validation loss: 319.5624509046413
epoch: 3
training loss: 115.01280366310033
validation loss: 291.40512304084996
epoch: 4
training loss: 90.5150703695522
validation loss: 268.6364109111713
epoch: 5
training loss: 71.38309209528568
validation loss: 250.06380203565953
epoch: 6
training loss: 57.59082068391165
validation loss: 238.80851334802827
epoch: 7
training loss: 48.14404568588401
validation loss: 233.92133805770112
epoch: 8
training loss: 40.15226402718389
validation loss: 230.68141055298955
epoch: 9
training loss: 34.448010224333906
validation loss: 228.18099139243444
epoch: 10
training loss: 30.173909105782965
validation loss: 226.27052003498665
epoch: 11
training loss: 26.343353201004085
validation loss: 223.37607101431726
epoch: 12
training loss: 23.083380733811303
validation loss

## Mean Absolute Error of Housing Predictions

In [12]:
mae = mean_absolute_error(y, nn.predict(X_std))
print('Mean absolute error: $%0.2f'%(mae*1000))

Mean absolute error: $4823.07


Compare these to results to those in nn_tuning_example.ipynb.  Goal: Get MAE Under $1000 with our NN.  Then, we know our NN is working and can use it on the dataset for this project

In [22]:
class LR:
    
    def fit(self, X_train, y_train):
        # create vector of ones...
        ones = np.ones(shape=len(X_train))[..., None]
        #...and add to feature matrix
        X = np.concatenate((ones, X_train), 1)
        #calculate coefficients using closed-form solution
        self.coeffs = np.inv(X.transpose().dot(X)).dot(X.transpose()).dot(y_train)
        
    def predict(self, X_test):
        ones = np.ones(shape=len(X_test))[..., None]
        X_test = np.concatenate((ones, X_test), 1)
        y_hat = X_test.dot(self.coeffs)
        return y_hat

## Linear Regression MAE (compare)

In [23]:
lr = LR()
lr.fit(X, y)
mae = mean_absolute_error(y, lr.predict(X_std))
print('Mean absolute error: $%0.2f'%(mae*1000))

NameError: name 'inv' is not defined