In [80]:
from funcs import *
from sklearn.datasets import load_boston
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import sys

## Sample Data to Test Nueral Network

In [81]:
X, y = load_boston(return_X_y=True)

scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [174]:
x = np.array([[1, 2], [3, 4]])
x

array([[1, 2],
       [3, 4]])

array([[ 1,  4],
       [ 9, 16]])

In [177]:
x_sq = x ** 2
x_sq
np.sum(x_sq)

30

## Neural Network Class

In [200]:
class NeuralNetwork:
    
    def __init__(self, layers=None, nodes=None, nnodes=None, 
                 activations=[], activationFn="relu", batchSize=50, 
                 lr=.001, lr_type="constant", power_t=.5,
                 annealing_rate=.999, max_epoch=200, momentum=.9, 
                 tol=0.0001, alpha=.0001, early_stopping=False, 
                 num_epochs_stop=10):
        
        if layers != None:
            self.layers = layers # total number of hidden layers
        else:
            self.layers = len(nodes)

        # an int array of size [0, ..., Layers + 1]
        # Nodes[0] shall represent the input size (typically 50)
        # Nodes[Layers + 1] shall represent the output size (typically 1)
        # all other Nodes represent the number of nodes (or width) in the hidden layer i
        self.nodes = nodes
        if nodes != None:
            self.nodes.insert(0, batchSize)
            self.nodes.append(1)
        
        # alternative to nodes where each hidden layer of the nueral network is the same size
        self.nnodes = nnodes
        if nnodes != None:
            self.nodes = []
            self.nodes.append(batchSize)
            for i in range(layers):
                self.nodes.append(nnodes)
            self.nodes.append(1)
        
        # activations[i] values are labels indicating the activation function used in layer i
        self.activations = activations
        self.activationFn = activationFn
        if activationFn != "":
            self.activations = [activationFn] * self.layers
        
        self.batchSize = batchSize
        self.lr = lr
        self.lr_type = lr_type
        self.power_t = power_t
        self.annealing_rate = annealing_rate
        self.max_epoch = max_epoch
        self.mu = momentum
        self.tol = tol
        self.alpha = alpha
        
        if early_stopping == False:
            self.num_epochs_stop = max_epoch
        else:
            self.num_epochs_stop = num_epochs_stop
    
        self.layer_values = [None] * (self.layers + 2)
        self.iters = 0
        self.epochs = 0
                
    def validateHyperParams(self):
        
        if self.layers != (len(self.nodes) - 2):
            raise ValueError("layers must be equal to the number of hidden layers, got %s." % self.layers)
        if self.nnodes != None and self.nnodes <= 0:
            raise ValueError("nnodes must be > 0, got %s." % self.nnodes)
        if self.lr <= 0 or self.lr > 1:
            raise ValueError("lr must be in (0, 1], got %s." % self.lr)
            
        if self.lr_type not in ["constant", "invscaling", "annealing", "adaptive"]:
            raise ValueError("lr_type is not valid" % self.lr_type
                            + "\nAvailable lr types: constant, invscaling, adaptive")
            
        if self.max_epoch <= 0:
            raise ValueError("max_iter must be > 0, got %s." % self.max_epoch)
               
        activation_functions = list(ACTIVATIONS.keys())
        if self.activationFn != "":
            if self.activationFn not in activation_functions:
                raise ValueError("%s is not an activation function" % self.activationFn
                                + "\nAvailable activation functions: relu, leaky_relu, sigmoid, tanh")
    
    def initialize_weights(self, M):
        weights = []
        
        for i in range(self.layers + 1):
            if i == 0:
                input_size = M # special case for w1
            else:
                input_size = self.nodes[i]
            output_size = self.nodes[i + 1]
            
            # Xavier (Glorot) Initialization
            if self.activationFn == "tanh":
                target_variance = 2 / (input_size + output_size)
                w_i = np.random.normal(loc= 0, scale = np.sqrt(target_variance), size=(input_size, output_size))
            # He Initialization
            elif self.activationFn == "relu":
                target_variance = 2 / input_size
                w_i = np.random.normal(loc= 0, scale = np.sqrt(target_variance), size=(input_size, output_size))
            # Random Uniform
            else:
                w_i = np.random.uniform(-1/np.sqrt(input_size), 1/np.sqrt(input_size))
                #w_i = np.random.normal(size=(input_size, output_size))
            w_i = np.round(w_i, 2)
            w_i[input_size - 1:] = 0 # initialize bias to 0
            weights.append(w_i)
        return weights
    
    # returns the weight term for L2 regularization
    def get_weight_term(self):
        weight_term = 0
        for i in range(len(self.weights)):
            weight_term = np.sum(self.weights[i] ** 2)
        return weight_term
        
    def forward_pass(self, X_batch, y_batch):
        
        self.layer_values[0] = X_batch
        
        # calculate hidden layers
        for i in range(self.layers):
            X = self.layer_values[i]
            weights = self.weights[i]
            h_layer = X.dot(weights)
            
            # apply activation function
            activation_fn = ACTIVATIONS[self.activations[i]]
            activation_fn(h_layer)
            self.layer_values[i + 1] = h_layer
            
        
        # calculate predictions
        X = self.layer_values[self.layers] # values in last hidden layer
        weights = self.weights[self.layers]
        y_pred = X.dot(weights)
        y_pred = y_pred.flatten()
        
        # calculate the l2 loss
        l2_loss = 0
        # only need predictions once we have fit the data
        if isinstance(y_batch, np.ndarray): 
            l2_loss = squared_loss(y_pred, y_batch) # l2
#             weight_term = self.get_weight_term()
#             l2_loss += self.alpha * weight_term # l2 regularization
            self.layer_values[self.layers + 1] = l2_loss
        
        return l2_loss, y_pred
    
    
    def backward_pass(self, y_pred, y_batch):
        
        # loss layer
        J = squared_loss_derivative(y_pred, y_batch, self.batchSize)
        J = np.reshape(J, (len(J), 1))
        
        J_weights = [None] * (self.layers + 1)
        
        # output layer
        # jacobian w.r.t. weights
        x_t = self.layer_values[self.layers].T
        J_wi = x_t.dot(J)
        J_weights[self.layers] = J_wi
        
        # update jacobian at output layer
        w_t = self.weights[self.layers].T
        w_t = np.delete(w_t, w_t.shape[1] - 1, 1) # take out the bias
        J = np.dot(J, w_t)
        zeros = [0] * len(J)
        zeros = np.reshape(zeros, (len(J), 1))
        J = np.append(J, zeros, axis=1)
        
        # iterate through hidden layers backwards
        for i in range(self.layers, 0 , -1):
            # update jacobian at activation layer
            d_activation_fn = DERIVATIVES[self.activations[i - 1]]
            d_activation_fn(self.layer_values[i], J)
            
            # hidden layer
            # jacobian w.r.t. weights
            x_t = self.layer_values[i - 1].T
            J_wi = x_t.dot(J)
            J_weights[i - 1] = J_wi
            
            # jacobian w.r.t. inputs
            w_t = self.weights[i - 1].T
            w_t = np.delete(w_t, w_t.shape[1] - 1, 1)
            J = np.dot(J, w_t)
            zeros = [0] * len(J)
            zeros = np.reshape(zeros, (len(J), 1))
            J = np.append(J, zeros, axis=1)
            
            
        # initialize velocity to 0
        if self.epochs == 0 and self.iters == 0:
            self.velocity = []
            for i in range(len(J_weights)):
                n_rows = J_weights[i].shape[0]
                n_cols = J_weights[i].shape[1]
                vel_i = np.zeros((n_rows, n_cols))
                self.velocity.append(vel_i)
        
        for i in range(len(J_weights)):
            self.velocity[i] = self.mu * self.velocity[i] - self.lr * J_weights[i]
            self.weights[i] += self.velocity[i]
      
    
    def fit(self, X_train, y_train):
        
        self.validateHyperParams()
        # convert to numpy arrays
        if isinstance(X_train, pd.DataFrame):
            X_train = X_train.to_numpy()
            
        if isinstance(y_train, pd.Series):
            y_train = y_train.to_numpy()
            
        # add ones for bias
        ones = [1] * len(X_train)
        ones = np.reshape(ones, (len(X_train), 1))
        X_train = np.append(X_train, ones, axis=1)
        
        # save 10% for validation
        val_rows = round(len(X_train) * .1)
        X_val = X_train[:val_rows, :]
        y_val = y_train[:val_rows]
        
        X_train = X_train[val_rows:, :]
        y_train = y_train[val_rows:]
        
        # initalize weights on first iteration
        M = X_train.shape[1] # M = number of features
        self.weights = self.initialize_weights(M)
        
        previous_loss = np.inf
        n_epoch_no_change = 0
            
        while (self.epochs < self.max_epoch and n_epoch_no_change <= self.num_epochs_stop):
            # ONE EPOCH 
            last_idx = 0
            #np.random.shuffle(X_train) # shuffle data for each epoch 
            while (last_idx < len(X_train)):
                first_idx = self.iters * self.batchSize
                remaining_rows = len(X_train) - first_idx
                last_idx = first_idx + min(self.batchSize, remaining_rows)
                X_batch = X_train[first_idx: last_idx, :]
                y_batch = y_train[first_idx: last_idx]

                loss, y_pred = self.forward_pass(X_batch, y_batch)
                self.backward_pass(y_pred, y_batch)
                self.iters += 1
            
            # trainig and validation loss after one epoch
            t_loss, y_pred = self.forward_pass(X_train, y_train)
            v_loss, y_pred = self.forward_pass(X_val, y_val)
            print("epoch:", self.epochs)
            print("training loss:", t_loss)
            print("validation loss:", v_loss)
            
            self.iters = 0 # start over, next epoch
            self.epochs += 1
            
            # decrease the learning rate by one of three methods, if specified
            if self.lr_type == "invscaling":
                self.lr = self.lr/pow(self.epochs, self.power_t)
            elif self.lr_type == "annealing":
                self.lr = self.lr * self.annealing_rate
            elif self.lr_type == "adaptive":
                if n_epoch_no_change >= 2: 
                    self.lr = self.lr/5
                
            # stops when validation loss doesn't improve for num_epochs_stop
            if previous_loss - v_loss < self.tol: 
                n_epoch_no_change += 1
            else:
                n_epoch_no_change = 0
            previous_loss = v_loss
            print("num epoch no change:", n_epoch_no_change)
            
            
            
    
    def predict(self, X_test):
        
        # convert to numpy array
        if isinstance(X_test, pd.DataFrame):
            X_test = X_test.to_numpy()
        
        # add ones for bias
        ones = [1] * len(X_test)
        ones = np.reshape(ones, (len(X_test), 1))
        X_test = np.append(X_test, ones, axis=1)
        
        loss, y_pred = self.forward_pass(X_test, None)
        return y_pred
        

## Running Nueral Network on the Data

In [201]:
nodes = [100, 50, 100] # use to specify a number of hidden nodes per layer
activations = [] # use if you want a diff activationFn per layer

nn = NeuralNetwork(layers=3, nnodes=100, batchSize=50, 
                   activationFn="tanh", lr=.001, lr_type="annealing", 
                   max_epoch=2000, momentum=0.9, early_stopping=True)
nn.fit(X_std, y)

epoch: 0
training loss: 264.72394330398276
validation loss: 184.21362989874277
num epoch no change: 0
epoch: 1
training loss: 121.54397043887226
validation loss: 95.60397606267593
num epoch no change: 0
epoch: 2
training loss: 47.3516510024182
validation loss: 33.58201280550993
num epoch no change: 0
epoch: 3
training loss: 38.659051177770024
validation loss: 14.941949024046995
num epoch no change: 0
epoch: 4
training loss: 29.70672121159587
validation loss: 32.53632481235325
num epoch no change: 1
epoch: 5
training loss: 23.526012380034985
validation loss: 9.395078123534958
num epoch no change: 0
epoch: 6
training loss: 19.255157526750207
validation loss: 5.2729295585895315
num epoch no change: 0
epoch: 7
training loss: 15.314856627167826
validation loss: 4.145429410196056
num epoch no change: 0
epoch: 8
training loss: 11.716295428243793
validation loss: 3.873445998387917
num epoch no change: 0
epoch: 9
training loss: 9.455016986122011
validation loss: 2.750354871333595
num epoch no c

epoch: 88
training loss: 2.31803105803724
validation loss: 4.116690882384755
num epoch no change: 1
epoch: 89
training loss: 2.343785540213691
validation loss: 3.8723193575221537
num epoch no change: 0
epoch: 90
training loss: 2.3793361419088255
validation loss: 4.398370402301302
num epoch no change: 1
epoch: 91
training loss: 2.54751905666823
validation loss: 3.8571306886232284
num epoch no change: 0
epoch: 92
training loss: 2.7162975564498635
validation loss: 4.97612358931157
num epoch no change: 1
epoch: 93
training loss: 3.0575028516625844
validation loss: 4.1439548418154
num epoch no change: 0
epoch: 94
training loss: 3.3239700505397356
validation loss: 5.8019646157653115
num epoch no change: 1
epoch: 95
training loss: 3.698631938902314
validation loss: 4.551520387468745
num epoch no change: 0
epoch: 96
training loss: 3.7352286069634504
validation loss: 6.455278294776914
num epoch no change: 1
epoch: 97
training loss: 3.802215363748722
validation loss: 4.653276565172733
num epoch 

epoch: 174
training loss: 1.688091402384213
validation loss: 4.3375101750621425
num epoch no change: 2
epoch: 175
training loss: 1.7907104488000118
validation loss: 4.335930551763446
num epoch no change: 0
epoch: 176
training loss: 1.8807592735989715
validation loss: 4.583366685441302
num epoch no change: 1
epoch: 177
training loss: 2.016888054353651
validation loss: 4.522405206242128
num epoch no change: 0
epoch: 178
training loss: 2.1033085400202234
validation loss: 4.847622688003241
num epoch no change: 1
epoch: 179
training loss: 2.206336063673731
validation loss: 4.658586872060743
num epoch no change: 0
epoch: 180
training loss: 2.2073115538949892
validation loss: 4.999355809533918
num epoch no change: 1
epoch: 181
training loss: 2.2301228950247474
validation loss: 4.603601983124702
num epoch no change: 0
epoch: 182
training loss: 2.134886951790056
validation loss: 5.007426910912195
num epoch no change: 1
epoch: 183
training loss: 2.088592364381173
validation loss: 4.4555983719581

epoch: 260
training loss: 1.2080208511610895
validation loss: 4.891547602329574
num epoch no change: 5
epoch: 261
training loss: 1.196949648335299
validation loss: 4.902140353477648
num epoch no change: 6
epoch: 262
training loss: 1.1909579277321016
validation loss: 4.904706340748669
num epoch no change: 7
epoch: 263
training loss: 1.1799951665854655
validation loss: 4.919263348824082
num epoch no change: 8
epoch: 264
training loss: 1.1741767231579399
validation loss: 4.91818303999282
num epoch no change: 0
epoch: 265
training loss: 1.163335267351324
validation loss: 4.936253341046291
num epoch no change: 1
epoch: 266
training loss: 1.1576565098181941
validation loss: 4.931923584870131
num epoch no change: 0
epoch: 267
training loss: 1.1469527947943718
validation loss: 4.9530724562767245
num epoch no change: 1
epoch: 268
training loss: 1.1413834098685829
validation loss: 4.945890241042159
num epoch no change: 0
epoch: 269
training loss: 1.1308370874265117
validation loss: 4.96969104663

epoch: 347
training loss: 0.8647258593507872
validation loss: 5.41528553320357
num epoch no change: 0
epoch: 348
training loss: 0.8791864334979506
validation loss: 5.596506917810948
num epoch no change: 1
epoch: 349
training loss: 0.8915289581308985
validation loss: 5.4331525376168806
num epoch no change: 0
epoch: 350
training loss: 0.9047217715613636
validation loss: 5.62973426617884
num epoch no change: 1
epoch: 351
training loss: 0.914133754693554
validation loss: 5.444217878641879
num epoch no change: 0
epoch: 352
training loss: 0.9219888070339861
validation loss: 5.639315022213156
num epoch no change: 1
epoch: 353
training loss: 0.9255730741357129
validation loss: 5.44494033607239
num epoch no change: 0
epoch: 354
training loss: 0.9257717897192009
validation loss: 5.621588202533678
num epoch no change: 1
epoch: 355
training loss: 0.9221031675785114
validation loss: 5.437416643690493
num epoch no change: 0
epoch: 356
training loss: 0.9153527315921346
validation loss: 5.582734665577

epoch: 433
training loss: 0.6830567919730306
validation loss: 5.426015671626947
num epoch no change: 0
epoch: 434
training loss: 0.6830884222139707
validation loss: 5.478038581836805
num epoch no change: 1
epoch: 435
training loss: 0.6806674495251389
validation loss: 5.425040190440391
num epoch no change: 0
epoch: 436
training loss: 0.6807465270273224
validation loss: 5.477540698937058
num epoch no change: 1
epoch: 437
training loss: 0.6783740865999195
validation loss: 5.424012414636934
num epoch no change: 0
epoch: 438
training loss: 0.6785056460809199
validation loss: 5.477149676539316
num epoch no change: 1
epoch: 439
training loss: 0.6761823423054786
validation loss: 5.422933824991455
num epoch no change: 0
epoch: 440
training loss: 0.6763722711728177
validation loss: 5.476876213299878
num epoch no change: 1
epoch: 441
training loss: 0.6740994678979365
validation loss: 5.421806150694401
num epoch no change: 0
epoch: 442
training loss: 0.6743546576726958
validation loss: 5.476732771

epoch: 518
training loss: 0.6550965794530405
validation loss: 5.189510584422389
num epoch no change: 1
epoch: 519
training loss: 0.6478981495399674
validation loss: 5.17336200134245
num epoch no change: 0
epoch: 520
training loss: 0.6376589821471463
validation loss: 5.198918275442526
num epoch no change: 1
epoch: 521
training loss: 0.6326051189587295
validation loss: 5.18249365383381
num epoch no change: 0
epoch: 522
training loss: 0.6252267330926891
validation loss: 5.210352260994561
num epoch no change: 1
epoch: 523
training loss: 0.6214558518841443
validation loss: 5.193957586384416
num epoch no change: 0
epoch: 524
training loss: 0.6159456494661267
validation loss: 5.222687438124484
num epoch no change: 1
epoch: 525
training loss: 0.6129650598189408
validation loss: 5.2064904918647406
num epoch no change: 0
epoch: 526
training loss: 0.6087182582502657
validation loss: 5.235271743029296
num epoch no change: 1
epoch: 527
training loss: 0.6062390307781638
validation loss: 5.2193511552

epoch: 605
training loss: 0.5235820040710725
validation loss: 5.373614832949665
num epoch no change: 0
epoch: 606
training loss: 0.5232427286835378
validation loss: 5.394471423364223
num epoch no change: 1
epoch: 607
training loss: 0.5222874349428174
validation loss: 5.3737235970652195
num epoch no change: 0
epoch: 608
training loss: 0.5219572434437112
validation loss: 5.395057955387716
num epoch no change: 1
epoch: 609
training loss: 0.5210164996213307
validation loss: 5.3738050824609775
num epoch no change: 0
epoch: 610
training loss: 0.5206954511153976
validation loss: 5.395644546836067
num epoch no change: 1
epoch: 611
training loss: 0.5197698158090259
validation loss: 5.373862676079781
num epoch no change: 0
epoch: 612
training loss: 0.5194579812895336
validation loss: 5.396234848995773
num epoch no change: 1
epoch: 613
training loss: 0.5185480708542999
validation loss: 5.3738994621428215
num epoch no change: 0
epoch: 614
training loss: 0.5182455303031033
validation loss: 5.396832

epoch: 692
training loss: 0.49468780601452017
validation loss: 5.433584276819682
num epoch no change: 1
epoch: 693
training loss: 0.4948854346185132
validation loss: 5.37614433958583
num epoch no change: 0
epoch: 694
training loss: 0.49410235126212615
validation loss: 5.43411234991374
num epoch no change: 1
epoch: 695
training loss: 0.4942899110061998
validation loss: 5.376268895293642
num epoch no change: 0
epoch: 696
training loss: 0.4934305531984448
validation loss: 5.434541329268462
num epoch no change: 1
epoch: 697
training loss: 0.4936028531434979
validation loss: 5.376379300618543
num epoch no change: 0
epoch: 698
training loss: 0.4926656611925516
validation loss: 5.434868680651068
num epoch no change: 1
epoch: 699
training loss: 0.4928178063847015
validation loss: 5.376475572637202
num epoch no change: 0
epoch: 700
training loss: 0.4918021871570131
validation loss: 5.435093306385477
num epoch no change: 1
epoch: 701
training loss: 0.4919296911293952
validation loss: 5.376558461

epoch: 779
training loss: 0.42512428208881303
validation loss: 5.393152355068439
num epoch no change: 0
epoch: 780
training loss: 0.4242110799168016
validation loss: 5.417198539119324
num epoch no change: 1
epoch: 781
training loss: 0.4237952459240221
validation loss: 5.393858280971864
num epoch no change: 0
epoch: 782
training loss: 0.42290898732545806
validation loss: 5.4169454707837055
num epoch no change: 1
epoch: 783
training loss: 0.4224963463783933
validation loss: 5.3945572686150856
num epoch no change: 0
epoch: 784
training loss: 0.4216359548903664
validation loss: 5.416705737815981
num epoch no change: 1
epoch: 785
training loss: 0.42122634087526056
validation loss: 5.395248551832592
num epoch no change: 0
epoch: 786
training loss: 0.4203907568321362
validation loss: 5.416479015378331
num epoch no change: 1
epoch: 787
training loss: 0.4199839926705527
validation loss: 5.395931465999634
num epoch no change: 0
epoch: 788
training loss: 0.4191721772107865
validation loss: 5.4162

epoch: 864
training loss: 0.38370322956310493
validation loss: 5.416298554422444
num epoch no change: 1
epoch: 865
training loss: 0.3833196788728924
validation loss: 5.414795104976169
num epoch no change: 0
epoch: 866
training loss: 0.3829096899753887
validation loss: 5.416482036499543
num epoch no change: 1
epoch: 867
training loss: 0.3825266185354192
validation loss: 5.415124746281577
num epoch no change: 0
epoch: 868
training loss: 0.3821196441166815
validation loss: 5.416672510770077
num epoch no change: 1
epoch: 869
training loss: 0.38173710694406127
validation loss: 5.415450795718478
num epoch no change: 0
epoch: 870
training loss: 0.3813330062406222
validation loss: 5.416869757477993
num epoch no change: 1
epoch: 871
training loss: 0.3809510573781673
validation loss: 5.4157735785264585
num epoch no change: 0
epoch: 872
training loss: 0.3805496995038385
validation loss: 5.417073558013861
num epoch no change: 1
epoch: 873
training loss: 0.3801683916715022
validation loss: 5.416093

## Mean Absolute Error of Housing Predictions

In [202]:
mae = mean_absolute_error(y, nn.predict(X_std))
print('Mean absolute error: $%0.2f'%(mae*1000))

Mean absolute error: $809.69


Compare these to results to those in nn_tuning_example.ipynb.  Goal: Get MAE Under $1000 with our NN.  Then, we know our NN is working well and can use it on the dataset for this project.

## Compare to Linear Regression

In [8]:
class LR:
    
    def fit(self, X_train, y_train):
        # create vector of ones...
        ones = np.ones(shape=len(X_train))[..., None]
        #...and add to feature matrix
        X = np.concatenate((ones, X_train), 1)
        #calculate coefficients using closed-form solution
        self.coeffs = np.linalg.inv(X.transpose().dot(X)).dot(X.transpose()).dot(y_train)
        
    def predict(self, X_test):
        ones = np.ones(shape=len(X_test))[..., None]
        X_test = np.concatenate((ones, X_test), 1)
        y_hat = X_test.dot(self.coeffs)
        return y_hat

## Linear Regression MAE

In [9]:
lr = LR()
lr.fit(X, y)
mae = mean_absolute_error(y, lr.predict(X_std))
print('Mean absolute error: $%0.2f'%(mae*1000))

Mean absolute error: $17885.89
