In [534]:
import numpy as np
from sklearn import datasets
from sklearn import preprocessing

In [535]:
# creating: neural net for regression
# 5 features
# 1 hidden layer, 10 nodes, leaky-relu activation
# MSE loss function

# house_sk_data = datasets.load_boston()
# # size of training and cross validation sets
# m_train = 400
# m_cv = 506 - m_train

# easier dataset to work with
house_sk_data = datasets.load_diabetes()
m_train = 400
m_cv = 442 - m_train

# training set, using features:
# B 1000(Bk - 0.63)^2, LSTAT % lower status of the population
# scaled for zero mean and unit variance
house_sk_train = preprocessing.scale(house_sk_data.data[:m_train, :5])
house_sk_train_bias = np.column_stack([np.ones((m_train, 1)), house_sk_train])

# CV set
house_sk_cv = preprocessing.scale(house_sk_data.data[m_train:, :5])
house_sk_cv_bias = np.column_stack([np.ones((m_cv, 1)), house_sk_cv])

# target set
house_sk_target = house_sk_data.target[:m_train]

In [536]:
#randomly initialize weights for first and second layer
weights = np.random.rand(10,6)
weights2 = np.random.rand(1,11)

In [537]:
# Forward and backward propagates for a given training example _i_
def propagate(i):
    global weights2, weights

    # learning rate
    # 0.0003 retains lowest MSE for CV set
    a = 0.0003

    # z for second layer and activation (leaky-ReLu)
    z2 = np.dot(weights, np.transpose(house_sk_train_bias[i, :]))
    a2 = np.where(z2 > 0, z2, 0.01*z2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer and final activation (ReLu)
    z3 = np.dot(weights2, a2_bias)
    # regression, so hypothesis is linear
    hypot = z3

    # Backpropagates to accumulate partials and deltas
    # Delta for third layer (mean squared error)
    delta3 =  hypot - house_sk_target[i]
    # derivative of error w.r.t weights, second layer
    layer2partials = np.multiply(delta3, a2_bias)
    print('layer 2 partials: ', layer2partials)
    # derivative of error w.r.t weights, first layer

    # stack this the # of input features there are + 1
    layer2partials_matrix = np.column_stack([layer2partials[1:] for n in range(6)])

    # stack this the number of hidden layer units there are
    feat_bias_matrix = np.row_stack([house_sk_train_bias[i,:] for n in range(10)])

    layer1partials = np.multiply(layer2partials_matrix, feat_bias_matrix)
    print('layer 1 partials: ', layer1partials)
    weights2 = weights2 - a*layer2partials
    weights = weights - a*layer1partials

In [538]:
# Train weights
for z in range(0, m_train):
    propagate(z)

layer 2 partials:  [-140.86166425 -333.43519632 -245.97595525 -360.41452345 -296.27545108
 -229.4062837  -300.05876366 -245.78155474 -404.29124999 -303.10233973
 -345.26116764]
layer 1 partials:  [[-333.43519632 -265.22662977 -348.79311215 -442.50910691 -154.96342482
   307.02510289]
 [-245.97595525 -195.65832982 -257.30552711 -326.44004436 -114.31689537
   226.49316509]
 [-360.41452345 -286.68697974 -377.01509824 -478.3139592  -167.50201996
   331.86750338]
 [-296.27545108 -235.66840046 -309.92180122 -393.19360014 -137.69349816
   272.80863524]
 [-229.4062837  -182.47820309 -239.97266191 -304.45007257 -106.61616947
   211.2359122 ]
 [-300.05876366 -238.67778656 -313.87937194 -398.21451662 -139.45178607
   276.29228648]
 [-245.78155474 -195.50369649 -257.10217257 -326.18205121 -114.22654807
   226.31416228]
 [-404.29124999 -321.58814325 -422.91277242 -536.54371805 -187.89365195
   372.2689266 ]
 [-303.10233973 -241.098759   -317.06313412 -402.25371267 -140.86628273
   279.09479283]
 [-

In [539]:
# predict example in given set
def predict(j, pred_set):
    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(pred_set[j, :]))
    #print('z2', z2)
    a2 = np.where(z2 > 0, z2, 0.01*z2)
    print('a2', a2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer, no final activation for now
    z3 = np.dot(weights2, a2_bias)

    # calculate error
    sq_error =  (np.square(z3 - house_sk_target[j]))
    print(f'Prediction: {z3}, actual: {house_sk_target[j]}, error: {sq_error}')
    return sq_error

In [540]:
tot_err = 0
# Predict CV example and see average error
for x in range(0, m_cv):
    global tot_err
    tot_err += predict(x, house_sk_cv_bias)

print('MSE CV ($): ', tot_err/m_cv)

a2 [7.63161794 5.09118314 7.00310232 3.49678576 2.96634285 2.62415253
 5.40320457 5.61475268 9.84405015 3.93732244]
Prediction: [214.40480097], actual: 151.0, error: [4020.16878548]
a2 [-8.64526362e-03 -1.62481917e-02 -4.18531393e-03  2.04560992e-01
 -5.40854358e-03  3.50940227e-01 -2.38697416e-02  4.47364152e-01
  1.33600492e+01  7.08570763e-01]
Prediction: [111.87330902], actual: 75.0, error: [1359.6409179]
a2 [ 6.99212846  3.932375    7.8004519   8.37796987  4.8587042   6.43328167
  4.51781529  9.54383448 -0.03442394  7.37022782]
Prediction: [182.32685783], actual: 141.0, error: [1707.90917825]
a2 [ 3.52216382  1.83118618  3.68462022  2.2965735   2.7074965   3.38361219
  1.90665653  4.22441046 16.75560197  4.53008516]
Prediction: [217.2752418], actual: 206.0, error: [127.13107756]
a2 [ 6.48647398  4.34119896  6.25020136  3.5436811   2.97225336  3.16197421
  4.42482373  5.61045057 11.01163076  4.2962762 ]
Prediction: [214.10274053], actual: 135.0, error: [6257.2435591]
a2 [ 1.0740793

In [541]:
tot_err = 0
# Predict in training example and see average error
for y in range(0, m_train):
    global tot_err
    tot_err += predict(y, house_sk_train_bias)

print('MSE training ($): ', tot_err/m_train)

a2 [ 7.74330619  5.0793487   8.34715872  8.60830407  5.51584635  7.50425146
  5.41920735 10.54497645  2.56957576  8.69121662]
Prediction: [223.73892763], actual: 151.0, error: [5290.95159255]
a2 [ 4.74349630e-01 -8.53309159e-03  2.22181599e-01 -5.99179598e-03
 -8.46640775e-03 -9.77912972e-03 -1.23426025e-02 -2.84347622e-03
  1.31593361e+01 -3.06511335e-03]
Prediction: [108.22834499], actual: 75.0, error: [1104.12291088]
a2 [ 7.02262514  4.49055786  8.20457872  8.99217459  5.90858057  8.27502321
  4.80543307 10.89075667  1.50137751  9.22250759]
Prediction: [215.2876909], actual: 141.0, error: [5518.66101929]
a2 [-1.42001228e-02 -2.46630003e-02 -2.42450284e-02 -2.81256684e-02
 -2.46936052e-02 -2.91882105e-02 -2.95208338e-02 -2.87071043e-02
  1.93141938e+01 -1.91668441e-02]
Prediction: [152.4426615], actual: 206.0, error: [2868.38850754]
a2 [ 4.22249534  2.01803321  3.65095504  0.9831482   0.80544961  0.08687566
  2.18145798  2.06141669 10.18249359  1.09622587]
Prediction: [138.81617746],