In [606]:
import numpy as np
from sklearn import datasets
from sklearn import preprocessing

In [607]:
# creating: neural net for regression
# 5 features
# 1 hidden layer, 10 nodes, leaky-relu activation
# MSE loss function

# house_sk_data = datasets.load_boston()
# # size of training and cross validation sets
# m_train = 400
# m_cv = 506 - m_train

# easier dataset to work with
house_sk_data = datasets.load_diabetes()
m_train = 400
m_cv = 442 - m_train

# training set, using features:
# B 1000(Bk - 0.63)^2, LSTAT % lower status of the population
# scaled for zero mean and unit variance
house_sk_train = preprocessing.scale(house_sk_data.data[:m_train, :5])
house_sk_train_bias = np.column_stack([np.ones((m_train, 1)), house_sk_train])

# CV set
house_sk_cv = preprocessing.scale(house_sk_data.data[m_train:, :5])
house_sk_cv_bias = np.column_stack([np.ones((m_cv, 1)), house_sk_cv])

# target set
house_sk_target = house_sk_data.target[:m_train]

In [608]:
#randomly initialize weights for first and second layer
weights = np.random.rand(10,6)
weights2 = np.random.rand(1,11)

In [609]:
# Forward and backward propagates for a given training example _i_
def propagate(i):
    global weights2, weights

    # learning rate
    # 0.0003 retains lowest MSE for CV set
    a = 0.0003

    # z for second layer and activation (leaky-ReLu)
    z2 = np.dot(weights, np.transpose(house_sk_train_bias[i, :]))
    a2 = np.where(z2 > 0, z2, 0.01*z2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer and final activation (ReLu)
    z3 = np.dot(weights2, a2_bias)
    # regression, so hypothesis is linear
    hypot = z3
    # Backpropagates to accumulate partials and deltas
    # Delta for third layer (mean squared error)
    delta3 =  hypot - house_sk_target[i]
    # derivative of error w.r.t weights, second layer
    layer2partials = np.multiply(delta3, a2_bias)
    print('layer 2 partials: ', layer2partials)
    # derivative of error w.r.t weights, first layer

    # stack this the # of input features there are + 1
    layer2partials_matrix = np.column_stack([layer2partials[1:] for n in range(6)])

    # stack this the number of hidden layer units there are
    feat_bias_matrix = np.row_stack([house_sk_train_bias[i,:] for n in range(10)])

    layer1partials = np.multiply(layer2partials_matrix, feat_bias_matrix)
    print('layer 1 partials: ', layer1partials)
    weights2 = weights2 - (a*layer2partials)
    weights = weights - (a*layer1partials)

In [610]:
# Train weights
for z in range(0, m_train):
    propagate(z)

hypot:  [150.12151749]
layer 2 partials:  [ -0.87848251 -10.27191893  -5.11838229  -4.86325694  -7.55266979
  -6.38003737 -12.96911295  -6.06959333  -4.61089812  -5.76274461
  -2.20900685]
layer 1 partials:  [[-10.27191893  -8.17066245 -10.74504014 -13.6320872   -4.77385637
    9.45832054]
 [ -5.11838229  -4.07134969  -5.3541333   -6.79271655  -2.37875922
    4.71297531]
 [ -4.86325694  -3.8684136   -5.08725696  -6.45413415  -2.26019017
    4.47805744]
 [ -7.55266979  -6.00767159  -7.90054329 -10.02331248  -3.51009009
    6.95445246]
 [ -6.38003737  -5.07491659  -6.6738998   -8.46708647  -2.96511122
    5.87469965]
 [-12.96911295 -10.31610987 -13.56646604 -17.2115921   -6.02737258
   11.94188041]
 [ -6.06959333  -4.82797797  -6.34915681  -8.05508943  -2.82083289
    5.58884466]
 [ -4.61089812  -3.66767811  -4.82327458  -6.11922327  -2.14290685
    4.24568698]
 [ -5.76274461  -4.58389921  -6.02817474  -7.64786381  -2.67822549
    5.30630022]
 [ -2.20900685  -1.75712536  -2.31075298  -2.

In [611]:
# predict example in given set
def predict(j, pred_set):
    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(pred_set[j, :]))
    #print('z2', z2)
    a2 = np.where(z2 > 0, z2, 0.01*z2)
    print('a2', a2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer, no final activation for now
    z3 = np.dot(weights2, a2_bias)

    # calculate error
    sq_error =  (np.square(z3 - house_sk_target[j]))
    print(f'Prediction: {z3}, actual: {house_sk_target[j]}, error: {sq_error}')
    return sq_error

In [612]:
tot_err = 0
# Predict CV example and see average error
for x in range(0, m_cv):
    global tot_err
    tot_err += predict(x, house_sk_cv_bias)

print('MSE CV ($): ', tot_err/ (2 * m_cv))

a2 [1.92005259 9.20295114 7.81515483 6.70979529 0.18206268 3.26804811
 7.27028269 9.06408469 1.96857699 5.12126436]
Prediction: [206.69898498], actual: 151.0, error: [3102.37692763]
a2 [-3.71125322e-02  2.44429674e+00  3.46424882e+00  7.43280483e+00
 -3.30695121e-02 -7.66056893e-03 -1.49295764e-02  1.09272333e+01
 -7.89301139e-03 -2.06102807e-02]
Prediction: [114.17878345], actual: 75.0, error: [1534.97707289]
a2 [ 1.48447062e+01  4.02248949e+00  3.91245907e+00 -1.70038181e-02
  7.60802541e+00  2.36947107e+01  4.65722221e+00 -4.20560848e-02
  4.72483831e+00  2.73995375e+00]
Prediction: [186.49932688], actual: 141.0, error: [2070.18874633]
a2 [ 3.05106966e+00  4.19262262e+00  5.67319332e+00  1.46747292e+01
  2.37226026e+00  3.98132355e+00  3.04118785e+00  1.21976721e+01
  4.00859746e+00 -7.30369800e-04]
Prediction: [215.13892207], actual: 206.0, error: [83.51989653]
a2 [2.93745388 7.84693542 7.11823469 8.18268587 1.0070872  4.99922459
 6.19645997 9.51377737 1.84463197 3.5599858 ]
Predic

In [613]:
tot_err = 0
# Predict in training example and see average error
for y in range(0, m_train):
    global tot_err
    tot_err += predict(y, house_sk_train_bias)

print('MSE training ($): ', tot_err/(2 * m_train))

a2 [16.9153837   4.72273711  4.51021055  5.83191629  8.82750844 23.72726854
  5.21561235  0.41374305  6.5346846   1.18216069]
Prediction: [233.92982797], actual: 151.0, error: [6877.35636705]
a2 [-6.82829978e-02  4.43881005e+00  4.66600043e+00  6.25063146e+00
 -5.25480456e-02 -5.80726565e-02 -1.05668594e-03  1.14808928e+01
 -6.03926289e-03  2.43556151e-01]
Prediction: [127.55046021], actual: 75.0, error: [2761.55086867]
a2 [ 1.82611526e+01  3.38054300e+00  4.04361994e+00  4.86899006e+00
  1.02785395e+01  2.68004000e+01  4.97706478e+00 -1.03796208e-02
  5.84441205e+00  8.49724129e-01]
Prediction: [231.1578495], actual: 141.0, error: [8128.43782711]
a2 [-0.10745404  3.69524441  3.96233547 12.05100844 -0.08083286 -0.13671558
 -0.03125251 16.26120058  1.62185461 -0.0245567 ]
Prediction: [168.26062484], actual: 206.0, error: [1424.26043725]
a2 [-0.04334849  7.26437152  6.5852667   4.25040973 -0.03504579 -0.03257253
  4.01030822  9.47821064  0.05366232  3.9657866 ]
Prediction: [152.60139303]