In [82]:
import numpy as np
import math
from sklearn import datasets
from sklearn import preprocessing

In [83]:
# creating: neural net for regression
# 2 features
# 1 hidden layer, 2 nodes, leaky-ReLu activation
# MSE loss function, no regularization

house_sk_data = datasets.load_boston()
# size of training and cross validation sets
m_train = 400
m_cv = 506 - m_train

# training set, using features:
# B 1000(Bk - 0.63)^2, LSTAT % lower status of the population
# scaled for zero mean and unit variance
house_sk_train = preprocessing.scale(house_sk_data.data[:m_train, 10:12])
house_sk_train_bias = np.column_stack([np.ones((m_train, 1)), house_sk_train])

# CV set
house_sk_cv = preprocessing.scale(house_sk_data.data[m_train:, 10:12])
house_sk_cv_bias = np.column_stack([np.ones((m_cv, 1)), house_sk_cv])

# target set
house_sk_target = house_sk_data.target[:m_train]

In [84]:
#randomly initialize weights for first and second layer
weights = np.random.rand(2,3)
weights2 = np.random.rand(1,3)

In [85]:
# Forward and backward propagates for a given training example _i_
def propagate(i):
    global weights2, weights

    #learning rate
    a = 0.009

    # z for second layer and activation (leaky-ReLu)
    z2 = np.dot(weights, np.transpose(house_sk_train_bias[i, :]))
    print('z2: ', z2)
    a2 = np.where(z2 > 0, z2, z2 * .01)
    print('a2: ', a2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer and final activation (ReLu)
    z3 = np.dot(weights2, a2_bias)
    hypot = z3
    #hypot = np.maximum(0, z3)

    #print(f'Prediction: {z3}')

    # Backpropagates to accumulate partials and deltas
    # Delta for third layer (mean squared error)
    delta3 =  hypot - house_sk_target[i]
    # derivative of error w.r.t weights, second layer
    layer2partials = np.multiply(delta3, a2_bias)

    # derivative of error w.r.t weights, first layer
    # big_delta2 without bias
    delta2 = np.multiply(delta3, a2)
    delta2_matrix = np.column_stack([delta2, delta2, delta2])
    feat_bias_matrix = np.row_stack([house_sk_train_bias[i,:], house_sk_train_bias[i,:]])
    layer1partials = np.multiply(delta2_matrix, feat_bias_matrix)

    weights2 = weights2 - a*layer2partials
    weights = weights - a*layer1partials

In [86]:
# Train weights
for i in range(0, m_train):
    propagate(i)

# fix: underfitting with small learning rate, overflow with large learning rate

z2:  [ 0.29332464 -0.43389722]
a2:  [ 0.29332464 -0.00433897]
z2:  [0.56667022 0.66440049]
a2:  [0.56667022 0.66440049]
z2:  [0.58215727 0.74622091]
a2:  [0.58215727 0.74622091]
z2:  [0.85193159 1.40479236]
a2:  [0.85193159 1.40479236]
z2:  [1.20386779 1.92025995]
a2:  [1.20386779 1.92025995]
z2:  [1.54028551 2.53187981]
a2:  [1.54028551 2.53187981]
z2:  [1.4022564  0.96549863]
a2:  [1.4022564  0.96549863]
z2:  [2.02494718 1.40104905]
a2:  [2.02494718 1.40104905]
z2:  [2.51901407 1.69040085]
a2:  [2.51901407 1.69040085]
z2:  [2.79283269 1.87462551]
a2:  [2.79283269 1.87462551]
z2:  [3.39502127 2.313424  ]
a2:  [3.39502127 2.313424  ]
z2:  [3.4395804  2.36800124]
a2:  [3.4395804  2.36800124]
z2:  [3.36376372 2.2822888 ]
a2:  [3.36376372 2.2822888 ]
z2:  [1.79014641 4.23612419]
a2:  [1.79014641 4.23612419]
z2:  [1.10326152 3.74353249]
a2:  [1.10326152 3.74353249]
z2:  [1.82482265 4.45202299]
a2:  [1.82482265 4.45202299]
z2:  [1.406727  4.0483654]
a2:  [1.406727  4.0483654]
z2:  [1.575925

In [87]:
# predict example in given set
def predict(j, pred_set):
    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(pred_set[j, :]))
    #print('z2', z2)
    a2 = np.where(z2 > 0, z2, z2 * .01)
    #print('a2', a2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer, no final activation for now
    z3 = np.dot(weights2, a2_bias)

    # calculate error
    sq_error =  np.square(z3 - house_sk_target[j])
    print(f'Prediction: {z3}, actual: {house_sk_target[j]}, error: {sq_error}')
    return sq_error

In [88]:
tot_err = 0
# Predict CV example and see average error
for j in range(0, m_cv):
    global tot_err
    tot_err += predict(j, house_sk_cv_bias)

print('Average squared CV error ($): ', tot_err/m_cv)

Prediction: [13.04132294], actual: 24.0, error: [120.09260295]
Prediction: [13.04132294], actual: 21.6, error: [73.25095305]
Prediction: [13.33330752], actual: 34.7, error: [456.53554753]
Prediction: [13.04132294], actual: 33.4, error: [414.47573171]
Prediction: [13.98848216], actual: 36.2, error: [493.35152477]
Prediction: [13.20887349], actual: 28.7, error: [239.97500042]
Prediction: [13.41602946], actual: 22.9, error: [89.94569715]
Prediction: [13.9515452], actual: 27.1, error: [172.8818637]
Prediction: [14.19662124], actual: 16.5, error: [5.30555371]
Prediction: [14.9914704], actual: 18.9, error: [15.27660364]
Prediction: [15.78824906], actual: 15.0, error: [0.62133658]
Prediction: [15.64197464], actual: 18.9, error: [10.61472928]
Prediction: [15.67019275], actual: 21.7, error: [36.35857544]
Prediction: [14.84898243], actual: 20.4, error: [30.81379606]
Prediction: [15.40207556], actual: 18.2, error: [7.82838117]
Prediction: [15.67713459], actual: 19.9, error: [17.83259227]
Predicti

In [89]:
tot_err = 0
# Predict in training example and see average error
for j in range(0, m_train):
    global tot_err
    tot_err += predict(j, house_sk_train_bias)

print('Average squared training error ($): ', tot_err/m_train)

Prediction: [16.72764487], actual: 24.0, error: [52.88714913]
Prediction: [14.38993411], actual: 21.6, error: [51.9850501]
Prediction: [14.60689403], actual: 34.7, error: [403.7329074]
Prediction: [13.66936537], actual: 33.4, error: [389.29794291]
Prediction: [13.54835824], actual: 36.2, error: [513.09687443]
Prediction: [13.69655199], actual: 28.7, error: [225.10345207]
Prediction: [16.89045254], actual: 22.9, error: [36.1146607]
Prediction: [16.8211533], actual: 27.1, error: [105.65468947]
Prediction: [17.36861728], actual: 16.5, error: [0.75449597]
Prediction: [17.36435271], actual: 18.9, error: [2.3582126]
Prediction: [17.05463842], actual: 15.0, error: [4.22153905]
Prediction: [16.8211533], actual: 18.9, error: [4.3216036]
Prediction: [17.16231878], actual: 21.7, error: [20.59055088]
Prediction: [12.5457258], actual: 20.4, error: [61.68962323]
Prediction: [12.55472404], actual: 18.2, error: [31.86914069]
Prediction: [12.54640813], actual: 19.9, error: [54.0753134]
Prediction: [12.