In [101]:
import numpy as np
import math
from sklearn import datasets
from sklearn import preprocessing
from scipy.special import expit

In [102]:
#load development data
house_price_train = [[240,2],[260,2],[260,3],[360,2],[420,1],[350,2],[285,1]]
size_train = [1200,1400,1900,2600,2700,2400,1150]

house_sk_data = datasets.load_boston()
# size of training and cross validation sets
m_train = 400
m_cv = 506 - m_train

# training set, using features:
# B 1000(Bk - 0.63)^2, LSTAT % lower status of the population
# scaled for zero mean and unit variance
house_sk_train = preprocessing.scale(house_sk_data.data[:m_train, 10:12])
house_sk_train_bias = np.column_stack([np.ones((m_train, 1)), house_sk_train])

# CV set
house_sk_cv = preprocessing.scale(house_sk_data.data[m_train:, 10:12])
house_sk_cv_bias = np.column_stack([np.ones((m_cv, 1)), house_sk_cv])

# target set
house_sk_target = house_sk_data.target[:m_train]

In [103]:
# creating: neural net for regression
# backpropagation from scratch
# Training on m=7 examples, 2 features
# 1 hidden layer, 2 nodes, ReLu activation
# MSE loss function, no regularization

#randomly initialize weights for first and second layer
weights = np.random.rand(2,3)
weights2 = np.random.rand(1,3)

In [104]:
# Forward propagates for a given training example _i_
def propagate(i):
    global weights2, weights

    #learning rate
    a = 0.05

    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(house_sk_train_bias[i, :]))
    a2 = expit(z2)
    #a2 = np.maximum(0, z2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    print('a2 bias: ', a2_bias)
    # z for third layer and final activation (ReLu)
    z3 = np.dot(weights2, a2_bias)
    hypot = z3
    #hypot = np.maximum(0, z3)

    print(f'Prediction: {z3}')

    # Backpropagates to accumulate partials and deltas
    # Delta for third layer (mean squared error)
    delta3 =  hypot - house_sk_target[i]
    # derivative of error w.r.t weights, second layer
    layer2partials = np.multiply(delta3, a2_bias)

    # derivative of error w.r.t weights, first layer
    # big_delta2 without bias
    delta2 = np.multiply(delta3, a2)
    delta2_matrix = np.column_stack([delta2, delta2, delta2])
    feat_bias_matrix = np.row_stack([house_sk_train_bias[i,:], house_sk_train_bias[i,:]])
    layer1partials = np.multiply(delta2_matrix, feat_bias_matrix)

    weights2 = weights2 - a*layer2partials
    weights = weights - a*layer1partials

# todo: vectorized implementation
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [105]:
# Train weights
for i in range(0, m_train):
    propagate(i)

a2 bias:  [1.         0.49011332 0.44643012]
Prediction: [1.31880585]
a2 bias:  [1.         0.82791876 0.81558075]
Prediction: [3.88652819]
a2 bias:  [1.         0.91455942 0.89985882]
Prediction: [6.32058439]
a2 bias:  [1.         0.97878047 0.97699795]
Prediction: [10.54596201]
a2 bias:  [1.         0.994993   0.99477918]
Prediction: [14.025543]
a2 bias:  [1.         0.99859008 0.99844988]
Prediction: [17.36952803]
a2 bias:  [1.         0.99768592 0.99660966]
Prediction: [19.0490921]
a2 bias:  [1.         0.99872854 0.99818045]
Prediction: [19.64096373]
a2 bias:  [1.         0.99917017 0.99855709]
Prediction: [20.76300652]
a2 bias:  [1.         0.99855567 0.99749437]
Prediction: [20.11373272]
a2 bias:  [1.         0.99879009 0.9981191 ]
Prediction: [19.9375978]
a2 bias:  [1.         0.99816084 0.99736974]
Prediction: [19.18999089]
a2 bias:  [1.         0.99726467 0.99558863]
Prediction: [19.13028823]
a2 bias:  [1.         0.99971972 0.99976242]
Prediction: [19.5552882]
a2 bias:  [1. 

In [141]:
# predict example in given set
def predict(j, pred_set):
    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(pred_set[j, :]))
    print('z2', z2)
    a2 = expit(z2)
    #a2 = np.maximum(0, z2)
    print('a2', a2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer, no final activation for now
    z3 = np.dot(weights2, a2_bias)

    # calculate error
    delta3 =  z3 - house_sk_target[j]
    print(f'Prediction: {z3}, actual: {house_sk_target[j]}, error: {delta3}')
    return delta3[0]

In [144]:
tot_err = 0
# Predict CV example and see average error
for j in range(0, m_cv):
    global tot_err
    tot_err += predict(j, house_sk_cv_bias)

print('Average error: ', round(tot_err/m_cv, 4))

z2 [6.64139653 9.00493599]
a2 [0.9986965  0.99987721]
Prediction: [25.13103486], actual: 24.0, error: [1.13103486]
z2 [6.64139653 9.00493599]
a2 [0.9986965  0.99987721]
Prediction: [25.13103486], actual: 21.6, error: [3.53103486]
z2 [6.07557584 7.97421322]
a2 [0.99770695 0.99965589]
Prediction: [25.12460015], actual: 34.7, error: [-9.57539985]
z2 [6.64139653 9.00493599]
a2 [0.9986965  0.99987721]
Prediction: [25.13103486], actual: 33.4, error: [-8.26896514]
z2 [4.80594932 5.66140814]
a2 [0.99188545 0.99653444]
Prediction: [25.07787827], actual: 36.2, error: [-11.12212173]
z2 [6.31670962 8.41347266]
a2 [0.99819738 0.99977819]
Prediction: [25.12785071], actual: 28.7, error: [-3.57214929]
z2 [5.91527358 7.68219988]
a2 [0.99730934 0.99953925]
Prediction: [25.12187958], actual: 22.9, error: [2.22187958]
z2 [4.87752741 5.79179779]
a2 [0.99244174 0.9969568 ]
Prediction: [25.0829476], actual: 27.1, error: [-2.0170524]
z2 [4.40260816 4.92666491]
a2 [0.98790277 0.99280155]
Prediction: [25.038129