In [174]:
import numpy as np
import math
from sklearn import datasets
from sklearn import preprocessing
from scipy.special import expit

In [175]:
#load development data
house_price_train = [[240,2],[260,2],[260,3],[360,2],[420,1],[350,2],[285,1]]
size_train = [1200,1400,1900,2600,2700,2400,1150]

house_sk_data = datasets.load_boston()
# size of training and cross validation sets
m_train = 400
m_cv = 506 - m_train

# training set, using features:
# B 1000(Bk - 0.63)^2, LSTAT % lower status of the population
# scaled for zero mean and unit variance
house_sk_train = preprocessing.scale(house_sk_data.data[:m_train, 10:12])
house_sk_train_bias = np.column_stack([np.ones((m_train, 1)), house_sk_train])

# CV set
house_sk_cv = preprocessing.scale(house_sk_data.data[m_train:, 10:12])
house_sk_cv_bias = np.column_stack([np.ones((m_cv, 1)), house_sk_cv])

# target set
house_sk_target = house_sk_data.target[:m_train]

In [176]:
# creating: neural net for regression
# backpropagation from scratch
# 2 features
# 1 hidden layer, 2 nodes, sigmoid activation
# MSE loss function, no regularization

#randomly initialize weights for first and second layer
weights = np.random.rand(2,3)
weights2 = np.random.rand(1,3)

In [177]:
# Forward and backward propagates for a given training example _i_
def propagate(i):
    global weights2, weights

    #learning rate
    a = 0.05

    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(house_sk_train_bias[i, :]))
    a2 = expit(z2)
    #a2 = np.maximum(0, z2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer and final activation (ReLu)
    z3 = np.dot(weights2, a2_bias)
    hypot = z3
    #hypot = np.maximum(0, z3)

    #print(f'Prediction: {z3}')

    # Backpropagates to accumulate partials and deltas
    # Delta for third layer (mean squared error)
    delta3 =  hypot - house_sk_target[i]
    # derivative of error w.r.t weights, second layer
    layer2partials = np.multiply(delta3, a2_bias)

    # derivative of error w.r.t weights, first layer
    # big_delta2 without bias
    delta2 = np.multiply(delta3, a2)
    delta2_matrix = np.column_stack([delta2, delta2, delta2])
    feat_bias_matrix = np.row_stack([house_sk_train_bias[i,:], house_sk_train_bias[i,:]])
    layer1partials = np.multiply(delta2_matrix, feat_bias_matrix)

    weights2 = weights2 - a*layer2partials
    weights = weights - a*layer1partials

# todo: vectorized implementation
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [178]:
# Train weights
for i in range(0, m_train):
    propagate(i)

In [179]:
# predict example in given set
def predict(j, pred_set):
    # z for second layer and activation (ReLu)
    z2 = np.dot(weights, np.transpose(pred_set[j, :]))
    #print('z2', z2)
    a2 = expit(z2)
    #a2 = np.maximum(0, z2)
    #print('a2', a2)
    # add bias unit for second layer
    a2_bias = np.concatenate((np.ones(1), a2))
    # z for third layer, no final activation for now
    z3 = np.dot(weights2, a2_bias)

    # calculate error
    delta3 =  z3 - house_sk_target[j]
    #print(f'Prediction: {z3}, actual: {house_sk_target[j]}, error: {delta3}')
    return np.square(z3 - house_sk_target[j])

In [180]:
tot_err = 0
# Predict CV example and see average error
for j in range(0, m_cv):
    global tot_err
    tot_err += predict(j, house_sk_cv_bias)

print('Average squared CV error ($): ', tot_err/m_cv)

Average squared CV error ($):  [34.78347861]


In [181]:
tot_err = 0
# Predict in training example and see average error
for j in range(0, m_train):
    global tot_err
    tot_err += predict(j, house_sk_train_bias)

print('Average squared training error ($): ', tot_err/m_train)


Average squared training error ($):  [80.3780794]
