In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def relu(z):
    a = np.maximum(0,z)
    return a

def initialize_params(layer_sizes):
    params = {}
    for i in range(1, len(layer_sizes)):
        params['W' + str(i)] = np.random.randn(layer_sizes[i], layer_sizes[i-1])*0.01
        params['B' + str(i)] = np.random.randn(layer_sizes[i],1)*0.01
    return params

def forward_propagation(X_train, params):
    layers = len(params)//2
    values = {}
    for i in range(1, layers+1):
        if i==1:
            values['Z' + str(i)] = np.dot(params['W' + str(i)], X_train) + params['B' + str(i)]
            values['A' + str(i)] = relu(values['Z' + str(i)])
        else:
            values['Z' + str(i)] = np.dot(params['W' + str(i)], values['A' + str(i-1)]) + params['B' + str(i)]
            if i==layers:
                values['A' + str(i)] = values['Z' + str(i)]
            else:
                values['A' + str(i)] = relu(values['Z' + str(i)])
    return values

def compute_cost(values, Y_train):
    layers = len(values)//2
    Y_pred = values['A' + str(layers)]
    cost = 1/(2*len(Y_train)) * np.sum(np.square(Y_pred - Y_train))
    return cost

def backward_propagation(params, values, X_train, Y_train):
    layers = len(params)//2
    m = len(Y_train)
    grads = {}
    for i in range(layers,0,-1):
        if i==layers:
            dA = 1/m * (values['A' + str(i)] - Y_train)
            dZ = dA
        else:
            dA = np.dot(params['W' + str(i+1)].T, dZ)
            dZ = np.multiply(dA, np.where(values['A' + str(i)]>=0, 1, 0))
        if i==1:
            grads['W' + str(i)] = 1/m * np.dot(dZ, X_train.T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
        else:
            grads['W' + str(i)] = 1/m * np.dot(dZ,values['A' + str(i-1)].T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
    return grads

def update_params(params, grads, learning_rate):
    layers = len(params)//2
    params_updated = {}
    for i in range(1,layers+1):
        params_updated['W' + str(i)] = params['W' + str(i)] - learning_rate * grads['W' + str(i)]
        params_updated['B' + str(i)] = params['B' + str(i)] - learning_rate * grads['B' + str(i)]
    return params_updated

def model(X_train, Y_train, layer_sizes, num_iters, learning_rate):
    params = initialize_params(layer_sizes)
    for i in range(num_iters):
        values = forward_propagation(X_train.T, params)
        cost = compute_cost(values, Y_train.T)
        grads = backward_propagation(params, values,X_train.T, Y_train.T)
        params = update_params(params, grads, learning_rate)
        print('Cost at iteration ' + str(i+1) + ' = ' + str(cost) + '\n')
    return params

def compute_accuracy(X_train, X_test, Y_train, Y_test, params):
    values_train = forward_propagation(X_train.T, params)
    values_test = forward_propagation(X_test.T, params)
    train_acc = np.sqrt(mean_squared_error(Y_train, values_train['A' + str(len(layer_sizes)-1)].T))
    test_acc = np.sqrt(mean_squared_error(Y_test, values_test['A' + str(len(layer_sizes)-1)].T))
    return train_acc, test_acc

def predict(X, params):
    values = forward_propagation(X.T, params)
    predictions = values['A' + str(len(values)//2)].T
    return predictions

data = load_boston()                                                              #load dataset
X,Y = data["data"], data["target"]                                                #separate data into input and output features
X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.2)           #split data into train and test sets in 80-20 ratio
layer_sizes = [13, 5, 5, 1]                                                       #set layer sizes, do not change the size of the first and last layer 
num_iters = 1000                                                                  #set number of iterations over the training set(also known as epochs in batch gradient descent context)
learning_rate = 0.03                                                              #set learning rate for gradient descent
params = model(X_train, Y_train, layer_sizes, num_iters, learning_rate)           #train the model
train_acc, test_acc = compute_accuracy(X_train, X_test, Y_train, Y_test, params)  #get training and test accuracy
print('Root Mean Squared Error on Training Data = ' + str(train_acc))
print('Root Mean Squared Error on Test Data = ' + str(test_acc))


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

Cost at iteration 1 = 293.5256632160988

Cost at iteration 2 = 293.4867010255943

Cost at iteration 3 = 293.447844037516

Cost at iteration 4 = 293.409079837875

Cost at iteration 5 = 293.37039610540853

Cost at iteration 6 = 293.33178625287184

Cost at iteration 7 = 293.29323126309566

Cost at iteration 8 = 293.25472545578697

Cost at iteration 9 = 293.21620235906425

Cost at iteration 10 = 293.1775228877344

Cost at iteration 11 = 293.1385478201749

Cost at iteration 12 = 293.09926725539134

Cost at iteration 13 = 293.05963951727887

Cost at iteration 14 = 293.0197803892808

Cost at iteration 15 = 292.979729301569

Cost at iteration 16 = 292.9394993185929

Cost at iteration 17 = 292.89911819573035

Cost at iteration 18 = 292.8586106934559

Cost at iteration 19 = 292.81796578769695

Cost at iteration 20 = 292.77709801122325

Cost at iteration 21 = 292.7359664265728

Cost at iteration 22 = 292.6947232875537

Cost at iteration 23 = 292.6533871244845

Cost at iteration 24 = 292.611894227

Cost at iteration 788 = 33.72919809899811

Cost at iteration 789 = 33.72231315983525

Cost at iteration 790 = 33.71543085846242

Cost at iteration 791 = 33.70855118671306

Cost at iteration 792 = 33.701674136196

Cost at iteration 793 = 33.69479969829657

Cost at iteration 794 = 33.68792786417783

Cost at iteration 795 = 33.68105862478169

Cost at iteration 796 = 33.6741919708302

Cost at iteration 797 = 33.66732789282677

Cost at iteration 798 = 33.66046638105742

Cost at iteration 799 = 33.65360742559215

Cost at iteration 800 = 33.64675101628621

Cost at iteration 801 = 33.63989714278144

Cost at iteration 802 = 33.6330457945077

Cost at iteration 803 = 33.626196960684226

Cost at iteration 804 = 33.61935063032106

Cost at iteration 805 = 33.61250679222046

Cost at iteration 806 = 33.605665434978434

Cost at iteration 807 = 33.598826546986174

Cost at iteration 808 = 33.59199011643155

Cost at iteration 809 = 33.585156131300714

Cost at iteration 810 = 33.57832457937954

Cost at ite