In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load Boston House Price Dataset

In [2]:
boston_data = datasets.load_boston()

In [3]:
X=boston_data.data
y=boston_data.target

In [4]:
#Normalize Dataset for better and faster converging of gradient descent
X = (X - X.mean())/X.std()

In [5]:
#Split the data for training and testing pupose
X_train, X_test, y_train, y_test =train_test_split(X,y,test_size=0.3,random_state=42)

# Initialize Hyperparameters

In [6]:
# Learning rate decides how fast is the gradient decent would move in order to converge
# Problem with high learning rate is that it might never coverge
# Where problem with lower learning rate is that it might take huge amount of time to converge
# Its always interseting to play with and find a perfect balance where a learning rate will 
# converge and also doesnot takes ages to do so
learning_rate=0.1 

# Iteration count decides how many time you want the gradient descent keeps using training data to reduce error
iteration_count=40000

# intercept is a constant that determines where the classification line intersects the varriable axes in space 
# having no intercept will constrain the classification line to pass through orgion of all axes in coordinate 
# space , and with intercept there will be a good flexiblity for the algorithm to find best classification line
intercept = True

In [7]:
def error(yHat,y_train):
    temp = yHat-y_train
    return np.mean(np.sqrt(temp**2))

In [8]:
# Add the intecept variable in training and test data if the value is True
if intercept:
    X_train_intercept = np.ones((X_train.shape[0], 1))
    X_test_intercept = np.ones((X_test.shape[0], 1))
    X_train = np.concatenate((X_train_intercept, X_train), axis=1)
    X_test = np.concatenate((X_test_intercept, X_test), axis=1)

In [9]:
# Initializing the coefficients
theta = np.zeros(X_train.shape[1])

# Trainig on Data ; Find coefficients of linear equation

In [10]:
# Initialize the coefficients to zero
yHat = np.dot(X_train, theta)

In [11]:
# calculate the direction of change for coefficients
gradient = np.dot(X_train.T, (yHat- y_train)) / y.size

In [12]:
# re evaluate coefficients by subtracting the multiple of learning rate and gradient
theta -= learning_rate * gradient

In [13]:
print("Initial Error :",error(yHat,y_train))

Initial Error : 23.015819209


In [14]:
# now iterate the above steps for iteration_count-1 times and we have already done it one time
for i in range(1,iteration_count):
    yHat = np.dot(X_train, theta)
    
    gradient = np.dot(X_train.T, (yHat- y_train)) / y.size
    theta -= learning_rate * gradient
    
    rmse = error(yHat , y_train)
    
    if(i%1000==0):
        print("iteration number : ",i,"; error : ",rmse)

iteration number :  1000 ; error :  5.35297861294
iteration number :  2000 ; error :  5.10549650286
iteration number :  3000 ; error :  4.9165991875
iteration number :  4000 ; error :  4.75507676995
iteration number :  5000 ; error :  4.62804087341
iteration number :  6000 ; error :  4.51991534116
iteration number :  7000 ; error :  4.42606963717
iteration number :  8000 ; error :  4.34409049912
iteration number :  9000 ; error :  4.27625361397
iteration number :  10000 ; error :  4.22239947922
iteration number :  11000 ; error :  4.18472652736
iteration number :  12000 ; error :  4.15691982486
iteration number :  13000 ; error :  4.1342647397
iteration number :  14000 ; error :  4.11784110321
iteration number :  15000 ; error :  4.10409585524
iteration number :  16000 ; error :  4.0941911164
iteration number :  17000 ; error :  4.08572506242
iteration number :  18000 ; error :  4.078861767
iteration number :  19000 ; error :  4.07257918605
iteration number :  20000 ; error :  4.066444

In [15]:
# store prediction result
prediction=np.dot(X_test, theta)

# Evaluate Result

In [16]:
print("Train Error :",error(y_train,np.dot(X_train,theta)))

Train Error : 3.96833501553


In [17]:
print("Test Error :",error(prediction,y_test))

Test Error : 3.99579480029
