### Import Packages

In [136]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
plt.style.use('bmh')
%matplotlib inline

### Load data from UCI database

In [7]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'

cols = ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'bk', 'lstat', 'medv']

df = pd.read_csv (url, header=None, names = cols, delim_whitespace=True)

df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,bk,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


### Create input variable matrix X and dependent variant y

In [282]:
# take 'rm' & 'age' as inputs, 'medv' as dependent

X = df.loc[:, ['rm', 'age']].values
y = df.medv.values

### Codes for algorithm

In [283]:
# define cost function

def cost(theta, X, y):
    
    cost = (X.dot(theta) - y).dot(X.dot(theta) - y)*(1/(2*len(y)))
    
    return cost

In [284]:
# define gradient descent

def gradient_descent(X, y, theta, lr  = 0.001, n = 100):
    
    m = len(y)
    cost_history = np.zeros(n)
    theta_history = np.zeros([theta.shape[0], n])
    theta_temp = np.zeros(theta.shape)
    
    for i in range(n):        
        
        for j in range(len(theta)):
            
            theta[j] = theta[j] - lr * (1/m) * X[:, j].dot(X.dot(theta) - y)
            
            theta_history[:, i] = theta.T
            cost_history[i] = cost(theta, X, y)
            print(theta)
        
    return theta, cost_history, theta_history

### Test Results

In [285]:
t = np.random.randn(2) * 100
t

array([241.97423564,  18.72137557])

In [286]:
theta, cost_hist, theta_hist = gradient_descent(X, y, t, n = 100)

[224.46441404  18.72137557]
[ 224.46441404 -178.34563619]
[ 291.64981239 -178.34563619]
[291.64981239 678.50130916]
[ -9.06162911 678.50130916]
[   -9.06162911 -3043.40632154]
[ 1288.6246539  -3043.40632154]
[ 1288.6246539  13127.16959102]
[-4357.89679442 13127.16959102]
[ -4357.89679442 -57125.54127134]
[ 20164.88927137 -57125.54127134]
[ 20164.88927137 248089.39475526]
[-86383.40700297 248089.39475526]
[  -86383.40700297 -1077922.17450415]
[  376509.22628124 -1077922.17450415]
[ 376509.22628124 4682961.55627499]
[-1634545.28277168  4682961.55627499]
[ -1634545.28277168 -20345309.29810547]
[  7102511.78461924 -20345309.29810547]
[ 7102511.78461924 88390501.26226899]
[-30855811.04878985  88390501.26226899]
[-3.08558110e+07 -3.84014344e+08]
[ 1.34054853e+08 -3.84014344e+08]
[1.34054853e+08 1.66835770e+09]
[-5.82402746e+08  1.66835770e+09]
[-5.82402746e+08 -7.24821260e+09]
[ 2.53026123e+09 -7.24821260e+09]
[2.53026123e+09 3.14900006e+10]
[-1.09927680e+10  3.14900006e+10]
[-1.09927680e+10