In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [56]:
# Load the house-sales CSV (relative path, resolved against the current working directory).
dataset=pd.read_csv("house_data.csv")
# Preview the first rows to check which columns were read.
dataset.head()
# `dataset` is a pandas DataFrame object

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [57]:
# Keep the three predictor columns plus "price" (used later as the target) as the working table.
x = dataset[["bedrooms", "bathrooms", "sqft_living", "price"]]
x.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,price
0,3,1.0,1180,221900.0
1,3,2.25,2570,538000.0
2,2,1.0,770,180000.0
3,4,3.0,1960,604000.0
4,3,2.0,1680,510000.0


In [61]:
# Feature scaling: standardise every selected column (predictors and price) by
# subtracting its column mean and dividing by its column standard deviation.
# NOTE: `x` is rebound further down to the NumPy design matrix, so this cell must run
# while `x` is still the DataFrame of selected columns.
data = x.sub(x.mean()).div(x.std())

data.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,price
0,-0.398728,-1.44743,-0.979812,-0.866697
1,-0.398728,0.175603,0.533622,-0.005688
2,-1.473925,-1.44743,-1.426221,-0.980827
3,0.676469,1.149422,-0.130547,0.174086
4,-0.398728,-0.149004,-0.435412,-0.081956


In [62]:
# Convert the standardised pandas DataFrame to a NumPy array for fast positional slicing.
# np.asarray is used instead of `.values` so that re-running this cell, when `data` is
# already an ndarray, is a harmless no-op rather than an AttributeError.
data = np.asarray(data)
# Number of training rows; used by the cells and functions below.
N = len(data)
data

array([[-0.39872792, -1.44743008, -0.97981235, -0.86669728],
       [-0.39872792,  0.17560263,  0.53362199, -0.00568779],
       [-1.47392526, -1.44743008, -1.42622105, -0.98082666],
       ...,
       [-1.47392526, -1.77203663, -1.15402062, -0.3758565 ],
       [-0.39872792,  0.50020918, -0.52251564, -0.38157931],
       [-1.47392526, -1.77203663, -1.15402062, -0.58586818]])

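<h3>
Building the hypothesis <strong>y = H(X) = X·Theta</strong>, where X is the matrix of independent features for all training data: each training instance is a row and each feature is a column, and the first column is all 1s (the intercept term).
Theta is the column vector of parameters (one weight per column of X), and h(x) = y is the vector of predicted outputs, one per training instance.</h3>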

In [63]:
# Column of ones with shape (N, 1); it becomes the first (intercept) column of the design matrix.
temp = np.ones((N, 1))
temp

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]])

In [64]:
# Feature block: every column of `data` except the last one
# (the last column is taken as the target `y` in a later cell).
temp_data=data[:,:-1]
temp_data

array([[-0.39872792, -1.44743008, -0.97981235],
       [-0.39872792,  0.17560263,  0.53362199],
       [-1.47392526, -1.44743008, -1.42622105],
       ...,
       [-1.47392526, -1.77203663, -1.15402062],
       [-0.39872792,  0.50020918, -0.52251564],
       [-1.47392526, -1.77203663, -1.15402062]])

In [65]:
# Target column: the last column of `data`, kept two-dimensional with shape (N, 1)
# so it lines up with the (N, 1) result of the design matrix times theta.
y = data[:, -1:]
y

array([[-0.86669728],
       [-0.00568779],
       [-0.98082666],
       ...,
       [-0.3758565 ],
       [-0.38157931],
       [-0.58586818]])

In [66]:
# Sanity check: the ones column should be (N, 1) before it is concatenated with the features.
temp.shape

(21613, 1)

In [67]:
# Design matrix: the column of ones followed by the feature columns, joined side by side.
# NOTE: this rebinds `x`, which earlier in the notebook held the pandas DataFrame of
# selected columns; cells that expect the DataFrame must not be re-run after this one.
x=np.concatenate((temp,temp_data),axis=1)
x

array([[ 1.        , -0.39872792, -1.44743008, -0.97981235],
       [ 1.        , -0.39872792,  0.17560263,  0.53362199],
       [ 1.        , -1.47392526, -1.44743008, -1.42622105],
       ...,
       [ 1.        , -1.47392526, -1.77203663, -1.15402062],
       [ 1.        , -0.39872792,  0.50020918, -0.52251564],
       [ 1.        , -1.47392526, -1.77203663, -1.15402062]])

In [84]:
# Cost via matrix operations: all training rows live in one matrix, so no Python loop is needed.
def error_calculation_vector(theta):
    """Return the mean squared error of the linear model ``x . theta`` against ``y``.

    Reads the notebook-level design matrix ``x`` and target column ``y``.

    Parameters
    ----------
    theta : array-like
        Parameter column vector, one entry per column of ``x``.

    Returns
    -------
    numpy.ndarray
        Mean of the squared residuals taken along axis 0; with an (N, 1) ``y``
        this is a one-element array rather than a plain float.
    """
    residuals = np.dot(x, theta) - y
    squared_residuals = residuals ** 2
    return squared_residuals.mean(axis=0)


In [150]:
# Cost via an explicit loop over the training rows (loop counterpart of the matrix version).
def error_calculation_iterative(a,b,c,d):
    """Return the mean squared error of ``a*x0 + b*x1 + c*x2 + d*x3`` over all rows.

    Reads the notebook-level design matrix ``x``, target column ``y`` and row
    count ``N``. Columns 0..3 of ``x`` are paired with ``a``..``d`` in order.

    The result has whatever type arithmetic with ``y[i]`` produces; with an
    (N, 1) ``y`` that is a one-element array.
    """
    squared_error_sum = 0

    for idx in range(N):
        target = y[idx]
        prediction = (a*x[idx,0]+b*x[idx,1]+c*x[idx,2]+d*x[idx,3])
        residual = prediction-target
        squared_error_sum += residual**2

    mean_squared_error = squared_error_sum/N
    return mean_squared_error


In [113]:
# One gradient-descent step computed with matrix operations.
def gradient_descent_vector(theta,learning_rate):
    """Return theta after a single gradient-descent update of the squared-error cost.

    Reads the notebook-level design matrix ``x`` and target column ``y``.

    Parameters
    ----------
    theta : numpy.ndarray
        Current parameter column vector; it must be a column so that
        ``np.dot(x, theta)`` lines up with the (N, 1) ``y``.
    learning_rate : float
        Constant step size applied to the averaged gradient.

    Returns
    -------
    numpy.ndarray
        Updated parameter column vector (``theta`` itself is not modified).
    """
    residuals = np.dot(x, theta) - y
    # One row per training sample, one column per parameter: the derivative of
    # that sample's squared error with respect to each theta_j.
    per_sample_gradients = 2 * residuals * x
    # Average over the samples, then turn the flat result back into a column.
    mean_gradient = per_sample_gradients.mean(axis=0)
    gradient_column = mean_gradient.reshape(-1, 1)
    # Move towards the minimum with a constant learning rate.
    return theta - learning_rate * gradient_column
    

In [166]:
# One gradient-descent step computed with an explicit loop over the training rows.
def gradient_descent_iterative(a,b,c,d, learning_rate):
    """Return the four parameters after a single gradient-descent update.

    Reads the notebook-level design matrix ``x``, target column ``y`` and row
    count ``N``. Columns 0..3 of ``x`` are paired with ``a``..``d`` in order.

    Parameters
    ----------
    a, b, c, d : number
        Current parameter values.
    learning_rate : float
        Constant step size applied to each accumulated gradient.

    Returns
    -------
    list
        ``[new_a, new_b, new_c, new_d]``. Each entry has whatever type arithmetic
        with ``y[i]`` produces; with an (N, 1) ``y`` that is a one-element array.
    """
    da=0
    db=0
    dc=0
    dd=0
    for i in range(N):
        x0 = x[i,0]
        x1 = x[i,1]
        x2 = x[i,2]
        x3 = x[i,3]
        # The scaled residual is shared by all four partial derivatives, so it is
        # evaluated once per row instead of once per parameter.
        scaled_residual=(2/N)*(a*x0+b*x1+c*x2+d*x3-y[i])
        da+=scaled_residual*x0
        db+=scaled_residual*x1
        dc+=scaled_residual*x2
        dd+=scaled_residual*x3
    new_a=a-learning_rate*da
    new_b=b-learning_rate*db
    new_c=c-learning_rate*dc
    new_d=d-learning_rate*dd
    return [new_a,new_b,new_c,new_d]

    
    

In [171]:
def gradient_descent_looper_vector(iteration,theta,learning_rate=0.001,log_every=10000):
    """Repeat the vectorised gradient-descent step and return the final theta.

    Parameters
    ----------
    iteration : int
        Requested number of iterations. The loop runs ``iteration + 1`` updates
        (step indices 0..iteration) so that the last step index is itself
        eligible for a progress line.
    theta : numpy.ndarray
        Initial parameter column vector, handed to ``gradient_descent_vector``.
    learning_rate : float, optional
        Step size for every update. Defaults to 0.001, the value that used to
        be hard-coded here.
    log_every : int, optional
        Print a progress line whenever the step index is a multiple of this
        value. Defaults to 10000, the interval that used to be hard-coded.
        Pass 0 or None to disable progress output.

    Returns
    -------
    numpy.ndarray
        Theta after the last update.

    Notes
    -----
    The progress line reads ``theta[0]`` .. ``theta[3]``, so logging expects
    at least four parameters.
    """
    for i in range(iteration+1):
        theta=gradient_descent_vector(theta,learning_rate)
        # Guard against log_every == 0 / None so the modulo cannot divide by zero.
        if log_every and (i%log_every)==0:
            print(f'At step {i} line is {theta[0]}+{theta[1]}X1+{theta[2]}X2+{theta[3]}X3 ----->ERROR: {error_calculation_vector(theta)}')
    return theta
                  
        

In [172]:
# Initial parameter vector theta: a (4, 1) column with every element set to 0.
theta = np.array([[0], [0], [0], [0]])
theta

array([[0],
       [0],
       [0],
       [0]])

In [173]:
# Run the vectorised gradient descent for 100,000 iterations, starting from `theta`.
desired_theta = gradient_descent_looper_vector(iteration=100000, theta=theta)

At step 0 line is [-6.83814783e-20]+[0.00061667]X1+[0.00105023]X2+[0.00140401]X3 ----->ERROR: [0.99650659]
At step 10000 line is [-1.84594157e-16]+[-0.14601984]X1+[0.01976687]X2+[0.77047347]X3 ----->ERROR: [0.49306281]
At step 20000 line is [-1.87353087e-16]+[-0.14657739]X1+[0.01666692]X2+[0.77397706]X3 ----->ERROR: [0.49305739]
At step 30000 line is [-1.87349141e-16]+[-0.14658188]X1+[0.01664153]X2+[0.77400569]X3 ----->ERROR: [0.49305739]
At step 40000 line is [-1.87320211e-16]+[-0.14658192]X1+[0.01664133]X2+[0.77400592]X3 ----->ERROR: [0.49305739]
At step 50000 line is [-1.87335991e-16]+[-0.14658192]X1+[0.01664132]X2+[0.77400592]X3 ----->ERROR: [0.49305739]
At step 60000 line is [-1.87343881e-16]+[-0.14658192]X1+[0.01664132]X2+[0.77400592]X3 ----->ERROR: [0.49305739]
At step 70000 line is [-1.87347826e-16]+[-0.14658192]X1+[0.01664132]X2+[0.77400592]X3 ----->ERROR: [0.49305739]
At step 80000 line is [-1.87349141e-16]+[-0.14658192]X1+[0.01664132]X2+[0.77400592]X3 ----->ERROR: [0.4930573

In [174]:
# The vectorised (matrix) version is very fast, so it is run for 100K iterations.
# The iterative (pure-Python loop) version is much slower, so it is run for only 100 iterations.

In [168]:
def gradient_descent_looper_iterative(iteration,a,b,c,d,learning_rate=0.001,log_every=10):
    """Repeat the loop-based gradient-descent step and return the final parameters.

    Parameters
    ----------
    iteration : int
        Requested number of iterations. The loop runs ``iteration + 1`` updates
        (step indices 0..iteration) so that the last step index is itself
        eligible for a progress line.
    a, b, c, d : number
        Initial parameter values, handed to ``gradient_descent_iterative``.
    learning_rate : float, optional
        Step size for every update. Defaults to 0.001, the value that used to
        be hard-coded here.
    log_every : int, optional
        Print a progress line whenever the step index is a multiple of this
        value. Defaults to 10, the interval that used to be hard-coded.
        Pass 0 or None to disable progress output.

    Returns
    -------
    list
        ``[a, b, c, d]`` after the last update.
    """
    for i in range(iteration+1):
        a,b,c,d=gradient_descent_iterative(a,b,c,d,learning_rate)
        # Guard against log_every == 0 / None so the modulo cannot divide by zero.
        if log_every and (i%log_every)==0:
            print(f'At step {i} line is {a}+{b}X1+{c}X2+{d}X3 ----->ERROR: {error_calculation_iterative(a,b,c,d)}')
    return [a,b,c,d]
                  
        

In [169]:
# Run the loop-based gradient descent for 100 iterations with all four parameters starting at 0.
desired_a, desired_b, desired_c, desired_d = gradient_descent_looper_iterative(
    iteration=100, a=0, b=0, c=0, d=0
)

At step 0 line is [-3.5812553e-20]+[0.00061667]X1+[0.00105023]X2+[0.00140401]X3 ----->ERROR: [0.99650659]
At step 10 line is [-5.83273664e-19]+[0.00656996]X1+[0.01128894]X2+[0.01516694]X3 ----->ERROR: [0.9636023]
At step 20 line is [-1.5435583e-18]+[0.01214797]X1+[0.02106308]X2+[0.02844102]X3 ----->ERROR: [0.93337673]
At step 30 line is [-1.86054514e-18]+[0.01736828]X1+[0.03039276]X2+[0.04124695]X3 ----->ERROR: [0.90560241]
At step 40 line is [-2.5544142e-18]+[0.02224766]X1+[0.03929725]X2+[0.05360451]X3 ----->ERROR: [0.88007134]
At step 50 line is [-3.12154003e-18]+[0.02680218]X1+[0.04779495]X2+[0.06553266]X3 ----->ERROR: [0.85659331]
At step 60 line is [-4.59618373e-18]+[0.03104715]X1+[0.05590349]X2+[0.07704949]X3 ----->ERROR: [0.8349944]
At step 70 line is [-5.46931885e-18]+[0.03499723]X1+[0.06363971]X2+[0.08817232]X3 ----->ERROR: [0.81511556]
At step 80 line is [-6.00145205e-18]+[0.03866641]X1+[0.07101972]X2+[0.09891773]X3 ----->ERROR: [0.79681136]
At step 90 line is [-7.04922164e-1

In [170]:
# For testing: first apply the same feature scaling to the independent features, then compute the predicted value y.
# Because y (price) was also feature-scaled, reverse the scaling on the output to bring it back to the original units:
# y_original = y * std + mean   (std and mean of the original price column)
