In [28]:
import graphlab as gl
import numpy as np
from math import sqrt

In [30]:
# Load the house sales dataset stored at 'kc_house_data.gl/' into an SFrame.
sales = gl.SFrame('kc_house_data.gl/')

In [31]:
def get_numpy_data(data_sframe, features, output):
    """Build a feature matrix (with intercept column) and a target array.

    Side effect: a column named 'constant', filled with 1, is written into
    ``data_sframe`` itself, so the caller's frame gains (or has overwritten)
    that column.

    Parameters
    ----------
    data_sframe : table-like object
        Must support column assignment, selection by a list of column names
        and by a single name, and ``.to_numpy()`` on both selections.
    features : list of str
        Feature column names; the caller's list is not modified.
    output : str
        Name of the target column.

    Returns
    -------
    tuple
        ``(features_matrix, output_array)`` where the first matrix column is
        the 'constant' column, followed by ``features`` in the given order.
    """
    # The intercept column has to exist before it can be selected below.
    data_sframe['constant'] = 1
    selected_columns = ['constant'] + features
    features_matrix = data_sframe[selected_columns].to_numpy()
    output_array = data_sframe[output].to_numpy()
    return features_matrix, output_array

In [32]:
def predict_outcome(feature_matrix, weights):
    """Return the model predictions ``np.dot(feature_matrix, weights)``.

    Parameters
    ----------
    feature_matrix : array-like
        One row per observation, one column per feature.
    weights : array-like
        One weight per column of ``feature_matrix``.
    """
    return np.dot(feature_matrix, weights)

In [33]:
def feature_derivative(errors, feature):
    """Return twice the dot product of ``errors`` and ``feature``.

    With ``errors = predictions - output`` this is the partial derivative of
    the residual sum of squares with respect to the weight of ``feature``.
    """
    return 2 * np.dot(errors, feature)

In [34]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance,
                                max_iterations=None):
    """Fit linear-regression weights by batch gradient descent on the RSS.

    Each pass computes the prediction errors once, then updates every weight
    from those same errors. Iteration stops when the gradient magnitude
    (measured before the update of that pass) drops below ``tolerance``.

    Relies on ``predict_outcome`` and ``feature_derivative`` defined elsewhere
    in this file.

    Parameters
    ----------
    feature_matrix : 2-D numpy array
        One row per observation, one column per weight.
    output : 1-D numpy array
        Observed target values, one per row of ``feature_matrix``.
    initial_weights : array-like
        Starting weights; copied, never modified in place.
    step_size : float
        Multiplier applied to each partial derivative.
    tolerance : float
        Convergence threshold on the gradient magnitude.
    max_iterations : int or None, optional
        Upper bound on the number of passes. ``None`` (the default) means no
        bound. When the bound is hit, the current weights are returned even
        though the tolerance was not reached.

    Returns
    -------
    numpy array of float
        The weights after the final update.

    Raises
    ------
    FloatingPointError
        If the weights become NaN or infinite, since the convergence test
        can then never succeed.
    """
    # Force a floating-point copy: with integer initial weights (e.g. [0, 0])
    # every in-place assignment below would be truncated to an integer, which
    # silently corrupts the updates and can stall the loop forever.
    weights = np.array(initial_weights, dtype=float)

    iterations = 0
    while max_iterations is None or iterations < max_iterations:
        predictions = predict_outcome(feature_matrix, weights)
        errors = predictions - output

        gradient_sum_squares = 0
        for i in range(len(weights)):
            # errors was computed before any weight changed in this pass, so
            # all weights are updated from the same gradient.
            derivative = feature_derivative(errors, feature_matrix[:, i])
            gradient_sum_squares += derivative ** 2
            weights[i] = weights[i] - step_size * derivative

        # Convergence is tested first so that every case which used to return
        # still returns exactly the same weights.
        if sqrt(gradient_sum_squares) < tolerance:
            break

        # NaN/inf weights only ever produce NaN/inf gradients, so without
        # this check a too-large step_size would loop indefinitely.
        if not np.all(np.isfinite(weights)):
            raise FloatingPointError(
                'gradient descent diverged (non-finite weights); '
                'try a smaller step_size'
            )
        iterations += 1

    return weights

In [35]:
# Split the sales rows into training and test sets (fraction 0.8 for the
# first split); the fixed seed keeps the split reproducible across runs.
train_data, test_data = sales.random_split(0.8, seed=0)

In [36]:
# Simple model: predict 'price' from 'sqft_living' alone.
simple_features = ['sqft_living']
my_output = 'price'
simple_feature_matrix, output = get_numpy_data(train_data, simple_features, my_output)

# One starting weight per matrix column: [constant (intercept), sqft_living].
initial_weights = np.array([-47000., 1.])

# Gradient-descent settings used for this model.
step_size = 7e-12
tolerance = 2.5e7

In [37]:
# Fit the simple model on the training data.
simple_weights = regression_gradient_descent(
    simple_feature_matrix, output, initial_weights, step_size, tolerance
)

In [38]:
# Show the fitted weights: [constant (intercept), sqft_living].
simple_weights

array([-46999.88716555,    281.91211912])

In [39]:
def get_rss_weights(predictions, output):
    """Return the residual sum of squares between predictions and targets.

    The squared differences ``(predictions - output) ** 2`` are summed along
    axis 0, so 1-D inputs yield a single number.
    """
    residuals = predictions - output
    return (residuals ** 2).sum(axis=0)

In [40]:
# Build the same [constant, sqft_living] matrix and price array for the test set.
test_simple_feature_matrix, test_output = get_numpy_data(test_data, simple_features, my_output)

In [41]:
# Predict test-set prices with the simple model's fitted weights.
prediction_test1 = predict_outcome(test_simple_feature_matrix, simple_weights)

In [42]:
# Simple model's predicted price for the first row of the test set.
prediction_test1[0]

356134.44317092968

In [43]:
# Residual sum of squares of the simple model on the test set.
rss_test_1 = get_rss_weights(prediction_test1, test_output)
rss_test_1

275400047593155.94

# Multiple features

In [44]:
# Two-feature model: predict 'price' from 'sqft_living' and 'sqft_living15'.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
feature_matrix, output = get_numpy_data(train_data, model_features, my_output)

# One starting weight per matrix column:
# [constant (intercept), sqft_living, sqft_living15].
initial_weights = np.array([-100000., 1., 1.])

# Gradient-descent settings used for this model.
step_size = 4e-12
tolerance = 1e9

In [45]:
# Fit the two-feature model on the training data.
regression_weights = regression_gradient_descent(
    feature_matrix, output, initial_weights, step_size, tolerance
)

In [46]:
# Show the fitted weights: [constant (intercept), sqft_living, sqft_living15].
regression_weights

array([ -9.99999688e+04,   2.45072603e+02,   6.52795277e+01])

In [47]:
# Build the two-feature matrix (plus constant column) and price array for the test set.
test_multiple_feature_matrix, test_output = get_numpy_data(test_data, model_features, my_output)

In [48]:
# Predict test-set prices with the two-feature model's fitted weights.
predictions_2 = predict_outcome(test_multiple_feature_matrix, regression_weights)

In [49]:
# Two-feature model's predicted price for the first row of the test set.
predictions_2[0]

366651.41203655914

In [50]:
# Residual sum of squares of the two-feature model on the test set.
rss_2 = get_rss_weights(predictions_2, test_output)
rss_2

270263446465244.06

In [51]:
# Actual price of the first test-set row, for comparison with the model predictions.
test_output[0]

310000.0