## Read data

In [77]:
import turicreate

In [78]:
# Load the home-sales data into a turicreate SFrame. The path is relative, so it
# is resolved against the notebook's current working directory.
sales = turicreate.SFrame("home_data.sframe/home_data.sframe")

In [79]:
import numpy as np

## Create function (generate matrix from data)

In [80]:
def get_numpy_data(data_sframe, features, output):
    """Build a NumPy feature matrix (with an intercept column) and a target array.

    Side effect: a 'constant' column holding 1 is written onto ``data_sframe``
    itself, so the caller's frame is modified.

    Parameters
    ----------
    data_sframe : table-like
        Must support column assignment, selection by a list of column names,
        and ``.to_numpy()`` on the results (an SFrame in this notebook).
    features : list of str
        Feature column names. Must be a list, because it is concatenated onto
        ``['constant']``.
    output : str
        Name of the target column.

    Returns
    -------
    tuple
        ``(features_matrix, output_array)``. The first matrix column is the
        constant column, followed by ``features`` in the order given.
    """
    # Intercept term: every row gets a 1 in the 'constant' column.
    data_sframe['constant'] = 1
    selected_columns = ['constant'] + features
    design_matrix = data_sframe[selected_columns].to_numpy()
    target_values = data_sframe[output].to_numpy()
    return design_matrix, target_values

In [81]:
# Smoke-test get_numpy_data on the full dataset with a single feature.
# Note: the call also writes a 'constant' column onto `sales` as a side effect.
get_numpy_data(sales, ['sqft_living'], 'price')

(array([[1.00e+00, 1.18e+03],
        [1.00e+00, 2.57e+03],
        [1.00e+00, 7.70e+02],
        ...,
        [1.00e+00, 1.02e+03],
        [1.00e+00, 1.60e+03],
        [1.00e+00, 1.02e+03]]),
 array([221900., 538000., 180000., ..., 402101., 400000., 325000.]))

## Create function (do the prediction)

In [82]:
def predict_outcome(feature_matrix, weights):
    """Return ``np.dot(feature_matrix, weights)``.

    With a 2-D ``feature_matrix`` (one row per observation) and a 1-D
    ``weights`` vector this yields one predicted value per row.
    """
    return np.dot(feature_matrix, weights)

## Create function (do the derivative)

In [83]:
def feature_derivatives(errors, feature):
    """Return twice the dot product of ``errors`` and ``feature``.

    When ``errors`` is ``predictions - output``, this is the derivative of the
    residual sum of squares with respect to the weight of ``feature``.
    ``feature`` may be a single column (scalar result) or a whole feature
    matrix (one derivative per column).
    """
    return np.dot(errors, feature) * 2

## Gradient descent function

In [88]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    """Fit linear-regression weights by gradient descent, one weight at a time.

    Each pass computes the errors once from the current weights, then updates
    every weight using the derivative for its feature column. Iteration stops
    after the first pass whose gradient magnitude (computed from the weights
    the pass started with) is below ``tolerance``; the weights returned include
    that final update.

    Parameters
    ----------
    feature_matrix : 2-D numpy array
        One row per observation, one column per weight (indexed as
        ``feature_matrix[:, i]``).
    output : 1-D array
        Observed target values, subtracted from the predictions.
    initial_weights : array-like
        Starting weights; copied, never modified.
    step_size : float
        Multiplier applied to each derivative in the update.
    tolerance : float
        Convergence threshold on the gradient magnitude.

    Returns
    -------
    numpy array of float
        The fitted weights.

    Notes
    -----
    There is no iteration cap: if the gradient magnitude never drops below
    ``tolerance`` (for example because ``step_size`` is too large), the loop
    does not terminate.
    """
    # Force a float copy. With integer `initial_weights`, np.array() would pick
    # an integer dtype and the element-wise assignments below would silently
    # truncate every update.
    weights = np.array(initial_weights, dtype=float)
    converged = False
    while not converged:
        predictions = predict_outcome(feature_matrix, weights)
        # Errors are fixed for the whole pass, so updating weights[i] inside
        # the loop does not affect the derivatives of later features.
        errors = predictions - output
        gradient_sum_squares = 0
        for i in range(len(weights)):
            derivative = feature_derivatives(errors, feature_matrix[:, i])
            gradient_sum_squares = gradient_sum_squares + derivative**2
            weights[i] = weights[i] - step_size*derivative
        gradient_magnitude = np.sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance:
            converged = True
    return weights
        
    

## Another way for gradient descent

In [102]:
def regression_gradient_descent2(feature_matrix, output, initial_weights, step_size, tolerance):
    """Vectorised gradient descent: update all weights in one step per pass.

    Each pass computes the errors from the current weights, obtains the full
    gradient by passing the whole ``feature_matrix`` to ``feature_derivatives``,
    and takes one step. The loop ends after the first pass whose gradient
    magnitude is below ``tolerance``; the weights returned include that final
    step. ``initial_weights`` is copied, not modified.

    There is no iteration cap: if the gradient magnitude never drops below
    ``tolerance``, the loop does not terminate.
    """
    weights = np.array(initial_weights)
    while True:
        errors = predict_outcome(feature_matrix, weights) - output
        gradient = feature_derivatives(errors, feature_matrix)
        weights = weights - step_size * gradient
        # Euclidean norm of the gradient that was just applied.
        gradient_magnitude = np.sqrt(np.dot(gradient, gradient))
        if gradient_magnitude < tolerance:
            return weights

## Fit model based on all created functions

In [104]:
# Split the rows into training and test SFrames; the fixed seed makes the split
# reproducible. 0.8 is presumably the fraction of rows that goes to train_data --
# confirm against the SFrame.random_split documentation.
train_data, test_data = sales.random_split(0.8, seed=0)

In [105]:
# Inputs for the single-feature model (price ~ sqft_living).
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
# One weight per matrix column: get_numpy_data puts 'constant' (the intercept)
# first, then the listed features.
initial_weights = np.array([-47000., 1.])
# Gradient-descent step multiplier and convergence threshold on the gradient magnitude.
step_size = 7e-12
tolerance = 2.5e7

In [91]:
# Fit the single-feature model with the per-feature-loop implementation.
simple_weights = regression_gradient_descent(simple_feature_matrix, 
                                             output,initial_weights, step_size, tolerance)

In [106]:
# Fit the same model with the vectorised implementation, using identical inputs.
simple_weights2 = regression_gradient_descent2(simple_feature_matrix, 
                                             output,initial_weights, step_size, tolerance)

In [107]:
# Show the weights from both implementations side by side to check that they agree.
print(simple_weights,simple_weights2)

[-46999.88716555    281.91211912] [-46999.88716555    281.91211912]


## Predict house price based on the model for test data

In [109]:
# Build the test-set feature matrix and targets, then predict with the weights
# from each gradient-descent implementation.
test_simple_feature, test_output = get_numpy_data(test_data, ['sqft_living'], 'price')
prediction_test = predict_outcome(test_simple_feature, simple_weights)
prediction_test2 = predict_outcome(test_simple_feature, simple_weights2)

In [110]:
# Prediction for the first test row from each set of weights.
print(prediction_test[0],prediction_test2[0])

356134.44317092974 356134.44317092974


In [95]:
# Residual sum of squares on the test set: the sum of squared differences
# between test_output and prediction_test.
RSS_test = np.sum((test_output-prediction_test)**2)
RSS_test

275400047593155.94

## Fit model with more than one predictor

In [96]:
# Inputs for the two-feature model (price ~ sqft_living + sqft_living15).
# Note: this cell rebinds my_output, output, initial_weights, step_size and
# tolerance, which the single-feature section also assigned.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features,my_output)
# One weight per matrix column: 'constant' (the intercept) first, then the two features.
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9

In [97]:
# Fit the two-feature model with the per-feature-loop implementation.
feature_weights = regression_gradient_descent(feature_matrix, output, 
                                              initial_weights, step_size, tolerance)

In [98]:
# Evaluate the two-feature model on the test data.
# NOTE: this rebinds test_output and prediction_test2, which an earlier cell
# assigned for the single-feature model.
test_feature, test_output = get_numpy_data(test_data, ['sqft_living', 'sqft_living15'], 'price')
prediction_test2 = predict_outcome(test_feature, feature_weights)

In [99]:
# First element of prediction_test2 (the first test row's prediction).
prediction_test2[0]

366651.4120365591

In [67]:
# Residual sum of squares on the test set: the sum of squared differences
# between test_output and prediction_test2.
RSS_test2 = np.sum((test_output-prediction_test2)**2)

In [68]:
# Display the stored RSS_test2 value.
RSS_test2

275019793347438.94

In [100]:
# Actual price of the first test row, for comparison with the model predictions.
test_data['price'][0]

310000.0