### Mount the local drive to save the works

In [1]:
from google.colab import drive
drive.mount('/content/MyDrive/')

Mounted at /content/MyDrive/


In [3]:
#!pip install turicreate
import turicreate

In [4]:
#!7z x '/content/MyDrive/MyDrive/SFRAMES/home_data_small.sframe.zip'
sf = turicreate.SFrame('/content/MyDrive/MyDrive/SFRAMES/home_data.sframe')

In [5]:
sf.head(3)

id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront
7129300520,2014-10-13 00:00:00+00:00,221900.0,3.0,1.0,1180.0,5650.0,1.0,0
6414100192,2014-12-09 00:00:00+00:00,538000.0,3.0,2.25,2570.0,7242.0,2.0,0
5631500400,2015-02-25 00:00:00+00:00,180000.0,2.0,1.0,770.0,10000.0,1.0,0

view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat
0,3,7.0,1180.0,0.0,1955.0,0.0,98178,47.51123398
0,3,7.0,2170.0,400.0,1951.0,1991.0,98125,47.72102274
0,3,6.0,770.0,0.0,1933.0,0.0,98028,47.73792661

long,sqft_living15,sqft_lot15
-122.25677536,1340.0,5650.0
-122.3188624,1690.0,7639.0
-122.23319601,2720.0,8062.0


# **Gradient Descent Models**

In [6]:
import numpy as np
def get_numpy_data(data_sframe, features, output):
    data_sframe['constant'] = 1 # add a constant column to an SFrame
    # prepend variable 'constant' to the features list
    features = ['constant'] + features
    # select the columns of data_SFrame given by the ‘features’ list into the SFrame ‘features_sframe’
    features_sframe = data_sframe[features]
    # this will convert the features_sframe into a numpy matrix:
    features_matrix = features_sframe.to_numpy()
    # assign the column of data_sframe associated with the target to the variable ‘output_sarray’
    output_sarray = data_sframe[output]
    # this will convert the SArray into a numpy array:
    output_array = output_sarray.to_numpy()
    return (features_matrix, output_array)

In [7]:
def predict_outcome(feature_matrix, weights):
    predictions = np.dot(feature_matrix, weights)
    return predictions

In [8]:
def feature_derivative(errors, feature):
    derivative = 2 * np.dot(errors, feature)
    return (derivative)

In [9]:
from math import sqrt
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    converged = False
    weights = np.array(initial_weights)
    while not converged:
        # compute the predictions based on feature_matrix and weights:
        predictions = predict_outcome(feature_matrix, weights)
        # compute the errors as predictions - output:
        errors = predictions - output
        # initialize the gradient
        gradient_sum_squares = 0 
        # while not converged, update each weight individually:
        for i in range(len(weights)):
            derivative = feature_derivative(errors, feature_matrix[:, i])
            gradient_sum_squares += derivative * derivative
            weights[i] = weights[i] - (step_size * derivative)
        gradient_magnitude = sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance:
            converged = True
    return(weights)

# **Train Using Our Model**

In [10]:
train_data, test_data = sf.random_split(.8, seed=0)

In [11]:
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7
simple_weights = regression_gradient_descent(simple_feature_matrix, output, initial_weights, step_size, tolerance)
print('Simple weights are: ')
print(simple_weights)
print(round(simple_weights[1], 1))

Simple weights are: 
[-46999.88716555    281.91211912]
281.9


# **Testing**

In [12]:
#build test simple feature matrix and test output using test data
(test_simple_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)
simple_predicted_outcome = predict_outcome(test_simple_feature_matrix, simple_weights)
print('predicted price of 1st house: ')
print(round(simple_predicted_outcome[0]))

predicted price of 1st house: 
356134


In [13]:
test_errors = simple_predicted_outcome - test_output
RSS = np.square(test_errors).sum()
print('RSS is: ')
print(RSS)

RSS is: 
275400047593155.94


In [14]:
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features, my_output)
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9
(feature_matrix, test_output) = get_numpy_data(test_data, model_features, my_output)
multiple_weights = regression_gradient_descent(feature_matrix, test_output, initial_weights, step_size, tolerance)
print('Multiple weights are: ')
print(multiple_weights)
print(round(multiple_weights[1], 1))

Multiple weights are: 
[-9.99999374e+04  2.29212240e+02  8.33261579e+01]
229.2


In [15]:
multiple_predictions = predict_outcome(feature_matrix, multiple_weights)
print('price of 1st house model 2: ')
print(round(multiple_predictions[0]))

price of 1st house model 2: 
376094


In [16]:
print('Real house price: ' + str(test_data['price'][0]))
multiple_test_errors = multiple_predictions - test_output
RSSm = sum(multiple_test_errors * multiple_test_errors)
print(RSSm)
print(RSS < RSSm)

Real house price: 310000.0
269870818927390.75
False
