# Estimating House Prices (Multiple Regression) - Manual Gradient Descent Calculation

This notebook fits regression models that estimate house prices from previous house sales, using features such as house size (sqft), number of bathrooms, and so on.
Instead of relying on a library solver, we implement gradient descent by hand, first for a one-feature regression and then for multiple regression.

<hr>

In [1]:
import turicreate

In [2]:
import numpy as np

In [3]:
# Load the house-sales data into an SFrame; the path is relative to the working directory.
sales = turicreate.SFrame('../data/home_data.sframe/')

### Creating a helper function to convert an SFrame to numpy matrices, leveraging the strengths of both numpy and turicreate

In [4]:
def get_numpy_data(data_sframe, features, output):
    """Build a numpy feature matrix (with intercept column) and a target vector.

    Note that a column named 'constant', filled with 1, is written onto
    ``data_sframe`` itself, so the caller's table gains that column.

    Parameters
    ----------
    data_sframe : table that supports column assignment, selection by a
        column name or list of names, and ``to_numpy()`` (an SFrame in
        this notebook).
    features : list of str
        Names of the feature columns, in the order they should appear.
    output : str
        Name of the column holding the values to predict.

    Returns
    -------
    tuple
        ``(feature_matrix, output_array)``; column 0 of ``feature_matrix``
        is the constant column, followed by ``features`` in order.
    """
    # The intercept is modelled as a feature whose value is 1 on every row.
    data_sframe['constant'] = 1

    # 'constant' goes first so that it lines up with the first weight.
    selected_columns = ['constant'] + features

    feature_matrix = data_sframe[selected_columns].to_numpy()
    output_array = data_sframe[output].to_numpy()
    return feature_matrix, output_array

### Testing

In [5]:
# Smoke test: build the matrices for one feature and compare row 0 with the raw SFrame row.
example_features, example_output = get_numpy_data(sales, ['sqft_living'], 'price')  # features must be given as a list
print(sales[0]['sqft_living'])
print(sales[0]['price'])
print(example_features[0, :])  # row 0, all columns: [constant, sqft_living]
print(example_output[0])  # the output value of that same row

1180.0
221900.0
[1.00e+00 1.18e+03]
221900.0


# Predicting output given regression weights

In [6]:
def predict_output(feature_matrix, weights):
    """Return the linear-model predictions for every row of ``feature_matrix``.

    Parameters
    ----------
    feature_matrix : numpy array with the features as columns.
    weights : numpy array with one weight per feature column.

    Returns
    -------
    The result of ``np.dot(feature_matrix, weights)``: one prediction per row.
    """
    return np.dot(feature_matrix, weights)

### Testing

In [7]:
# Sanity check: with weights [1, 1] every prediction is simply 1 + sqft_living.
# Predictions are computed for all rows; the first two are printed.
my_weights = np.array([1., 1.])  # the example weights
test_predictions = predict_output(example_features, my_weights)
print(test_predictions[0])  # should be 1181.0
print(test_predictions[1])  # should be 2571.0

1181.0
2571.0


# Creating the derivative function for gradient descent

In [8]:
def feature_derivative(errors, feature):
    """Return the derivative of the RSS with respect to one feature's weight.

    Parameters
    ----------
    errors : numpy array of prediction errors, one per data point.
    feature : numpy array with that feature's values, same length as ``errors``.

    Returns
    -------
    Twice the dot product of ``errors`` and ``feature``.
    """
    dot_product = np.dot(errors, feature)
    return 2 * dot_product

### Testing

In [9]:
# With all-zero weights every prediction is 0, so each error equals -output and
# the derivative with respect to the 'constant' column reduces to -2 * sum(output).
example_features, example_output = get_numpy_data(sales, ['sqft_living'], 'price')
my_weights = np.array([0., 0.])
test_predictions = predict_output(example_features, my_weights)
errors = test_predictions - example_output  # numpy arrays subtract elementwise
feature = example_features[:, 0]  # all rows of column 0, i.e. the 'constant' feature
derivative = feature_derivative(errors, feature)
print(derivative)
print(-np.sum(example_output) * 2)  # should equal the derivative printed above

-23345850022.0
-23345850022.0


# Creating the Gradient Descent algorithm

In [10]:
from math import sqrt 

In [11]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance,
                                max_iterations=None):
    """Fit linear-regression weights by gradient descent on the residual sum of squares.

    Each pass computes the prediction errors once from the current weights and
    then moves every weight against its RSS derivative. The loop ends after the
    first pass whose gradient magnitude (measured from the weights that pass
    started with) is below ``tolerance``.

    Parameters
    ----------
    feature_matrix : numpy array with one row per data point and one column
        per weight.
    output : numpy array of observed values, one per row of ``feature_matrix``.
    initial_weights : array-like of starting weights; it is copied, not modified.
    step_size : float
        Multiplier applied to each derivative when updating a weight.
    tolerance : float
        Convergence threshold for the gradient magnitude.
    max_iterations : int or None, optional
        Upper bound on the number of passes. ``None`` (the default) keeps the
        original behaviour of iterating until the tolerance is met.

    Returns
    -------
    numpy array of float weights, one per column of ``feature_matrix``.

    Raises
    ------
    ValueError
        If the gradient magnitude becomes infinite or NaN, which means the
        descent diverged and could never satisfy the tolerance test.
    """
    # Force a float copy: with an integer `initial_weights`, np.array would
    # create an integer array and every in-place update below would be
    # truncated, which can stall the descent indefinitely.
    weights = np.array(initial_weights, dtype=float)

    passes = 0
    while max_iterations is None or passes < max_iterations:
        # Errors are computed once per pass, so all weights are updated from
        # the same predictions even though they are modified one at a time.
        predictions = predict_output(feature_matrix, weights)
        errors = predictions - output

        gradient_sum_squares = 0.0
        for i in range(len(weights)):
            # feature_matrix[:, i] is the feature column paired with weights[i].
            derivative = feature_derivative(errors, feature_matrix[:, i])
            gradient_sum_squares += derivative ** 2
            weights[i] -= step_size * derivative

        gradient_magnitude = sqrt(gradient_sum_squares)
        if not np.isfinite(gradient_magnitude):
            # inf/NaN never compares below the tolerance, so without this
            # check a diverging run would loop forever.
            raise ValueError(
                "gradient descent diverged (non-finite gradient); try a smaller step_size"
            )
        if gradient_magnitude < tolerance:
            break
        passes += 1
    return weights

# 1. Testing Gradient Descent on a simple (one-feature) regression

In [12]:
# Split the sales into training and test sets with a fixed seed.
# NOTE(review): presumably ~80% of rows go to train_data - confirm against turicreate's random_split docs.
train_data, test_data = sales.random_split(0.8, seed=0)

In [13]:
# let's test out the gradient descent
features_1 = ['sqft_living']
my_output = 'price'
(feature_matrix_1, output_1) = get_numpy_data(train_data, features_1, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

In [14]:
# Run gradient descent for the one-feature model; the result holds the weights
# for [constant, sqft_living], in that order.
gradient_weights_1 = regression_gradient_descent(feature_matrix_1, output_1, initial_weights, step_size, tolerance)
print(gradient_weights_1)

[-46999.88716555    281.91211912]


### What is the value of the sqft_living weight?

In [15]:
# Index 1 is the sqft_living weight; index 0 belongs to the 'constant' column.
gradient_weights_1[1]

281.91211911641625

### Predicting all test data

In [16]:
# Build the model 1 feature matrix and the true prices for the held-out test rows.
test_feature_matrix_1, test_output_1 = get_numpy_data(test_data, features_1, my_output)

In [17]:
# Apply the weights learned on the training data to every test row.
test_predictions_1 = predict_output(test_feature_matrix_1, gradient_weights_1)

### Model 1 predicted house price for the test set 1st house

In [18]:
# Model 1's predicted price for the first house in the test set.
print(test_predictions_1[0])

356134.4431709297


### RSS

In [19]:
# Residual sum of squares of model 1 on the test set. The residuals are
# computed once and reduced with np.sum; the builtin sum() would step through
# the numpy array one element at a time in Python.
residuals_1 = test_predictions_1 - test_output_1
rss_model_1 = np.sum(residuals_1 ** 2)
print(rss_model_1)

275400047593155.7


# 2. Multiple Regression

In [20]:
# Set-up for the two-feature model. Per the original note, sqft_living15 is the
# average square footage of the 15 nearest neighbors.
features_2 = ['sqft_living', 'sqft_living15']
my_output = 'price'
feature_matrix_2, output_2 = get_numpy_data(train_data, features_2, my_output)
initial_weights = np.array([-100000., 1., 1.])  # [constant, sqft_living, sqft_living15]
step_size = 4e-12
tolerance = 1e9

In [21]:
# Run gradient descent for the two-feature model; the result holds the weights
# for [constant, sqft_living, sqft_living15], in that order.
gradient_weights_2 = regression_gradient_descent(feature_matrix_2, output_2, initial_weights, step_size, tolerance)
print(gradient_weights_2)

[-9.99999688e+04  2.45072603e+02  6.52795277e+01]


### Predictions

In [22]:
# Predict the test-set prices with model 2 and show the first two predictions.
test_feature_matrix_2, test_output_2 = get_numpy_data(test_data, features_2, my_output)
test_predictions_2 = predict_output(test_feature_matrix_2, gradient_weights_2)
print(test_predictions_2[0])
print(test_predictions_2[1])

366651.4120365591
762662.3978616422


### RSS

In [23]:
# Residual sum of squares of model 2 on the test set. The residuals are
# computed once and reduced with np.sum rather than the builtin sum(), and the
# value is printed so it can be compared with model 1's RSS.
residuals_2 = test_predictions_2 - test_output_2
rss_model_2 = np.sum(residuals_2 ** 2)
print(rss_model_2)

### Model 2 predicted house price for the test set 1st house

In [24]:
# Model 2's predicted price for the first house in the test set.
print(test_predictions_2[0])

366651.4120365591


### Test set 1st house real price

In [25]:
# Actual price of the first test house. test_output_1 and test_output_2 are both
# built from the 'price' column of test_data, so either one could be used here.
test_output_1[0]

310000.0

# Which estimate was closer?

In [26]:
# Signed error of model 1 on the first test house (actual - predicted);
# a negative value means the model predicted too high.
test_output_1[0] - test_predictions_1[0]

-46134.44317092968

In [27]:
# Signed error of model 2 on the first test house (actual - predicted);
# a negative value means the model predicted too high.
test_output_2[0] - test_predictions_2[0]

-56651.41203655908

Model 1 was closer: its estimate for this house was off by about 46,134, versus about 56,651 for model 2.

# Which model had lower RSS on the test data?

In [28]:
# True means model 1 has the larger RSS on the test set, i.e. model 2 has the lower one.
rss_model_1 > rss_model_2

True

Model 2 had lower RSS.