In [1]:
import turicreate

In [2]:

# Load the dataset stored at 'home_data.sframe' into a turicreate SFrame.
sales = turicreate.SFrame('home_data.sframe')

In [3]:
import numpy as np

In [6]:
def get_numpy_data(data_sframe, features, output):
    """Extract a numpy feature matrix and output array from a tabular frame.

    Note that ``data_sframe`` itself is modified: a column named 'constant'
    holding the value 1 is assigned onto it before the features are selected.

    Parameters
    ----------
    data_sframe : frame object supporting column assignment, selection by a
        list of column names, and ``to_numpy()`` on the selected result.
    features : list of str
        Names of the feature columns; 'constant' is prepended to this list.
    output : str
        Name of the column to return as the output array.

    Returns
    -------
    tuple
        ``(feature_matrix, output_array)`` where the first column of
        ``feature_matrix`` is the 'constant' column.
    """
    # Intercept feature: assign 1 to a column called 'constant' on the frame.
    data_sframe['constant'] = 1
    # List concatenation puts 'constant' ahead of the requested feature names.
    selected_columns = ['constant'] + features
    # Select those columns and convert the selection straight to a numpy matrix.
    feature_matrix = data_sframe[selected_columns].to_numpy()
    # Same conversion for the single output column.
    output_array = data_sframe[output].to_numpy()
    return (feature_matrix, output_array)

In [7]:
# Try get_numpy_data on one feature; the [] around 'sqft_living' makes it a list.
example_features, example_output = get_numpy_data(sales, ['sqft_living'], 'price')
print(example_features[0, :])  # first row of the data; ':' means 'all columns'
print(example_output[0])  # the corresponding output

[1.00e+00 1.18e+03]
221900.0


In [8]:
# Predict for a single data point as the dot product of its features and the weights.
my_weights = np.array([1., 1.])  # the example weights
my_features = example_features[0]  # the first data point
predicted_value = np.dot(my_features, my_weights)
print(predicted_value)

1181.0


In [9]:
def predict_output(feature_matrix, weights):
    """Return the predictions vector ``np.dot(feature_matrix, weights)``.

    ``feature_matrix`` holds the features as columns and ``weights`` is the
    corresponding array with one entry per column.
    """
    return np.dot(feature_matrix, weights)


In [10]:
# Check predict_output against the first two rows.
test_predictions = predict_output(example_features, my_weights)
print(test_predictions[0])  # should be 1181.0
print(test_predictions[1])  # should be 2571.0


1181.0
2571.0


In [11]:
def feature_derivative(errors, feature):
    """Return twice the dot product of ``errors`` and ``feature``.

    Both arguments are expected to be arrays of the same length (one entry
    per data point).
    """
    inner_product = np.dot(errors, feature)
    return 2 * inner_product


In [12]:
# Sanity-check feature_derivative using all-zero weights.
example_features, example_output = get_numpy_data(sales, ['sqft_living'], 'price')
my_weights = np.array([0., 0.])  # this makes all the predictions 0
test_predictions = predict_output(example_features, my_weights)
# Two numpy arrays can be subtracted element-wise with '-';
# with zero predictions the errors are just -example_output.
errors = test_predictions - example_output
# Derivative with respect to 'constant' (column 0); ':' selects all rows.
feature = example_features[:, 0]
derivative = feature_derivative(errors, feature)
print(derivative)
print(-np.sum(example_output) * 2)  # should be the same as derivative

-23345850022.0
-23345850022.0


In [13]:

from math import sqrt # recall that the magnitude/length of a vector [g[0], g[1], g[2]] is sqrt(g[0]^2 + g[1]^2 + g[2]^2)

In [14]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance,
                                max_iterations=None):
    """Fit regression weights by gradient descent on the residual sum of squares.

    On every pass the predictions and errors are computed once from the
    current weights, then each weight is moved by ``step_size`` times its
    derivative. The pass that produces a gradient magnitude below
    ``tolerance`` is the last one; its update is still applied before
    returning.

    Parameters
    ----------
    feature_matrix : 2-D numpy array; column ``i`` is the feature for ``weights[i]``.
    output : array of observed outputs, one per row of ``feature_matrix``.
    initial_weights : sequence of starting weights (copied, never modified).
    step_size : multiplier applied to each derivative in the update.
    tolerance : stop once the gradient magnitude drops below this value.
    max_iterations : optional int. When given, stop after this many passes
        even if the tolerance has not been reached. The default ``None``
        keeps the original run-until-converged behaviour.

    Returns
    -------
    numpy array of float weights.
    """
    # Force a float array: with integer initial weights numpy would keep an
    # integer dtype and the in-place update below would truncate every step
    # back to an integer, so the weights would never move.
    weights = np.array(initial_weights, dtype=float)
    iterations = 0
    converged = False
    while not converged:
        # compute the predictions based on feature_matrix and weights using predict_output()
        predictions = predict_output(feature_matrix, weights)
        # compute the errors as predictions - output
        errors = predictions - output
        gradient_sum_squares = 0  # initialize the gradient sum of squares
        for i in range(len(weights)):  # loop over each weight
            # feature_matrix[:, i] is the feature column associated with weights[i]
            feature = feature_matrix[:, i]
            derivative = feature_derivative(errors, feature)
            # accumulate the squared derivative (for assessing convergence)
            gradient_sum_squares += derivative**2
            # errors were computed before this loop, so changing weights[i]
            # here does not affect the derivatives of the remaining weights
            weights[i] -= step_size*derivative
        # the square root of the sum of squares is the gradient magnitude
        gradient_magnitude = sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance:
            converged = True
        iterations += 1
        # Optional safety net: a diverging run (e.g. a NaN gradient) can never
        # satisfy the tolerance test and would otherwise loop forever.
        if max_iterations is not None and iterations >= max_iterations:
            break
    return(weights)

# def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
#     converged = False 
#     weights = np.array(initial_weights) # make sure it's a numpy array
#     while not converged:
#         # compute the predictions based on feature_matrix and weights using your predict_output() function
#         predictions = predict_output(feature_matrix, weights)
#         # compute the errors as predictions - output
#         errors = predictions - output
#         gradient_sum_squares = 0 # initialize the gradient sum of squares
#         # while we haven't reached the tolerance yet, update each feature's weight
#         for i in range(len(weights)): # loop over each weight
#             # Recall that feature_matrix[:, i] is the feature column associated with weights[i]
#             # compute the derivative for weight[i]:
#             feature = feature_matrix[:, i]
#             derivative = feature_derivative(errors, feature)
#             # add the squared value of the derivative to the gradient sum of squares (for assessing convergence)
#             gradient_sum_squares += derivative**2
#             # subtract the step size times the derivative from the current weight
#             weights[i] -= step_size*derivative
#         # compute the square-root of the gradient sum of squares to get the gradient magnitude:
#         gradient_magnitude = sqrt(gradient_sum_squares)
#         if gradient_magnitude < tolerance:
#             converged = True
#     return(weights)

In [15]:
# Split sales 80/20 into training and test sets; the fixed seed keeps the split repeatable.
train_data, test_data = sales.random_split(0.8, seed=0)

In [16]:
# let's test out the gradient descent
simple_features = ['sqft_living']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

In [17]:
# Fit the simple model on the training data.
my_sqft_weights = regression_gradient_descent(
    simple_feature_matrix, output, initial_weights, step_size, tolerance
)
print(my_sqft_weights)


[-46999.88716555    281.91211912]


# Quiz Question: What is the value of the weight for sqft_living -- the second element of `my_sqft_weights` (rounded to 1 decimal place)?



In [18]:
# Build the feature matrix and output array for the TEST data (simple model).
test_simple_feature_matrix, test_output = get_numpy_data(test_data, simple_features, my_output)

In [19]:
# Model 1 predictions on the test data.
test_predictions = predict_output(test_simple_feature_matrix, my_sqft_weights)
print(test_predictions)

[356134.44317093 784640.86422788 435069.83652353 ... 663418.65300782
 604217.10799338 240550.4743332 ]


# Quiz Question: What is the predicted price for the 1st house in the TEST data set for model 1 (round to nearest dollar)?

In [20]:
# Model 1 prediction for the first test house, rounded to the nearest integer.
print(np.rint(test_predictions[0]))

356134.0


In [21]:
# Residual sum of squares of model 1 on the test data.
test_errors = test_predictions - test_output
RSS = (test_errors * test_errors).sum()
print(RSS)


275400047593155.94


# Running a multiple regression

In [22]:
# Set up the two-feature model.
# sqft_living15 is the average square feet for the nearest 15 neighbors.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
feature_matrix, output = get_numpy_data(train_data, model_features, my_output)
initial_weights = np.array([-100000., 1., 1.])  # [intercept, sqft_living, sqft_living15]
step_size = 4e-12
tolerance = 1e9

In [23]:
# Fit the two-feature model on the training data.
model_weights = regression_gradient_descent(
    feature_matrix, output, initial_weights, step_size, tolerance
)
print(model_weights)

[-9.99999688e+04  2.45072603e+02  6.52795277e+01]


In [24]:
# Model 2 predictions on the test data.
test_feature_matrix, test_output = get_numpy_data(test_data, model_features, my_output)
new_test_predictions = predict_output(test_feature_matrix, model_weights)


# Quiz Question: What is the predicted price for the 1st house in the TEST data set for model 2 (round to nearest dollar)?

In [25]:
# Model 2 prediction for the first test house, rounded to the nearest integer.
print(np.rint(new_test_predictions[0]))

366651.0


In [26]:
# The 'price' recorded for the first row of the test data.
print(test_data[0]['price'])

310000.0


# Quiz Question: Which estimate was closer to the true price for the 1st house on the TEST data set, model 1 or model 2?

In [27]:
# Residual sum of squares of model 2 on the test data.
test_errors = new_test_predictions - test_output
RSS = (test_errors * test_errors).sum()
print(RSS)

270263446465244.06


# Quiz Question: Which model (1 or 2) has lowest RSS on all of the TEST data?

model 2 (test RSS ≈ 2.70e14, versus ≈ 2.75e14 for model 1)