In [194]:
import graphlab
import numpy as np

In [195]:
sales = graphlab.SFrame('kc_house_data.gl/')

In [196]:
def get_numpy_data(data_sframe, features, output):
    data_sframe['constant'] = 1 # add a constant column to an SFrame; for the "intercept"
    # prepend variable 'constant' to the features list; prepend makes it first column 0
    features = ['constant'] + features
    # select the columns of data_SFrame given by the ‘features’ list into the SFrame ‘features_sframe’
    features_sframe = data_sframe[features]
    # this will convert the features_sframe into a numpy matrix with GraphLab Create >= 1.7!!
    features_matrix = features_sframe.to_numpy()
    # assign the column of data_sframe associated with the target to the variable ‘output_sarray’
    output_sarray = data_sframe[output]
    # this will convert the SArray into a numpy array:
    output_array = output_sarray.to_numpy() # GraphLab Create>= 1.7!!
    return(features_matrix, output_array)

In [197]:
def predict_outcome(feature_matrix, weights):
    predictions = np.dot(feature_matrix, weights)
    return(predictions)

In [198]:
def feature_derivative(errors, feature):
    derivative = 2 * np.dot(errors, feature)
    return(derivative)

In [199]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    
    converged = False
    weights = np.array(initial_weights)

    while not converged:
        
        # compute the predictions based on feature_matrix and weights:
        predictions = predict_outcome(feature_matrix, weights)
        
        # compute the errors as predictions - output:
        errors = predictions - output
        
        gradient_sum_squares = 0 # initialize the gradient
        
        # while not converged, update each weight individually:
        for i in range(len(weights)):
    
            # Recall that feature_matrix[:, i] is the feature column associated with weights[i]
            # compute the derivative for weight[i]:
            derivative = feature_derivative(errors, feature_matrix[:, i])
            
            # add the squared derivative to the gradient magnitude
            gradient_sum_squares += derivative**2

            # update the weight based on step size and derivative:
            weights[i] = weights[i] - (step_size * derivative)

        gradient_magnitude = np.sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance:
            converged = True
            
    return(weights)


In [200]:
train_data,test_data = sales.random_split(.8,seed=0)

In [201]:
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

In [202]:
simple_weights = regression_gradient_descent(simple_feature_matrix,
                                             output,
                                             initial_weights,
                                             step_size,
                                             tolerance)

### Quiz Question 9

In [203]:
print simple_weights

[-46999.88716555    281.91211912]


### Quiz Question 10

In [204]:
(test_simple_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)

### Quiz Question 11

In [205]:
test_predicted_house_prices = predict_outcome(test_simple_feature_matrix, simple_weights)
test_predicted_house_prices[0]

356134.44317092968

### Quiz Question 12

In [206]:
RSS1 = sum(test_predicted_house_prices - test_output)**2
print RSS1

2.65832861844e+14


### Quiz Question 13

In [207]:
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features, my_output)
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9

In [208]:
regression_weights = regression_gradient_descent(feature_matrix,
                                                 output,
                                                 initial_weights,
                                                 step_size,
                                                 tolerance)
print regression_weights

[ -9.99999688e+04   2.45072603e+02   6.52795277e+01]


### Quiz Question 14 / 15

In [209]:
(test_feature_matrix, test_output) = get_numpy_data(test_data, model_features, my_output)
test_predicted_house_prices = predict_outcome(test_feature_matrix, regression_weights)
test_predicted_house_prices[0]

366651.41203655908

### Quiz Question 16

In [210]:
test_output[0]

310000.0

### Quiz Question 17

Model 1 was closer

### Quiz Question 18

In [211]:
RSS2 = sum(test_predicted_house_prices - test_output)**2
print RSS2

2.35336652403e+14


### Quiz Question 19

model 2 has a lower RSS