In [96]:
import graphlab
import numpy as np

In [97]:
sales = graphlab.SFrame('/Users/Sajjad/Downloads/kc_house_data.gl')

In [98]:
def get_numpy_data(data_sframe, features, output):
    data_sframe['constant'] = 1 # add a constant column to an SFrame
    # prepend variable 'constant' to the features list
    features = ['constant'] + features
    # select the columns of data_SFrame given by the ‘features’ list into the SFrame ‘features_sframe’
    features_sframe = data_sframe[features]
    # this will convert the features_sframe into a numpy matrix with GraphLab Create >= 1.7!!
    features_matrix = features_sframe.to_numpy()
    # assign the column of data_sframe associated with the target to the variable ‘output_sarray’
    output_sarray = data_sframe[output]
    # this will convert the SArray into a numpy array:
    output_array = output_sarray.to_numpy() # GraphLab Create>= 1.7!!
    return(features_matrix, output_array)

In [99]:
mfet,mout = get_numpy_data(sales,['sqft_living'],['price'])

In [100]:
mfet[-1]

array([  1.00000000e+00,   1.02000000e+03])

In [101]:

mout

array([[ 221900.],
       [ 538000.],
       [ 180000.],
       ..., 
       [ 402101.],
       [ 400000.],
       [ 325000.]])

In [102]:
def predict_outcome(feature_matrix, weights):
    
    predictions = np.dot(feature_matrix, weights)
    return(predictions)

In [103]:
my_weights = np.array([1., 1.])
my_features = mfet[1,]
test_predictions = predict_outcome(my_features, my_weights)
test_predictions

2571.0

In [104]:
my_weights = np.array([1., 1.]) # the example weights
my_features = mfet[0,] # we'll use the first data point
predicted_value = np.dot(my_features, my_weights)
print predicted_value
print my_weights

1181.0
[ 1.  1.]


In [105]:
def feature_derivative(errors, feature):
    derivative = 2* np.dot(feature, errors)
    return(derivative)

In [106]:
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') 
my_weights = np.array([0., 0.]) # this makes all the predictions 0
test_predictions = predict_outcome(example_features, my_weights) 
# just like SFrames 2 numpy arrays can be elementwise subtracted with '-': 
errors = test_predictions - example_output # prediction errors in this case is just the -example_output
feature = example_features[:,0] # let's compute the derivative with respect to 'constant', the ":" indicates "all rows"
derivative = feature_derivative(errors, feature)
print derivative
print -np.sum(example_output)*2 # should be the same as derivative

-23345850022.0
-23345850022.0


In [107]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    converged = False 
    weights = initial_weights # make sure it's a numpy array
    while not converged:
        # compute the predictions based on feature_matrix and weights using your predict_output() function
        predictions = predict_outcome(feature_matrix, weights)
        # compute the errors as predictions - output
        errors = predictions - output
        gradient_sum_squares = 0 # initialize the gradient sum of squares
        # while we haven't reached the tolerance yet, update each feature's weight
        for i in range(len(weights)): # loop over each weight
            # Recall that feature_matrix[:, i] is the feature column associated with weights[i]
            # compute the derivative for weight[i]:
            feature = feature_matrix[:, i]
            derivative = feature_derivative(errors, feature)
            # add the squared value of the derivative to the gradient sum of squares (for assessing convergence)
            gradient_sum_squares += derivative**2
            # subtract the step size times the derivative from the current weight
            weights[i] -= step_size*derivative
        # compute the square-root of the gradient sum of squares to get the gradient matnigude:
        gradient_magnitude = np.sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance:
            converged = True
    return(weights)


In [108]:

simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(sales, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7
simple_weight = regression_gradient_descent(simple_feature_matrix,output,initial_weights,step_size,tolerance)
simple_weight

array([-46999.88187783,    281.99922231])

In [109]:
train_data,test_data = sales.random_split(.8,seed=0)
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(test_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7
simple_weight = regression_gradient_descent(simple_feature_matrix,output,initial_weights,step_size,tolerance)

predicted_test_house_price = predict_outcome(simple_feature_matrix,simple_weight)
predicted_test_house_price[0]

356774.13951860263

In [110]:
(test_feature,test_output) = get_numpy_data(test_data,simple_features,my_output)

In [111]:
predicted_tests_house_price = predict_outcome(test_feature,simple_weight)
predicted_tests_house_price[0]

356774.13951860263

In [112]:
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features,my_output)
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9
simple_weight2 = regression_gradient_descent(feature_matrix,output,initial_weights,step_size,tolerance)
simple_weight2

array([ -9.99999688e+04,   2.45072603e+02,   6.52795277e+01])

In [113]:
(test_feature,test_output) = get_numpy_data(test_data,model_features,my_output)
predicted_test_house_price = predict_outcome(test_feature,simple_weight2)
predicted_test_house_price[0]

366651.41203655908

In [114]:
a =test_data[0]['price']
a

310000.0

In [115]:

test_data['error'] = test_output- predicted_test_house_price
    

In [116]:
test_data['error'].sum()

15340686.177717872

In [117]:
test_data['error'] = test_output- predicted_tests_house_price

In [118]:
test_data['error'].sum()

12370649.91486343

In [119]:
simple_features = ['sqft_living']
my_output = 'price'
(test_feature,test_output) = get_numpy_data(test_data,simple_features,my_output)
predicted_test_house_price = predict_outcome(test_feature,simple_weight)
predicted_test_house_price[0]

356774.13951860263

In [122]:
simple_features = ['sqft_living']
my_output= 'price'
(bal, output) = get_numpy_data(test_data, simple_features, my_output)


predicted_test_house_price = predict_outcome(bal,simple_weight)

In [123]:
predicted_test_house_price[0]

356774.13951860263