In [2]:
import numpy as np
import pandas as pd

In [3]:
# Column types shared by both CSV files, so the train and test sets are
# parsed with exactly the same schema.
kc_house_dtypes = {
    'bathrooms': float,
    'waterfront': int,
    'sqft_above': int,
    'sqft_living15': float,
    'grade': int,
    'yr_renovated': int,
    'price': float,
    'bedrooms': float,
    'zipcode': str,
    'long': float,
    'sqft_lot15': float,
    'sqft_living': float,
    'floors': str,
    'condition': int,
    'lat': float,
    'date': str,
    'sqft_basement': int,
    'yr_built': int,
    'id': str,
    'sqft_lot': int,
    'view': int,
}
train_data = pd.read_csv('data/kc_house_train_data.csv', dtype=kc_house_dtypes)
test_data = pd.read_csv('data/kc_house_test_data.csv', dtype=kc_house_dtypes)

In [72]:
def get_numpy_data(data, features, output):
    """Build the design matrix and target vector for a linear model.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the feature columns and the target column.
    features : sequence of str
        Names of the columns to use as predictors (list or tuple).
    output : str
        Name of the target column.

    Returns
    -------
    (features_matrix, output_array) : tuple of numpy.ndarray
        ``features_matrix`` has one row per row of ``data``; its first
        column is the intercept term (all ones), followed by the requested
        features in the given order. ``output_array`` holds the values of
        the ``output`` column.

    Notes
    -----
    The caller's DataFrame is not modified: the intercept column is added
    to a copy, so calling this function repeatedly (or re-running a cell)
    never leaves a stray ``'constant'`` column behind in ``data``.
    """
    # Intercept first, then the requested predictors; list() lets callers
    # pass any sequence of column names, not only a list.
    columns = ['constant'] + list(features)

    # assign() returns a new frame, leaving the input untouched.
    design = data.assign(constant=1)[columns]
    features_matrix = design.to_numpy()

    # Target values as a 1-D array.
    output_array = data[output].to_numpy()

    return (features_matrix, output_array)

In [73]:
def predict_outcome(feature_matrix, weights):
    """Return the linear-model predictions for every row of ``feature_matrix``.

    Each prediction is the dot product of one row of ``feature_matrix``
    with ``weights``.
    """
    return np.dot(feature_matrix, weights)


# Gradient descent using dot product

In [108]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance, max_iterations=None):
    """Fit linear-regression weights by batch gradient descent on the RSS.

    Parameters
    ----------
    feature_matrix : array-like, shape (n_rows, n_weights)
        Design matrix; converted with ``np.asarray``.
    output : array-like, shape (n_rows,)
        Observed target values.
    initial_weights : array-like, shape (n_weights,)
        Starting weights. They are copied, so the caller's object is
        never modified.
    step_size : float
        Multiplier applied to the gradient on every update.
    tolerance : float
        The loop stops after the first update whose gradient magnitude
        (Euclidean norm) is below this value.
    max_iterations : int or None, optional
        Upper bound on the number of updates. ``None`` (the default) keeps
        the original behaviour of iterating until the tolerance is met.

    Returns
    -------
    (weights, i) : tuple
        The final weights and the number of updates performed. When
        ``max_iterations`` is given and ``i == max_iterations``, the
        tolerance may not have been reached.

    Raises
    ------
    FloatingPointError
        If the gradient magnitude becomes infinite or NaN, which happens
        when ``step_size`` is too large and the iteration diverges. A NaN
        magnitude never compares below ``tolerance``, so without this
        check the loop would never terminate.
    """
    feature_matrix = np.asarray(feature_matrix)
    weights = np.array(initial_weights)
    i = 0
    while max_iterations is None or i < max_iterations:
        # Residuals of the current fit: predictions - observed output.
        error = np.asarray(np.dot(feature_matrix, weights) - output)
        # Gradient of the RSS with respect to all weights at once:
        # 2 * X^T (Xw - y).
        partial = 2 * np.dot(feature_matrix.T, error)
        gradient_magnitude = np.sqrt(np.sum(partial ** 2))
        if not np.isfinite(gradient_magnitude):
            raise FloatingPointError(
                "gradient descent diverged after {} iterations; "
                "try a smaller step_size".format(i)
            )
        # Take the step first, then test convergence, so the returned
        # weights include the update made on the final iteration.
        weights = weights - (step_size * partial)
        i += 1
        if gradient_magnitude < tolerance:
            break
    return (weights, i)

In [109]:
# Design matrix (intercept + sqft_living) and price vector for the training set.
simple_feature_matrix, output = get_numpy_data(
    data=train_data,
    features=['sqft_living'],
    output='price',
)

In [110]:
# Starting point and gradient-descent settings for the single-feature model.
initial_weights = np.array([-4.7e4, 1.0])  # [intercept, sqft_living weight]
step_size = 7e-12
tolerance = 2.5e7  # stop once the gradient magnitude drops below this

In [111]:
# Fit the single-feature model; `i` receives the number of iterations run.
simple_weights, i = regression_gradient_descent(
    feature_matrix=simple_feature_matrix,
    output=output,
    initial_weights=initial_weights,
    step_size=step_size,
    tolerance=tolerance,
)

In [112]:
# Show the fitted [intercept, sqft_living] weights and the iteration count.
simple_weights, i

(array([-46999.88716555,    281.91211918]), 12)

In [133]:
# Same single-feature design matrix and price vector, built from the test set.
test_simple_feature_matrix, output_test = get_numpy_data(
    data=test_data,
    features=['sqft_living'],
    output='price',
)

In [134]:
# Test-set predictions from the weights fitted on the training set.
predict_test = predict_outcome(
    feature_matrix=test_simple_feature_matrix,
    weights=simple_weights,
)

In [135]:
# Predicted price for the first row of the test set (single-feature model).
predict_test[0]

356134.4432550024

In [117]:
# Residual sum of squares of the single-feature model on the test set.
RSS = np.square(output_test - predict_test).sum()
print("{:.2E}".format(RSS))

2.75E+14


# New model with more than one predictor variable

In [136]:
# Two-feature model: predictors, target column and gradient-descent settings.
model_features = ['sqft_living', 'sqft_living15']
output = 'price'  # name of the target column
initial_weights = [-1e5, 1.0, 1.0]  # [intercept, sqft_living, sqft_living15]
step_size = 4e-12
tolerance = 1e9  # stop once the gradient magnitude drops below this

In [143]:
# Design matrix and target vector for the two-feature model, test set.
feature_matrix_TEST, output_array_TEST = get_numpy_data(
    data=test_data,
    features=model_features,
    output=output,
)

In [144]:
# Design matrix and target vector for the two-feature model, training set.
feature_matrix_TRAIN, output_array_TRAIN = get_numpy_data(
    data=train_data,
    features=model_features,
    output=output,
)

In [145]:
# Fit the two-feature model; `i` receives the number of iterations run.
weights_TRAIN, i = regression_gradient_descent(
    feature_matrix=feature_matrix_TRAIN,
    output=output_array_TRAIN,
    initial_weights=initial_weights,
    step_size=step_size,
    tolerance=tolerance,
)

In [147]:
# Show the fitted [intercept, sqft_living, sqft_living15] weights and the
# iteration count.
weights_TRAIN,i

(array([-9.99999688e+04,  2.45072603e+02,  6.52795267e+01]), 274)

In [148]:
# Test-set predictions from the two-feature weights fitted on the training set.
new_predict_test = predict_outcome(
    feature_matrix=feature_matrix_TEST,
    weights=weights_TRAIN,
)

In [149]:
# Predicted price for the first row of the test set (two-feature model).
new_predict_test[0]

366651.4116294939

In [151]:
# Actual price of the first test row, for comparison with the prediction.
output_array_TEST[0]

310000.0

In [152]:
# Residual sum of squares of the two-feature model on the test set.
# NOTE: this rebinds RSS, replacing the single-feature model's value.
RSS = np.square(output_array_TEST - new_predict_test).sum()
print("{:.2E}".format(RSS))

2.70E+14
