In [1]:
import graphlab
import numpy as np

A newer version of GraphLab Create (v1.8.5) is available! Your current version is v1.8.1.

You can use pip to upgrade the graphlab-create package. For more information see https://dato.com/products/create/upgrade.


In [2]:
# Load the house sales data from the 'kc_house_data.gl/' directory (path is relative to the notebook's working directory).
sales = graphlab.SFrame('kc_house_data.gl/')

[INFO] This non-commercial license of GraphLab Create is assigned to kaviarasu.govindaraju@snapchat.com and will expire on February 06, 2017. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-6101 - Server binary: /Users/kaviarasu.govindaraju/anaconda/envs/dato-env/lib/python2.7/site-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1459292635.log
[INFO] GraphLab Server Version: 1.8.1


In [3]:
def get_numpy_data(data_sframe, features, output):
    """Build a (features matrix, output array) pair from an SFrame.

    Args:
        data_sframe: source SFrame. NOTE: a 'constant' column (all 1s) is
            written into this SFrame in place, so the caller's frame gains
            that column.
        features: list of column names to use as features.
        output: name of the target column.

    Returns:
        A tuple (features_matrix, output_array), both produced by
        to_numpy(). The first column of features_matrix is 'constant',
        followed by the requested features in the order given.
    """
    # The intercept is modelled as a feature whose value is always 1.
    data_sframe['constant'] = 1
    column_names = ['constant'] + features

    # Copy the wanted columns, in order, into a fresh SFrame.
    selected = graphlab.SFrame()
    for name in column_names:
        selected[name] = data_sframe[name]

    # to_numpy() on an SFrame needs GraphLab Create >= 1.7.
    return (selected.to_numpy(), data_sframe[output].to_numpy())

def predict_outcome(feature_matrix, weights):
    """Return the predicted outcomes for the given features and weights.

    The prediction is the dot product of feature_matrix with weights
    (the regression coefficients).
    """
    return np.dot(feature_matrix, weights)

## Computing the derivative

In [4]:
# Ridge cost = RSS + l2_penalty * (sum of squared coefficients)
#            = ||y - Hw||^2 + l2_penalty * ||w||^2

# Gradient(ridge cost) = -2 H^T (y - Hw) + 2 * l2_penalty * w
# i.e. for feature i:    2 * SUM[ feature_i * error ] + 2 * l2_penalty * w[i],  where error = Hw - y

def feature_derivative_ridge(errors, feature, weight, l2_penalty, feature_is_constant):
    """Derivative of the ridge cost with respect to a single weight.

    Args:
        errors: prediction errors (predictions minus observed output).
        feature: values of the feature this weight multiplies.
        weight: current value of the weight.
        l2_penalty: L2 regularization strength.
        feature_is_constant: True when the feature is the constant
            (intercept) column; its weight carries no penalty term.

    Returns:
        2 * dot(errors, feature), plus 2 * l2_penalty * weight unless
        feature_is_constant is True.
    """
    rss_term = 2 * np.dot(errors, feature)
    if feature_is_constant:
        # The intercept is not regularized.
        return rss_term
    return rss_term + (2 * l2_penalty * weight)

In [6]:
## Test code to verify derivative computation

# Build the [constant, sqft_living] feature matrix and the price vector from the full data set.
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') 
my_weights = np.array([1., 10.])
test_predictions = predict_outcome(example_features, my_weights) 
errors = test_predictions - example_output # prediction errors

# next two lines should print the same values
# (non-constant feature with l2_penalty=1: the trailing 20. is 2*l2_penalty*weight = 2*1*10.)
print feature_derivative_ridge(errors, example_features[:,1], my_weights[1], 1, False)
print np.sum(errors*example_features[:,1])*2+20.
print ''

# next two lines should print the same values
# (constant feature: no penalty term, and the column is all 1s so the dot product is just sum(errors))
print feature_derivative_ridge(errors, example_features[:,0], my_weights[0], 1, True)
print np.sum(errors)*2.

-5.65541667824e+13
-5.65541667824e+13

-22446749336.0
-22446749336.0


## Descent

In [9]:
def ridge_regression_gradient_descent(feature_matrix, output, initial_weights, step_size, l2_penalty, max_iterations=100):
    """Fit ridge regression weights with fixed-step gradient descent.

    Args:
        feature_matrix: 2-D array of features, one column per weight.
            Column 0 is treated as the constant (intercept) column, which
            is where get_numpy_data places it.
        output: array of observed target values, one per row.
        initial_weights: starting weights, one per feature column. It is
            copied, so the caller's array is left untouched.
        step_size: gradient descent step size.
        l2_penalty: L2 regularization strength; applied to every weight
            except the intercept (column 0).
        max_iterations: number of full passes over the weights.

    Returns:
        numpy float array holding the weights after max_iterations updates.
    """
    # Float copy: keeps the caller's array intact and prevents integer
    # starting weights from truncating every update to a whole number.
    weights = np.array(initial_weights, dtype=float)

    for _ in range(max_iterations):
        # Predict with the *current* weights. Using initial_weights here
        # would freeze the errors, so every iteration would take the same step.
        predictions = predict_outcome(feature_matrix, weights)
        errors = predictions - output

        for i in range(len(weights)):
            # Only the intercept (column 0) is exempt from the L2 penalty.
            feature_is_constant = (i == 0)
            derivative = feature_derivative_ridge(
                errors, feature_matrix[:, i], weights[i], l2_penalty, feature_is_constant)
            weights[i] -= step_size * derivative

    return weights

## Visualizing effect of L2 penalty

In [10]:
# Single-feature model: 'sqft_living' is the only input and 'price' is the target.
simple_features = ['sqft_living']
my_output = 'price'
# Split with fraction .8 and a fixed seed (seed=0) so the split is repeatable across runs.
train_data,test_data = sales.random_split(.8,seed=0)

In this part, we will only use 'sqft_living' to predict 'price'

In [11]:
# Convert the train and test SFrames into (feature matrix, output array) pairs.
# get_numpy_data prepends the 'constant' column, so each matrix has columns [constant, sqft_living].
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
(simple_test_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)

In [None]:
# Unregularized baseline: start from zero weights (one for the constant, one for sqft_living)
# and run gradient descent with l2_penalty = 0.
initial_weights = np.array([0., 0.])
step_size = 1e-12
max_iterations=1000
simple_weights_0_penalty = ridge_regression_gradient_descent(simple_feature_matrix, output, initial_weights,
                                                            step_size, 0, max_iterations)
