In [1]:
#![title](./img/log.png "ShowMyImage")

In [2]:
import sklearn, pandas as pd
import numpy as np

In [3]:
# Read the house-sales CSV; the first column of the file becomes the row index.
full_data = pd.read_csv(
    "../regression/data/kc-house-data.csv",
    index_col=0,
)
full_data.head()

Unnamed: 0_level_0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [4]:
def convert_to_numpy_matrix(data,features_list,output_name):
    """Build the feature matrix and output vector used by the regression code.

    The feature matrix always has an intercept column named 'constant' as its
    first column, followed by the requested feature columns in the given order.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table. It is never modified: if it has no 'constant' column,
        a column of ones is added to a private copy of the selected features.
        If it already has a 'constant' column, that column is used as-is.
    features_list : sequence of str
        Names of the feature columns (any sequence, e.g. list or tuple).
    output_name : str
        Name of the column holding the target values.

    Returns
    -------
    tuple
        (feature_matrix, output_array) as NumPy arrays, where feature_matrix
        has shape (n_rows, 1 + len(features_list)).
    """
    feature_names = list(features_list)  # accept tuples/other sequences too
    if 'constant' in data:
        feature_frame = data[['constant'] + feature_names]
    else:
        # Work on a copy so the caller's frame does not silently gain a column
        # (and so slices of another frame do not trigger SettingWithCopyWarning).
        feature_frame = data[feature_names].copy()
        feature_frame.insert(0, 'constant', 1)

    df_features_matrix = feature_frame.values
    output_array = data[output_name].values
    return (df_features_matrix,output_array)

In [5]:
# For testing, use 'sqft_living' plus the constant column as features and 'price' as the output.
example_features, example_output = convert_to_numpy_matrix(
    full_data, ['sqft_living'], 'price'
)

In [6]:
# Show the first data point: its feature row (all columns) and its output value.
print(example_features[0])
print(example_output[0])

[   1 1180]
221900.0


In [7]:
# Example weights, applied to the first data point only.
my_weights = np.array([3., 2.])
my_features = example_features[0]

# A single prediction is the dot product of the feature row with the weights.
predicted_value = np.dot(my_features, my_weights)
print(f" {my_weights=}, {my_features=},{predicted_value=}")


 my_weights=array([3., 2.]), my_features=array([   1, 1180], dtype=int64),predicted_value=2363.0


In [8]:
def predict_output(feature_matrix, weights):
    """Return the predictions for every row of `feature_matrix`.

    `feature_matrix` holds one feature per column and `weights` holds the
    matching weight for each column; the predictions are their dot product.
    """
    predictions = np.dot(feature_matrix, weights)
    return predictions

In [9]:
# Predict for every row using weights of 1 for both the constant and the feature.
my_new_weights = [1., 1.]
test_predictions = predict_output(example_features, my_new_weights)
print(test_predictions)

[1181. 2571.  771. ... 1021. 1601. 1021.]


![title](./images/img1.png "ShowMyImage")

In [10]:
def feature_derivative(errors,feature):
    """Return twice the dot product of `errors` and `feature`.

    Both arguments are expected to be NumPy arrays with one entry per data
    point (same length).
    """
    doubled_products = 2*errors*feature
    return np.sum(doubled_products)

In [11]:
# Rebuild the example features and start from all-zero weights, so every prediction is 0.
(example_features,example_output)=convert_to_numpy_matrix(full_data,['sqft_living'],'price')
my_weights=np.array([0.,0.])
test_predictions=predict_output(example_features,my_weights)
# two NumPy arrays of the same length can be subtracted element-wise with '-':
errors = test_predictions - example_output

# let's compute the derivative with respect to 'constant', the ":" indicates "all rows"
feature = example_features[:,0]
derivative = feature_derivative(errors,feature)
# With zero predictions and a constant feature the derivative reduces to -2 * sum(output).
expected_derivative = -np.sum(example_output)*2
print(errors)
print(derivative)
print(expected_derivative) #should be the same as derivative
# Check the claim instead of relying on comparing the two printed numbers by eye.
assert np.isclose(derivative, expected_derivative)

[-221900. -538000. -180000. ... -402101. -400000. -325000.]
-23345850016.0
-23345850016.0


In [12]:
from math import sqrt

def reg_gradient_descent(feature_matrix,output,initial_weights,step_size,tolerance,
                         max_iterations=None,verbose=True):
    """Fit regression weights by gradient descent.

    Each iteration computes the prediction errors once, then for every weight
    computes its derivative with `feature_derivative` and moves the weight by
    `-step_size * derivative`. Iteration stops once the gradient magnitude
    (square root of the sum of squared derivatives) drops below `tolerance`.

    Parameters
    ----------
    feature_matrix : numpy.ndarray
        2-D array; column i is the feature associated with weights[i].
    output : numpy.ndarray
        Target values, one per row of `feature_matrix`.
    initial_weights : array-like
        Starting weights. They are copied into a float array, so the caller's
        object is not modified and integer inputs are not truncated on update.
    step_size : float
        Multiplier applied to each derivative when updating a weight.
    tolerance : float
        Convergence threshold for the gradient magnitude.
    max_iterations : int or None, optional
        Upper bound on the number of iterations. None (the default) keeps the
        original behaviour of iterating until convergence.
    verbose : bool, optional
        When True (the default) print every updated weight and the final
        gradient magnitude, as the original implementation did.

    Returns
    -------
    numpy.ndarray
        The weights after the last update.

    Raises
    ------
    FloatingPointError
        If the gradient magnitude becomes inf or nan, which means the descent
        diverged and would otherwise loop forever.
    """
    import warnings

    # dtype=float: an integer initial_weights would otherwise yield an integer
    # array and every `weights[i] = ...` assignment would be truncated.
    weights = np.array(initial_weights, dtype=float)
    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        #compute predictions
        predictions = predict_output(feature_matrix, weights)
        #predictions - actual output
        errors = predictions - output
        gradient_sum_squares = 0

        for i in range(len(weights)):
            # feature_matrix[:, i] is the feature column associated with weights[i];
            # `errors` is fixed for the whole pass, so updating weights[i] here
            # does not affect the derivatives of the remaining weights.
            derivative = feature_derivative(errors,feature_matrix[:,i])
            # accumulate the squared derivative (for assessing convergence)
            gradient_sum_squares+=np.square(derivative)
            # subtract the step size times the derivative from the current weight
            weights[i] = weights[i] - step_size*derivative
            if verbose:
                print(f"{weights[i]=}")

        # square-root of the gradient sum of squares gives the gradient magnitude
        gradient_magnitude = sqrt(gradient_sum_squares)
        if not np.isfinite(gradient_magnitude):
            # inf/nan never compares below tolerance, so stop instead of spinning.
            raise FloatingPointError(
                "gradient descent diverged (non-finite gradient magnitude); "
                "try a smaller step_size"
            )
        if gradient_magnitude < tolerance:
            if verbose:
                print(f"{gradient_magnitude=}")
            return(weights)
        iteration += 1

    warnings.warn(
        f"reg_gradient_descent stopped after {max_iterations} iterations "
        "without reaching the requested tolerance"
    )
    return(weights)


In [13]:
from sklearn.model_selection import train_test_split
# 80/20 train/test split with a fixed random_state so the split is reproducible.
train_data, test_data = train_test_split(
    full_data,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

In [14]:
# Try out gradient descent on a one-feature model built from the training split.
simple_features = ['sqft_living']
my_output = 'price'
simple_feature_matrix, output = convert_to_numpy_matrix(
    train_data, simple_features, my_output
)

# Starting point, step size and convergence threshold for the descent.
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

In [16]:
# Run gradient descent on the training features with the settings chosen above.
updated_weights = reg_gradient_descent(
    simple_feature_matrix,
    output,
    initial_weights,
    step_size,
    tolerance,
)

weights[i]=-46999.857736879
weights[i]=355.489581216176
weights[i]=-46999.89407547345
weights[i]=265.09794167400736
weights[i]=-46999.88487224454
weights[i]=288.1469928825168
weights[i]=-46999.887281769006
weights[i]=282.26969315165684
weights[i]=-46999.88673014631
weights[i]=283.7683514930775
weights[i]=-46999.886933589376
weights[i]=283.38620718510504
weights[i]=-46999.88694449749
weights[i]=283.4836505552754
weights[i]=-46999.88700450028
weights[i]=283.4588034017161
weights[i]=-46999.88705198436
weights[i]=283.4651392265269
weights[i]=-46999.88710266061
weights[i]=283.4635236736645
weights[i]=-46999.887152522875
weights[i]=283.4639356502494
weights[i]=-46999.8872025927
weights[i]=283.4638306251488
gradient_magnitude=16621389.452372102
