In [11]:
# Step 1: Load dataset, split into training and test sets, and scale features
import numpy as np
from sklearn import datasets
import pandas as pd
#from sklearn.datasets import load_boston
california = datasets.fetch_california_housing(as_frame=False)
#X, y = housing.data, housing.target
# load boston housing price dataset

x = california.data
y = california.target

# split into training and test sets, namely 80 percent of examples goes for the training, 20 percent goes for the test set
N_train = int(0.8 * x.shape[0])
x_train = x[:N_train,:]
y_train = y[:N_train]
x_test = x[N_train:,:]
y_test = y[N_train:]

x_train_mean = np.mean(x_train, axis=0)
x_train_std = np.std(x_train, axis=0)
x_test_mean = np.mean(x_test, axis=0)
x_test_std = np.std(x_test, axis=0)
# scale features by removing mean and dividing by the standard deviation
x_train_scaled = (x_train-x_train_mean)/x_train_std# YOUR CODE GOES HERE
x_test_scaled = (x_test-x_test_mean)/x_test_std# YOUR CODE GOES HERE

print(x_train_scaled.shape)
print(y_train.shape)
print(x_test_scaled.shape)
print(y_test.shape)

(16512, 8)
(16512,)
(4128, 8)
(4128,)


In [13]:
# Step 2: Add intercept terms and initialize parameters
# Note: If you run this step again, please run from step 1 because notebook keeps the value from the previous run
x_train_scaled = np.concatenate((np.ones((x_train_scaled.shape[0], 1)), x_train_scaled), axis=1)
x_test_scaled = np.concatenate((np.ones((x_test_scaled.shape[0], 1)), x_test_scaled), axis=1)

print(x_train_scaled.shape)
print(x_test_scaled.shape)


# init parameters using random values
theta = np.random.randn(x_train_scaled.shape[1], 1)# YOUR CODE GOES HERE
print(theta)

(16512, 10)
(4128, 10)
[[ 0.6227632 ]
 [-0.36025802]
 [ 1.32368945]
 [-0.7853527 ]
 [-0.70974507]
 [-0.99732841]
 [-1.10761125]
 [-1.35679806]
 [ 0.01499003]
 [-0.12272687]]


In [17]:
# Step 3: Implement the gradient and the cost function
# In this step, you have to calculate the gradient. You can use the provided formula but the best way is to vectorize
# that formula for efficiency
def compute_gradient(x,y,theta):
    # YOUR CODE GOES HERE
    grad=(1/len(x)) * ((x.T) @(x @ theta-y))
    return grad
def compute_cost(x,y,theta):
    # YOUR CODE GOES HERE
    cost=(1/(2*len(x)))@((x@theta-y)@(x@theta-y))
    return cost

In [19]:
# Step 4: Verify the gradient value
# In this step, you need to verify that the computed gradient is correct. The difference betweet the gradient and the
# approximate gradient should be very small (~10^-18)
def approximate_gradient(x,y,theta,epsilon):
    n_features = x.shape[1]
    app_grad = np.zeros(n_features)
    for i in range(n_features):
        e = np.zeros(theta.shape)
        e[i] = epsilon
        cost_plus = compute_cost(x, y, theta + e)
        cost_minus = compute_cost(x, y, theta - e)
        app_grad[i] = (cost_plus - cost_minus) / (2 * epsilon)# YOUR CODE GOES HERE
    return app_grad

grad = compute_gradient(x_train_scaled,y_train,theta)
epsilon = 1e-4
app_grad = approximate_gradient(x_train_scaled,y_train,theta,epsilon)
print('Sum of gradient squared error: ',np.sum((grad - app_grad)**2))

TypeError: dot() missing 1 required positional argument: 'b'

In [None]:
# Step 5: Try gradient descent algorithm with different learning rates
import matplotlib.pyplot as plt
import copy

# try different values for the learning rate
learning_rates = [0.001,0.003,0.01,0.03,0.1,0.3]

# this matrix keeps the learned parameters
theta_matrix = np.zeros((len(learning_rates),x_train_scaled.shape[1]))

# number of training iterations
N_iterations = 100

# prepare to plot
plt.subplot(111)

# calculate cost value and update theta
for indx,alpha in enumerate(learning_rates):
    # keep the cost value for each training step
    J = np.zeros(N_iterations)

    # initialize new parameters using random distribution
    theta = 0.5 * np.random.randn(x_train_scaled.shape[1])
    for step in range(N_iterations):
        # update theta
        theta = # YOUR CODE GOES HERE

        # save the value of theta
        theta_matrix[indx,:] = theta

        # calculate the cost on traing set
        J[step] = # YOUR CODE GOES HERE
    # plot cost function
    plt.plot(J)
plt.xlabel('Training step')
plt.ylabel('Cost')
plt.legend(('0.001','0.003','0.01','0.03','0.1','0.3'), loc='upper right')
plt.show()


In [None]:
# Step 6: Predict the price of house
# You have to select the best theta you found
theta = # YOUR CODE GOES HERE
predict_price = # YOUR CODE GOES HERE

# calculate the cost for the test set
test_cost = # YOUR CODE GOES HERE
print('test cost: ',test_cost)

# plot the ground truth and the predicted
x_axis = np.linspace(1,len(y_test),len(y_test))
plt.plot(x_axis,y_test,'b',x_axis,predict_price,'r')
plt.legend(('Ground truth','Predicted'))
plt.show()