In [1]:
import numpy as np
from io import StringIO

In [2]:
def compute_cost(X, y, theta):
    """Compute the squared-error cost of a linear model over the training set.

    Features are stored along the rows of X and training examples along its
    columns, so the prediction for example j is sum_i theta[i] * X[i, j].

    Params:
        X: input matrix of shape (n_features, m). A 1-D array is treated as a
            single feature row.
        y: output vector holding the m targets. Any layout is accepted
            ((m,), (1, m) or (m, 1)); it is flattened before use so it can
            never broadcast against the predictions into an (m, m) matrix.
        theta: values vector holding one weight per feature. Any layout is
            accepted ((n,), (1, n) or (n, 1)); it is flattened before use so
            each weight is always applied to its own feature row.

    Returns:
        Cost function J = sum((h - y) ** 2) / (2 * m) as a scalar.

    Raises:
        ValueError: if y is empty, or if the shape of X does not match the
            number of weights in theta and the number of targets in y.
    """
    X = np.atleast_2d(np.asarray(X))
    y = np.asarray(y).reshape(-1)
    theta = np.asarray(theta).reshape(-1)

    m = y.size
    if m == 0:
        # Without this guard the division below yields nan plus a warning.
        raise ValueError("y must contain at least one training example")
    if X.shape != (theta.size, m):
        raise ValueError(
            "X has shape {}; expected ({}, {}) to match theta and y".format(
                X.shape, theta.size, m
            )
        )

    # Scale every feature row by its weight, then add the rows together to
    # obtain one prediction per training example.
    h0 = np.sum(X * theta[:, np.newaxis], 0)
    delta = h0 - y
    J = np.sum(np.square(delta)) / (2 * m)
    return J

In [3]:
def gradient_descent(X, y, theta, learning_rate, iterations):
    """Fit linear-model weights with batch gradient descent.

    Params:
        X: input matrix with one feature per row and one training example per
            column.
        y: output vector with one target per training example.
        theta: starting weights as a row vector with one entry per row of X.
            The caller's array is not modified; a new array is produced on
            every update.
        learning_rate: step size applied to the averaged gradient.
        iterations: number of update steps to perform.

    Returns:
        A tuple (Jhist, theta): Jhist is an (iterations, 1) array whose i-th
        entry is the cost measured right after the i-th update, and theta is
        the weight vector after the last update.
    """
    n_examples = y.size
    cost_history = np.zeros((iterations, 1))

    for step in range(iterations):
        # One prediction per training example, kept as a single row.
        predictions = np.array([(X * theta.T).sum(axis=0)])
        residual = predictions - y
        # Average, over the examples, of residual times feature value:
        # one gradient entry per weight, laid out as a row like theta.
        gradient_row = np.array([(residual * X).sum(axis=1) / n_examples])
        theta = theta - learning_rate * gradient_row
        cost_history[step] = compute_cost(X, y, theta)

    return cost_history, theta

In [4]:
# Load the data set.
# The file holds one training example per line as two comma-separated
# numbers, e.g.
# 12.2,25
# 10.3,22
# 15.2,2.5
# 2.1,0.2
# ndmin=2 keeps the result two-dimensional even when the file has a single
# line; otherwise loadtxt squeezes it to 1-D and column indexing such as
# data[:, 0] fails.
data = np.loadtxt('data.txt', dtype=float, delimiter=',', ndmin=2)

In [5]:
# Hyper-parameters for the gradient descent run
learning_rate = 0.01
iterations = 1500

# Split the loaded table into row vectors: column 0 holds the inputs and
# column 1 the outputs, so X and y each get shape (1, number_of_examples).
X = np.array(data[:, 0], ndmin=2)
y = np.array(data[:, 1], ndmin=2)

# Initial weights as a (1, 2) row vector of integer zeros
theta = np.zeros((1, 2), dtype=int)
# To inspect the layouts, evaluate: X.shape, y.shape, theta.shape

In [6]:
# Prepend a row of ones to X: the constant X0 feature that pairs with theta0.
# NOTE: this cell rebinds X from itself, so re-running it without first
# re-running the cell that builds X stacks one more row of ones on top.
padded_ones = np.ones((1, X.shape[1]))
# np.vstack is the supported spelling; the np.row_stack alias is deprecated
# as of NumPy 2.0.
X = np.vstack((padded_ones, X))

In [7]:
# Train the model. theta is rebound to the fitted weights, so running this
# cell again continues from the fitted values instead of the initial ones.
Jhist, theta = gradient_descent(
    X, y, theta, learning_rate=learning_rate, iterations=iterations
)
print(theta)

[[-3.63029144  1.16636235]]


In [8]:
# Sample prediction for an input of 3.3; the leading 1 is the constant X0
# feature, so it multiplies the first weight in theta.
sample_input = np.array([1, 3.3])
sample_prediction = sample_input @ theta[0]
print(sample_prediction)

0.21870431670306045
