In [15]:
# gradient descent function
import numpy as np
def gradient_descent(X, y, learning_rate = 0.0001, max_iter = 1000, epsilon = 0.0001, print_output = True):
  """Fit linear-regression weights with batch gradient descent.

  Minimises the half mean squared error

      J(w) = 1/(2n) * sum((y - X w)**2)

  whose gradient is (1/n) * X.T (X w - y). The weights are initialised with
  draws from the global NumPy random state (``np.random.randn``), so seed it
  beforehand for reproducible results.

  Parameters
  ----------
  X : array-like of shape (n, d)
      Design matrix; include a column of ones if an intercept is wanted.
  y : array-like of shape (n, 1) or (n,)
      Targets. A 1-D vector is treated as a single column.
  learning_rate : float
      Step size multiplying the gradient in each update.
  max_iter : int
      Maximum number of updates to perform.
  epsilon : float
      Stop as soon as the norm of the step just taken
      (``learning_rate * gradient``) is smaller than this value.
  print_output : bool
      If True, print the updated weights, the cost and the gradient after
      every non-converging iteration.

  Returns
  -------
  w : ndarray of shape (d, 1)
      Final weight estimate.
  gradient : ndarray of shape (d, 1)
      Gradient used for the last update (i.e. evaluated at the weights
      *before* that update). If no iteration ran, the gradient at the
      initial weights.

  Raises
  ------
  ValueError
      If X and y do not have the same number of observations.
  """
  X = np.asarray(X)
  y = np.asarray(y)
  # A 1-D y would broadcast (n,1) - (n,) into an (n,n) matrix and silently
  # produce a wrong-shaped result, so force it into a column.
  if y.ndim == 1:
    y = y.reshape(-1, 1)

  # number of obs
  n = X.shape[0]
  if y.shape[0] != n:
    raise ValueError(f'X has {n} rows but y has {y.shape[0]} rows')

  # initializing w
  w = np.random.randn(X.shape[1], 1)
  gradient = None

  # loop over max iterations
  for i in range(max_iter):
    yhat = X.dot(w)                        # prediction
    gradient = (1/n) * X.T.dot(yhat - y)   # gradient of the half-MSE cost

    if print_output:
      # Only needed for reporting; uses the same 1/(2n) scaling as the gradient.
      # It is the cost at the weights *before* this iteration's update.
      cost = (1/(2*n)) * np.sum((y - yhat)**2)

    w = w - (learning_rate * gradient)

    ## stop if convergence (the step just taken was smaller than epsilon)
    if np.linalg.norm(learning_rate * gradient) < epsilon:
      print(f'Convergence at iteration: {i}')   # i is the 0-based loop index
      print('\n')
      break

    if print_output:
      print(f'\n__________________________________________________________________'
            f'\nIteration # {i+1},\nEstimate w = \n{np.around(w, decimals=5)}, \nCost: {cost: 0.5f}, \nGradient: \n{gradient}')

  if gradient is None:
    # max_iter <= 0: no update happened, report the gradient at the initial w
    gradient = (1/n) * X.T.dot(X.dot(w) - y)
  return w, gradient

In [13]:
## illustration: simulate data from a known linear model
import numpy
import numpy as np  # this cell uses the `np` alias, so bind it here rather than relying on another cell

np.random.seed(630)                      # fixed seed so the simulated sample is reproducible
x = np.random.rand(100)                  # 100 uniform draws on [0, 1)
X = np.vstack([np.ones(len(x)), x]).T    # design matrix: intercept column, then x
## let's create an oracle
## THE TRUE y-intercept 2
## THE TRUE slope 1.5
## some random error (Gaussian, mean 0, standard deviation 0.4)
y = 2 + 1.5*x + np.random.normal(0,0.4,100)
y = y.reshape(-1,1)                      # column vector, shape (100, 1)

In [16]:
# Fit the simulated data; per-iteration printing is off, so only the
# convergence message and the returned (w, gradient) pair are shown.
gradient_descent(
    X=X,
    y=y,
    learning_rate=0.01,
    max_iter=10_000,
    print_output=False,
)

Convergence at iteration: 2911




(array([[2.04517696],
        [1.38397059]]),
 array([[ 0.00416752],
        [-0.00908449]]))