In [1]:
!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting jedi>=0.16 (from ipython->ipython-autotime)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl (7.0 kB)
Using cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.2 jedi-0.19.1
time: 297 µs (started: 2024-08-27 16:10:59 +00:00)


In [4]:
# Stochastic Gradient descent (Multiple linear regression)
import numpy as np

x = np.array([[0,1],[2,6],[3,8]]) #x1, x2
y = np.array([1,1,4])

x_b = np.c_[np.ones((x.shape[0],1)),x]

def cost_function(theta, x, y, N):
  y_hat = x.dot(theta)
  c = (1/(2*N))*np.sum((y_hat-y)**2)
  return c

def stochastic_gradient_descent(alpha, x, y, ep=0.001, max_iter=10000):
  converged = False
  iter = 0
  N = x.shape[0] # number of samples
  print("Num of data = ",N)

  # initial theta
  theta =  np.random.random((x.shape[1],1))
  print("Init theta.shape = ",theta.shape)

  # total error, J(theta)
  J = cost_function(theta, x, y, N)
  print("First J = ",J)

  # Iterate Loop
  while not converged:
    for i in range(N):
      y_hat = x[i].dot(theta)
      diff = y_hat - y[i]
      grad = x[i].reshape(1,-1).T.dot(diff)

      grad = grad.reshape(-1, 1)  # Ensure grad is a column vector

      theta = theta - alpha * grad

      assert theta.shape == (3,1) #This line makes sure that the shape of theta is still be the same.

    # error
    J2 = cost_function(theta, x, y, N)

    if abs(J-J2) <= ep:
        print("       Converged, iterations: ", iter, "/", max_iter)
        converged = True

    J = J2   # update error
    iter += 1  # update iter

    if iter == max_iter:
        print('       Max iterations exceeded!')
        converged = True

  #print("End converged iter = ",iter)
  return theta

if __name__ == '__main__':

  print("start main")
  print(x_b.shape)
  y = y.reshape(-1,1)
  print(y.shape)

  alpha = 0.01 # learning rate
  #Training process
  theta = stochastic_gradient_descent(alpha, x_b, y, ep=0.000000000001, max_iter=1000000)
  print ("Theta = ", theta)

  #predict trainned x
  xtest = np.array([[4,9]])
  xtest_b = np.c_[np.ones((xtest.shape[0],1)),xtest]
  y_p = xtest_b.dot(theta)
  print("y predict = ",y_p)


start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  4.729380649088009
       Converged, iterations:  76643 / 1000000
Theta =  [[ 6.99940575]
 [14.99874255]
 [-5.99945757]]
y predict =  [[12.99925781]]
time: 3.27 s (started: 2024-08-27 16:19:54 +00:00)


In [5]:
# Mini Batch Gradient descent (Multiple linear regression)

x = np.array([[0,1],[2,6],[3,8]]) # x1, x2
y = np.array([1,1,4])

x_b = np.c_[np.ones((x.shape[0],1)),x]

def cost_function(theta, x, y, N):
    y_hat = x.dot(theta)
    c = (1/(2*N)) * np.sum((y_hat-y)**2)
    return c

def mini_batch_gradient_descent(alpha, x, y, batch_size=2, ep=0.001, max_iter=10000):
    converged = False
    iter = 0
    N = x.shape[0]  # number of samples
    print("Num of data = ", N)

    # initial theta
    theta = np.random.random((x.shape[1], 1))
    print("Init theta.shape = ", theta.shape)

    # total error, J(theta)
    J = cost_function(theta, x, y, N)
    print("First J = ", J)

    # Iterate Loop
    while not converged and iter < max_iter:
        for i in range(0, N, batch_size):
            # Create mini-batch
            x_batch = x[i:i+batch_size]
            y_batch = y[i:i+batch_size]

            # Predict
            y_hat = x_batch.dot(theta)

            # Calculate gradient
            diff = y_hat - y_batch
            grad = x_batch.T.dot(diff)

            # Update theta
            theta = theta - alpha * (1/batch_size) * grad

            assert theta.shape == (3, 1)  # Ensure theta's shape remains the same

        # Calculate new cost
        J2 = cost_function(theta, x, y, N)

        if abs(J-J2) <= ep:
            print("       Converged, iterations: ", iter, "/", max_iter)
            converged = True

        J = J2  # update error
        iter += 1  # update iter

        if iter == max_iter:
            print('       Max iterations exceeded!')
            converged = True

    return theta

if __name__ == '__main__':

    print("start main")
    print(x_b.shape)
    y = y.reshape(-1, 1)
    print(y.shape)

    alpha = 0.01  # learning rate
    # Training process
    theta = mini_batch_gradient_descent(alpha, x_b, y, batch_size=2, ep=0.000000000001, max_iter=1000000)
    print("Theta = ", theta)

    # predict trained x
    xtest = np.array([[4, 9]])
    xtest_b = np.c_[np.ones((xtest.shape[0], 1)), xtest]
    y_p = xtest_b.dot(theta)
    print("y predict = ", y_p)

start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  2.136822272756809
       Converged, iterations:  169468 / 1000000
Theta =  [[ 6.99904362]
 [14.99793086]
 [-5.99911606]]
y predict =  [[12.99872249]]
time: 4.14 s (started: 2024-08-27 16:21:04 +00:00)
