In [44]:
""" Gradient Descent using sympy library"""

import numpy as np

# SymPy is a Python library for symbolic mathematics
# https://www.sympy.org/en/index.html
import sympy as sym
from sympy.abc import x

def make_function_and_value(input_algebraic_notation, input_value):
  """ yields both polynomial function and plugin value """
  
  # define polynomial function
  polynomial_function = sym.poly(input_algebraic_notation) # x is predefined variable in sympy.abc
  
  # get value by plugging in input_value to x
  plugged_in_value = polynomial_function.subs(x, input_value)
  
  # return both the function and the output value
  return plugged_in_value, polynomial_function

def yield_gradient_function_and_value(input_function, input_value):
  """ yields first-order derivative function(or, gradient function) """
  
  # calculate gradient of input function(or first-order derivative function) using sympy's differential method
  gradient_function = sym.diff(input_function, x)

  # get the output value by plugin given input_value to x
  gradient_value = gradient_function.subs(x, input_value)

  # return both the function and the output value
  return gradient_value, gradient_function

def gradient_descent(input_function, init_point, lr_rate=1e-2, epsilon=1e-5):
  """ 
  gradient descent algorithm 
  - init_point: initial point
  - lr_rate: learning rate
  - epsilon: convergence criteria(think of epsilon and neighborhood)
  """

  # initialize variables' default value
  iter_cnt = 0 # count of iteration
  current_x_val = init_point # current x value
  gradient_value, gradient_function = yield_gradient_function_and_value(input_function, current_x_val)
  print("Learning rate: {}".format(lr_rate))
  print("Initial point: {}".format(current_x_val))
  print("Gradient function: {}".format(gradient_function))
  print("Convergence criteria: {}".format(epsilon))
  
  # repeat until convergence
  while np.abs(gradient_value) > epsilon:
    # update current point
    current_x_val -= lr_rate * gradient_value

    # update gradient of function at current(or moved) point
    gradient_value, _ = yield_gradient_function_and_value(input_function, current_x_val)
    
    # show the current point
    current_y_val = input_function.subs(x, current_x_val)

    # print information on every 10 iterations
    if iter_cnt % 10 == 0:
      # print current point    
      print(f"Current point: ({current_x_val}, {current_y_val})")
      
      # print iteration and gradient value(or weight delta)
      print("Iteration: {}".format(iter_cnt))
      print("Gradient value: {}".format(gradient_value))

    # update count of iteration
    iter_cnt += 1
  
  # return current point
  return [current_x_val, current_y_val]


In [45]:
from sympy.abc import x

# get random initial x value
x_starting_pt = np.random.randint(1, 10)

# define polynomial function
_, polynomial_function = make_function_and_value(x**2 + 2*x + 3, x_starting_pt)

gradient_descent(input_function = polynomial_function, init_point = x_starting_pt)

Learning rate: 0.01
Initial point: 3
Gradient function: Poly(2*x + 2, x, domain='ZZ')
Convergence criteria: 1e-05
Current point: (2.92000000000000, 17.3664000000000)
Iteration: 0
Gradient value: 7.84000000000000
Current point: (2.20292540299918, 12.2587311371775)
Iteration: 10
Gradient value: 6.40585080599837
Current point: (1.61702324927997, 8.84881068727189)
Iteration: 20
Gradient value: 5.23404649855994
Current point: (1.13829853197915, 6.57232061186421)
Iteration: 30
Gradient value: 4.27659706395831
Current point: (0.747145583487728, 5.05251768990067)
Iteration: 40
Gradient value: 3.49429116697546
Current point: (0.427545145941499, 4.03788514370114)
Iteration: 50
Gradient value: 2.85509029188300
Current point: (0.166408319353113, 3.36050836745615)
Iteration: 60
Gradient value: 2.33281663870623
Current point: (-0.0469594805291656, 2.90828623175324)
Iteration: 70
Gradient value: 1.90608103894167
Current point: (-0.221296507678400, 2.60637912895386)
Iteration: 80
Gradient value: 1.557

[-0.999995020234038, 2.00000000002480]

In [46]:
""" Stochastic Gradient Descent """

import numpy as np
import sympy as sym
from sympy.abc import x

# define constants
INPUT_NOTATION = 7*x + 2 
SIZE = 1000

# train data
train_x = (np.random.rand(SIZE) - 0.5) * 10
train_y = np.zeros_like(train_x)

def function(input_algebraic_notation, input_value):
  polynomial_func = sym.poly(input_algebraic_notation)
  plugged_in_val = polynomial_func.subs(x, input_value)
  return plugged_in_val

for i in range(SIZE):
  train_y[i] = function(INPUT_NOTATION, train_x[i])

# initialize
weight, bias = 0.0, 0.0
lr_rate = 1e-2
int_number_data = 10
errors = []

for i in range(100):
  # calculate gradient
  gradient_weight = np.sum(train_y - weight - bias * train_x)
  gradient_bias = np.sum(train_y - weight - bias * train_x)
  
  # update weight and bias
  weight -= lr_rate * gradient_weight
  bias -= lr_rate * gradient_bias
  
  # calculate error
  error = np.sum(np.square(train_y - weight - bias * train_x))
  errors.append(error)

  if i % int_number_data == 0:
    print(f"Iteration: {i}")
    print(f"Weight: {weight}")
    print(f"Bias: {bias}")
    print(f"Error: {error}")

  # if error is less than 0.001, break
  if error < 0.001:
    break


Iteration: 0
Weight: -15.614135767290168
Bias: -15.614135767290168
Error: 4426978.411558665
Iteration: 10
Weight: -249329140494.23785
Bias: -249329140494.23785
Error: 5.60865920479758e+26
Iteration: 20
Weight: -3.5975295387343876e+21
Bias: -3.5975295387343876e+21
Error: 1.1676742667972743e+47
Iteration: 30
Weight: -5.190816747823782e+31
Bias: -5.190816747823782e+31
Error: 2.4309966849597863e+67
Iteration: 40
Weight: -7.489744898374618e+41
Bias: -7.489744898374618e+41
Error: 5.0611245364641525e+87
Iteration: 50
Weight: -1.0806830864573797e+52
Bias: -1.0806830864573797e+52
Error: 1.0536822913858977e+108
Iteration: 60
Weight: -1.5593000151560493e+62
Bias: -1.5593000151560493e+62
Error: 2.1936752656078408e+128
Iteration: 70
Weight: -2.249888582263424e+72
Bias: -2.249888582263424e+72
Error: 4.5670418970505597e+148
Iteration: 80
Weight: -3.2463275722425565e+82
Bias: -3.2463275722425565e+82
Error: 9.508185653739273e+168
Iteration: 90
Weight: -4.684073153391537e+92
Bias: -4.684073153391537e+92