## Regularization

In [None]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from plt_overfit import overfit_example, output
from lab_utils_common import sigmoid
np.set_printoptions(precision=8)

### Regularized cost function for linear regression

In [None]:
def compute_cost_linear_reg(x, y, w, b, lambda_):
  """Compute the L2-regularized squared-error cost for linear regression.

  The value returned is

      (1 / (2m)) * sum_i (w . x[i] + b - y[i])**2  +  (lambda_ / (2m)) * sum_j w[j]**2

  Args:
    x: array-like of shape (m, n); one row per example.
    y: array-like with m target values (flattened to 1-D before use).
    w: array-like of shape (n,); model weights.
    b: scalar bias. It is not part of the regularization term.
    lambda_: scalar regularization strength.

  Returns:
    The total (data + regularization) cost as a scalar.

  Raises:
    ValueError: if x contains no examples, since the cost averages over m.
  """
  x = np.asarray(x)
  # Flatten so a column-shaped y cannot broadcast against the (m,) predictions.
  y = np.asarray(y).reshape(-1)
  w = np.asarray(w)

  m = x.shape[0]
  if m == 0:
    raise ValueError('x must contain at least one example')

  # All m prediction errors at once instead of a per-example Python loop.
  err = np.dot(x, w) + b - y
  cost = np.dot(err, err) / (2 * m)

  # Only the weights are penalized; the bias is left out.
  reg_cost = (lambda_ / (2 * m)) * np.dot(w, w)

  return cost + reg_cost

In [None]:
# Fixed seed so the sample data below is identical on every run.
np.random.seed(1)

x_tmp = np.random.rand(5, 6)                  # 5 examples, 6 features
y_tmp = np.array([0, 1, 0, 1, 0])
# One weight per feature column, shifted from [0, 1) to [-0.5, 0.5).
w_tmp = np.random.rand(x_tmp.shape[1]) - 0.5
b_tmp = 0.5
lambda_tmp = 0.7

cost_lin_tmp = compute_cost_linear_reg(
    x=x_tmp, y=y_tmp, w=w_tmp, b=b_tmp, lambda_=lambda_tmp
)
print(f'Linear regularized cost: {cost_lin_tmp}')

### Regularized cost function for logistic regression

In [None]:
def compute_cost_logistic_reg(x, y, w, b, lambda_):
  """Compute the L2-regularized cross-entropy cost for logistic regression.

  With z[i] = w . x[i] + b and s = the logistic function, the value returned is

      (1/m) * sum_i [ -y[i]*log(s(z[i])) - (1 - y[i])*log(1 - s(z[i])) ]
        + (lambda_ / (2m)) * sum_j w[j]**2

  The two log terms are evaluated as log(1 + exp(-z)) and log(1 + exp(z)) via
  np.logaddexp. Taking np.log of a sigmoid output instead yields log(0) once
  the sigmoid rounds to exactly 0 or 1, which turns the cost into inf or nan.

  NOTE(review): this is equivalent to the sigmoid-based form provided the
  `sigmoid` helper used elsewhere in this notebook is the standard logistic
  function 1 / (1 + exp(-z)) -- confirm.

  Args:
    x: array-like of shape (m, n); one row per example.
    y: array-like with m target values (flattened to 1-D before use).
    w: array-like of shape (n,); model weights.
    b: scalar bias. It is not part of the regularization term.
    lambda_: scalar regularization strength.

  Returns:
    The total (data + regularization) cost as a scalar.

  Raises:
    ValueError: if x contains no examples, since the cost averages over m.
  """
  x = np.asarray(x)
  # Flatten so a column-shaped y cannot broadcast against the (m,) logits.
  y = np.asarray(y).reshape(-1)
  w = np.asarray(w)

  m = x.shape[0]
  if m == 0:
    raise ValueError('x must contain at least one example')

  z = np.dot(x, w) + b

  # -log(s(z)) == logaddexp(0, -z)   and   -log(1 - s(z)) == logaddexp(0, z)
  loss = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
  cost = np.sum(loss) / m

  # Only the weights are penalized; the bias is left out.
  reg_cost = (lambda_ / (2 * m)) * np.dot(w, w)

  return cost + reg_cost

In [None]:
# Fixed seed so the sample data below is identical on every run.
np.random.seed(1)

x_tmp = np.random.rand(5, 6)                  # 5 examples, 6 features
y_tmp = np.array([0, 1, 0, 1, 0])
# One weight per feature column, shifted from [0, 1) to [-0.5, 0.5).
w_tmp = np.random.rand(x_tmp.shape[1]) - 0.5
b_tmp = 0.5
lambda_tmp = 0.7

# Stored under its own name so the linear-regression cost computed earlier in
# the notebook (cost_lin_tmp) is not silently overwritten by the logistic one.
cost_log_tmp = compute_cost_logistic_reg(x_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)
print(f'Logistic regularized cost: {cost_log_tmp}')

### Gradient function for regularized linear regression

In [None]:
def compute_gradient_linear_reg(x, y, w, b, lambda_):
  """Compute the gradient of the L2-regularized linear-regression cost.

  With err[i] = w . x[i] + b - y[i]:

      dj_dw[j] = (1/m) * sum_i err[i] * x[i, j]  +  (lambda_/m) * w[j]
      dj_db    = (1/m) * sum_i err[i]

  Args:
    x: array-like of shape (m, n); one row per example.
    y: array-like with m target values (flattened to 1-D before use).
    w: array-like of shape (n,); model weights.
    b: scalar bias. It receives no regularization term.
    lambda_: scalar regularization strength.

  Returns:
    dj_dw: ndarray of shape (n,), gradient with respect to w.
    dj_db: scalar, gradient with respect to b.

  Raises:
    ValueError: if x is not 2-D or contains no examples.
  """
  x = np.asarray(x)
  # Flatten so a column-shaped y cannot broadcast against the (m,) predictions.
  y = np.asarray(y).reshape(-1)
  w = np.asarray(w)

  # The unpacking doubles as a check that x is 2-D.
  m, _ = x.shape
  if m == 0:
    raise ValueError('x must contain at least one example')

  # All m prediction errors at once instead of nested Python loops.
  err = np.dot(x, w) + b - y

  dj_dw = np.dot(x.T, err) / m + (lambda_ / m) * w
  dj_db = np.sum(err) / m

  return dj_dw, dj_db

In [None]:
# Fixed seed so the sample data below is identical on every run.
np.random.seed(1)

x_tmp = np.random.rand(5, 3)                  # 5 examples, 3 features
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(x_tmp.shape[1])        # one weight per feature column
b_tmp = 0.5
lambda_tmp = 0.7

dj_dw_tmp, dj_db_tmp = compute_gradient_linear_reg(
    x=x_tmp, y=y_tmp, w=w_tmp, b=b_tmp, lambda_=lambda_tmp
)

print(f'Linear regularized gradients are dj_dw: {dj_dw_tmp}, dj_db: {dj_db_tmp}')

### Gradient function for regularized logistic regression

In [None]:
def compute_gradient_logistic_reg(x, y, w, b, lambda_):
  """Compute the gradient of the L2-regularized logistic-regression cost.

  With residual[i] = sigmoid(w . x[i] + b) - y[i]:

      dj_dw[j] = (1/m) * sum_i residual[i] * x[i, j]  +  (lambda_/m) * w[j]
      dj_db    = (1/m) * sum_i residual[i]

  Args:
    x: ndarray of shape (m, n); one row per example.
    y: sequence of m target values, indexed per example.
    w: sequence of n weights.
    b: scalar bias. It receives no regularization term.
    lambda_: scalar regularization strength.

  Returns:
    Tuple (dj_dw, dj_db): an ndarray of shape (n,) and a scalar.
  """
  num_examples, num_features = x.shape
  grad_w = np.zeros(num_features)
  grad_b = 0

  for idx in range(num_examples):
    # sigmoid is called once per example, always on a scalar logit.
    residual = sigmoid(np.dot(w, x[idx]) + b) - y[idx]
    # Whole feature row at once; element-wise this is residual * x[idx, j].
    grad_w = grad_w + residual * x[idx]
    grad_b = grad_b + residual

  grad_w = grad_w / num_examples
  grad_b = grad_b / num_examples

  # L2 penalty contribution, applied to the weights only.
  grad_w = grad_w + (lambda_ / num_examples) * np.asarray(w)

  return grad_w, grad_b

In [None]:
# Fixed seed so the sample data below is identical on every run.
np.random.seed(1)

x_tmp = np.random.rand(5, 3)                  # 5 examples, 3 features
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(x_tmp.shape[1])        # one weight per feature column
b_tmp = 0.5
lambda_tmp = 0.7

dj_dw_tmp, dj_db_tmp = compute_gradient_logistic_reg(
    x=x_tmp, y=y_tmp, w=w_tmp, b=b_tmp, lambda_=lambda_tmp
)

print(f'Logistic regularized gradients are dj_dw: {dj_dw_tmp}, dj_db: {dj_db_tmp}')

In [None]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_func, gradient_func, lambda_):
  """Run batch gradient descent and return the fitted parameters.

  Each iteration calls gradient_func(x, y, w, b, lambda_) and steps w and b
  against the returned gradients, scaled by alpha.

  Args:
    x, y: training data, passed through unchanged to cost_func / gradient_func.
    w_in: initial weights. Copied first, so the caller's object is not modified.
    b_in: initial bias.
    alpha: learning rate.
    num_iters: number of update steps to perform.
    cost_func: callable (x, y, w, b, lambda_) -> cost.
    gradient_func: callable (x, y, w, b, lambda_) -> (dj_dw, dj_db).
    lambda_: regularization strength forwarded to both callables.

  Returns:
    w: weights after the last update.
    b: bias after the last update.
    J_hist: list of costs after each of the first 100000 updates.

  Side effects:
    Prints the current cost roughly ten times over the run.
  """
  w = copy.deepcopy(w_in)
  b = b_in
  J_hist = []

  # History is capped at this many entries -- presumably to bound memory on long runs.
  max_history = 100000
  # Loop-invariant reporting interval (about ten progress lines per run).
  report_every = math.ceil(num_iters / 10)

  for i in range(num_iters):
    dj_dw, dj_db = gradient_func(x, y, w, b, lambda_)

    w = w - alpha * dj_dw
    b = b - alpha * dj_db

    record = i < max_history
    report = i % report_every == 0

    if record or report:
      # Evaluate the cost whenever it is needed, so a progress line printed
      # after the history cap still shows the current cost, not the last stored one.
      cost = cost_func(x, y, w, b, lambda_)
      if record:
        J_hist.append(cost)
      if report:
        print(f'Iteration {i:4d}: Cost {cost}')

  return w, b, J_hist

### Gradient descent for regularized linear regression

In [None]:
# Fixed seed so the sample data below is identical on every run.
np.random.seed(1)

x_tmp = np.random.rand(5, 3)                  # 5 examples, 3 features
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(x_tmp.shape[1])        # starting weights, one per feature
b_tmp = 0.5
lambda_tmp = 0.7
alpha = 0.1                                   # learning rate
iterations = 10000                            # number of update steps

w, b, hist = gradient_descent(
    x=x_tmp,
    y=y_tmp,
    w_in=w_tmp,
    b_in=b_tmp,
    alpha=alpha,
    num_iters=iterations,
    cost_func=compute_cost_linear_reg,
    gradient_func=compute_gradient_linear_reg,
    lambda_=lambda_tmp,
)

print(f'Linear regularized parameters are w: {w}, b: {b}')

### Gradient descent for regularized logistic regression

In [None]:
# Fixed seed so the sample data below is identical on every run.
np.random.seed(1)

x_tmp = np.random.rand(5, 3)                  # 5 examples, 3 features
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(x_tmp.shape[1])        # starting weights, one per feature
b_tmp = 0.5
lambda_tmp = 0.7
alpha = 0.1                                   # learning rate
iterations = 10000                            # number of update steps

w, b, hist = gradient_descent(
    x=x_tmp,
    y=y_tmp,
    w_in=w_tmp,
    b_in=b_tmp,
    alpha=alpha,
    num_iters=iterations,
    cost_func=compute_cost_logistic_reg,
    gradient_func=compute_gradient_logistic_reg,
    lambda_=lambda_tmp,
)

print(f'Logistic regularized parameters are w: {w}, b: {b}')

In [None]:
# Close every matplotlib figure left open by earlier cells.
plt.close('all')
# Render `output` (imported from plt_overfit) in this cell.
# NOTE(review): presumably a widget area that the example below writes into -- confirm.
display(output)
# Build the overfitting example from plt_overfit; the meaning of the `True`
# argument is defined in that module and is not visible from this notebook.
ofit=overfit_example(True)