In [4]:
import numpy as np

In [3]:
class Model:
  """
  Linear regression (prediction = x @ w + b) trained with full-batch
  gradient descent on half mean squared error.

  Attributes (all None until fit() is called):
    w    : weight column vector, created with shape (n_features, 1)
    b    : bias term
    loss : loss measured at the start of the most recent epoch,
           i.e. before that epoch's parameter update
  """

  def __init__(self):
    # parameters are created in fit(), once the number of features is known
    self.w = None
    self.b = None
    self.loss = None

  @staticmethod
  def _as_column(y):
    """
    Return y as an array, reshaping 0-D / 1-D input into a column (n, 1).

    Without this, a target of shape (n,) broadcasts against the (n, 1)
    prediction into an (n, n) residual and the weights silently take the
    wrong shape. Targets that already have 2 or more dimensions are left as is.
    """
    y = np.asarray(y)
    if y.ndim < 2:
      y = y.reshape(-1, 1)
    return y

  def fit(self, x, y, epoch=10, alpha=0.001):
    """
      x : feature matrix of shape (n_samples, n_features); each row is one
          training instance and each col is one feature
      y : target values, either a column of shape (n_samples, 1) or a
          1-D vector of shape (n_samples,)
      epoch : number of gradient descent steps, each over the whole dataset
      alpha : learning rate
    """
    # initialize w and b based on the shape of x
    n_cols_x = x.shape[1]  # number of cols in x = num of features in x
    self.w = np.random.randn(n_cols_x, 1)
    self.b = 0

    print(f"Initial parameters w : {self.w} b: {self.b}")

    # call gradient descent
    self.gradient_descent(x, self._as_column(y), epoch, alpha)

  def coefficients_(self):
    """
    Print the current weights and bias.
    """
    print("w: ", self.w)
    print("b: ", self.b)

  def gradient_descent(self, x, y, epoch, alpha=0.001):
    """
    Run `epoch` full-batch gradient descent steps, updating self.w, self.b
    and self.loss in place. Progress is printed on every 100th epoch
    (starting with the first).

      x : feature matrix, one row per training instance
      y : targets; a 1-D vector is treated as a column
      epoch : number of update steps
      alpha : learning rate
    """
    n = len(x) # length of training dataset
    y = self._as_column(y)  # guard against (n,) vs (n, 1) broadcasting

    for i in range(epoch):
      # first calculate y_predicted for all the samples in x_train
      y_predicted = self.predict(x)

      # calculate loss
      j = self.loss_function(y_predicted, y)   # predicted - actual
      self.loss = j

      # gradients of the half-MSE loss, computed from a single residual
      residual = y_predicted - y
      dj_dw = 1/n * (x.T @ residual)  # for entire dataset in x
      dj_db = np.mean(residual)

      # parameters update
      self.w = self.w - (alpha * dj_dw)
      self.b = self.b - (alpha * dj_db)

      if i % 100 == 0:
        print(f"Epoch: {i+1}, Loss: {j}, w: {self.w}, b: {self.b}")

  def loss_function(self, y_predicted, y_true):
    """
    Calculating half of the Mean Squared Error: 0.5 * mean((y_predicted - y_true)**2).
    The 0.5 factor cancels the 2 from differentiating the square, which is
    why the gradients in gradient_descent carry no extra factor.
    """
    loss = np.mean((y_predicted - y_true)**2) * 0.5
    return loss

  def predict(self, x):
    """
    Return the linear prediction x @ w + b for every row of x.
    """
    z = x @ self.w + self.b
    return z

Test this with a simple function. We have explicitly chosen a linear function whose weights we know, so let's see if gradient descent can find similar weights.

In [None]:
# Synthetic regression data: y = 4 + 3x plus standard-normal noise, x in [0, 2)
np.random.seed(42)  # fixed seed so the draws below are repeatable
x_train = np.random.rand(100, 1) * 2
noise = np.random.randn(100, 1)
y_train = (3 * x_train + 4) + noise

In [50]:
# Create an unfitted model; w, b and loss stay None until fit() is called.
model = Model()

In [51]:
# Train with 1000 full-batch gradient descent steps at learning rate 0.1;
# fit() prints the initial parameters and then progress every 100 epochs.
model.fit(x_train, y_train, epoch=1000, alpha=0.1)

Initial parameters w : [[2.72016917]] b: 0
Epoch: 1, Loss: 9.48631468120293, w: [[3.12270739]], b: 0.4262061778378716
Epoch: 101, Loss: 0.4303375120462318, w: [[3.13743293]], b: 3.799080129310186
Epoch: 201, Loss: 0.4041740202935006, w: [[2.83643706]], b: 4.139979789970479
Epoch: 301, Loss: 0.40332102873278614, w: [[2.78208887]], b: 4.201533053491397
Epoch: 401, Loss: 0.40329321919537675, w: [[2.77227569]], b: 4.212647186735833
Epoch: 501, Loss: 0.4032923125388442, w: [[2.77050382]], b: 4.21465396835405
Epoch: 601, Loss: 0.40329228297970204, w: [[2.77018388]], b: 4.2150163153202636
Epoch: 701, Loss: 0.4032922820160042, w: [[2.77012612]], b: 4.215081741135774
Epoch: 801, Loss: 0.40329228198458544, w: [[2.77011568]], b: 4.215093554501787
Epoch: 901, Loss: 0.4032922819835612, w: [[2.7701138]], b: 4.215095687537737


In [52]:
# Print the learned weight and bias; the data was generated with slope 3 and intercept 4.
model.coefficients_()

w:  [[2.77011346]]
b:  4.215096071216208


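So gradient descent recovers w ≈ 2.77 and b ≈ 4.22, close to the true values w = 3 and b = 4 used to generate the data. The remaining gap comes from the random noise added to y_train, not from the optimizer: the loss has stopped decreasing by the last epochs.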