Linear Regression (vectorized, closed-form solution):

$y = XW$ \\
$W = (X^TX)^{-1}X^Ty$, where $X$ includes a leading column of ones for the intercept.


In [None]:
import numpy as np

class LinearRegression:
  """Ordinary least-squares linear regression with an intercept term.

  The weights minimise ||X_b @ W - y||^2, where X_b is X with a leading
  column of ones. When X_b.T @ X_b is invertible this is the normal-equation
  solution W = (X_b.T X_b)^-1 X_b.T y; when it is singular (collinear
  features, fewer samples than weights) the minimum-norm least-squares
  solution is used instead of failing on the matrix inverse.
  """

  def __init__(self):
    # Weight vector [intercept, coef_1, ..., coef_d]; None until fit() runs.
    self.W = None

  @staticmethod
  def _add_bias(X):
    """Return X with a column of ones prepended (the intercept feature)."""
    X = np.asarray(X)
    return np.hstack([np.ones((X.shape[0], 1)), X])

  def fit(self, X, y):
    """Estimate the weights from training data and store them in self.W.

    Args:
      X: 2-D array-like of shape (n_samples, n_features).
      y: array-like of targets with n_samples entries.
    """
    X_b = self._add_bias(X)
    # lstsq solves the same least-squares problem as the normal equation but
    # does not require X_b.T @ X_b to be invertible and is better conditioned.
    self.W = np.linalg.lstsq(X_b, np.asarray(y), rcond=None)[0]

  def predict(self, X):
    """Return predictions X_b @ W for a 2-D array-like X.

    Raises:
      RuntimeError: if called before fit().
    """
    if self.W is None:
      raise RuntimeError("fit() must be called before predict()")
    return self._add_bias(X) @ self.W


In [None]:
# Toy training set: three samples (rows) with two features (columns) each.
X = np.array([
    [2, 2],
    [4, 5],
    [7, 8],
])
# Targets satisfy y = 3 + 1*x1 + 2*x2 exactly for every row above.
y = np.array([9, 17, 26])

In [None]:
# Fit the closed-form model on the toy data and print the learned weights.
# The first entry of W is the intercept (the ones column is prepended to X),
# followed by one coefficient per feature column.
lr = LinearRegression()
lr.fit(X, y)
print(lr.W)

[3. 1. 2.]


In [None]:
# Predict targets for two unseen samples using the weights fitted above.
X_new = np.array([[10, 11], [13, 14]])
y_pred = lr.predict(X_new)
print(y_pred)

[35. 44.]


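### Considerations for LinearRegressionGD:
1. The argument order of np.dot() matters: the inner dimensions must line up (e.g. X.T with shape (d, n) times a residual of shape (n,)), otherwise a ValueError is raised for mismatched shapes.
2. The sign of the gradient must be handled carefully: the gradient of sum((y - y_pred)**2) with respect to W is 2*X.T@(y_pred - y), and the update step subtracts it from W.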

In [None]:
import numpy as np

# Linear regression with gradient descent and L2 regularization
class LinearRegressionGD:
  """Linear regression fitted by batch gradient descent with an L2 penalty.

  The objective is sum((y - X_b @ W)**2) + regul * sum(W**2), where X_b is X
  with a leading column of ones. The cost is a sum over samples (not a mean),
  and the penalty is applied to every entry of W, including the intercept.
  """

  def __init__(self, regul = 0):
    # Weight vector [intercept, coef_1, ..., coef_d]; None until fit() runs.
    self.W = None
    # L2 regularization strength; 0 disables the penalty.
    self.regul = regul

  def fit(self, X, y, lr = 0.01, num_iter = 1000):
    """Run num_iter gradient-descent steps from W = 0 and store the result.

    Args:
      X: 2-D array-like of shape (n_samples, n_features).
      y: array-like of n_samples targets; a column vector of shape
        (n_samples, 1) is accepted and flattened.
      lr: step size applied to the (un-averaged) gradient.
      num_iter: number of update steps.

    Raises:
      ValueError: if X and y differ in length or are empty.

    Prints the cost of the current weights every 1000th iteration, starting
    with iteration 0.
    """
    X = np.asarray(X)
    # Flatten y so that an (n, 1) column does not broadcast against the (n,)
    # predictions into an (n, n) residual matrix.
    y = np.asarray(y).ravel()
    if len(X) != len(y) or len(X) == 0:
      raise ValueError("X and y must have equal length and can't be empty")
    X = np.hstack([np.ones((len(X), 1)), X])
    self.W = np.zeros(X.shape[1])
    for i in range(num_iter):
      residual = np.dot(X, self.W) - y
      if i%1000 == 0:
        # The cost is only needed for reporting, so compute it on demand.
        cost = np.sum(residual**2) + self.regul * np.sum(self.W**2)
        print("Cost", cost)
      # d/dW of the objective: 2*X.T@(X@W - y) + 2*regul*W.
      gradient = 2*np.dot(X.T, residual) + 2*self.regul*self.W
      self.W = self.W - lr*gradient

  def predict(self, X):
    """Return predictions X_b @ W for a 2-D array-like X.

    Raises:
      RuntimeError: if called before fit().
    """
    if self.W is None:
      raise RuntimeError("fit() must be called before predict()")
    X = np.hstack([np.ones((len(X), 1)), X])
    return np.dot(X, self.W)

In [None]:
# Single-feature toy data: five samples arranged as a (5, 1) column.
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
# One target value per sample.
y = np.array([2, 4, 5, 4, 5])

In [None]:
# Train the gradient-descent model with an L2 penalty of 0.1 for 10000 steps;
# fit() prints the cost every 1000th iteration.
linr = LinearRegressionGD(regul = 0.1)
linr.fit(X, y, lr=0.01, num_iter=10000)
# W holds the intercept first, then the single feature coefficient.
print(linr.W)
# Predictions on the training inputs themselves.
y_pred = linr.predict(X)
print(y_pred)

Cost 86.0
Cost 2.8791287270130335
Cost 2.8791287270130344
Cost 2.8791287270130344
Cost 2.8791287270130344
Cost 2.8791287270130344
Cost 2.8791287270130344
Cost 2.8791287270130344
Cost 2.8791287270130344
Cost 2.8791287270130344
[1.99964292 0.65345474]
[2.65309766 3.3065524  3.96000714 4.61346188 5.26691662]


In [None]:
# Inspect the shape of the training inputs (rows are samples, columns are features).
X.shape

(5, 1)