# 1. Linear Regression

## 1.1 Numpy Closed form

In [44]:
import numpy as np

In [45]:
np.random.seed(42)

In [46]:
m = 100
n = 5
X = np.random.randn(m, n)
X.shape

(100, 5)

In [47]:
w_true = np.array([[4], [-2], [1.9], [0.5], [-2.9]])
b_true = 4.2

In [48]:
y = X @ w_true + b_true + np.random.randn(m, 1) * 0.5
y.shape

(100, 1)

In [49]:
X_b = np.c_[np.ones((m, 1)), X]
X_b.shape

(100, 6)

In [50]:
theta_best = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y

In [51]:
print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"closed form solution: {theta_best.ravel()}")

w_true: [ 4.  -2.   1.9  0.5 -2.9]
b_true: 4.2
closed form solution: [ 4.12693763  4.02710641 -1.93224448  1.88935295  0.58037361 -2.90361573]


## 1.2 Numpy GD

In [52]:
w = np.random.randn(n, 1)
b = 0.0

lr = 0.05
epochs = 1000

for epoch in range(epochs):
    y_pred = X @ w + b
    error = y_pred - y
    MSE_loss = np.mean(error ** 2)

    dw = X.T @ error / m
    db = np.mean(error)

    w = w - lr * dw
    b = b - lr * db

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"GS solution: {w.ravel()} {b}")

w_true: [ 4.  -2.   1.9  0.5 -2.9]
b_true: 4.2
GS solution: [ 4.02710641 -1.93224448  1.88935295  0.58037361 -2.90361573] 4.126937631685491


## 1.3 Pytorch

In [53]:
import torch
import torch.nn as nn
import torch.optim as optim

In [54]:
model = nn.Linear(n, 1)
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

X_tensor = torch.from_numpy(X).float()
y_tensor = torch.from_numpy(y).float()

for epoch in range(epochs):
    y_pred = model(X_tensor) # forward pass
    loss = loss_fn(y_pred, y_tensor)

    loss.backward() #similar to numpy dw and db calculation
    optimizer.step() # similar to numpy update w and b by dw db lr
    optimizer.zero_grad() #clear the gradient

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"Pytorch solution: {model.weight.detach().numpy()} {model.bias.item()}")

w_true: [ 4.  -2.   1.9  0.5 -2.9]
b_true: 4.2
Pytorch solution: [[ 4.0271034 -1.9322453  1.8893536  0.5803733 -2.9036171]] 4.126936435699463


## 1.4 Sklearn

In [55]:
from sklearn.linear_model import LinearRegression

linear_model = LinearRegression()
linear_model.fit(X, y)

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"Sklearn solution: {linear_model.coef_.ravel()} {linear_model.intercept_}")

w_true: [ 4.  -2.   1.9  0.5 -2.9]
b_true: 4.2
Sklearn solution: [ 4.02710641 -1.93224448  1.88935295  0.58037361 -2.90361573] [4.12693763]


# 2. Logistic Regression

## 2.1 Numpy

In [56]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [57]:
m = 10000
n = 5
X = np.random.randn(m, n)
w_true = np.array([[2], [-3], [4], [-3], [-1]]) #combining b to make y==0 and 1 close to half half
b_true = 0.5

logits = X @ w_true + b_true
probs = sigmoid(logits)
y = (probs > 0.5).astype(int)

In [58]:
y.shape

(10000, 1)

In [59]:
sum(y)

array([5238])

In [60]:
w = np.random.randn(n, 1)
b = 0.0

lr = 0.05
epochs = 1000

for epoch in range(epochs):
    y_pred = X @ w + b
    dz = y_pred - y
    loss = -np.mean(y * np.log(y_pred + 1e-6) + (1 - y) * np.log(1 - y_pred + 1e-6))

    dw = X.T @ dz / m
    db = np.mean(dz)

    w = w - lr * dw
    b = b - lr * db

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"GD solution: {w.ravel()} {b}")

  loss = -np.mean(y * np.log(y_pred + 1e-6) + (1 - y) * np.log(1 - y_pred + 1e-6))


w_true: [ 2 -3  4 -3 -1]
b_true: 0.5
GD solution: [ 0.13143723 -0.1884101   0.25349303 -0.19215729 -0.06492323] 0.526950586272894


## 2.2 Pytorch

just use linear model, but using different loss function, should be good

In [63]:
X_tensor = torch.from_numpy(X).float()
y_tensor = torch.from_numpy(y).float()

model = nn.Linear(n, 1)
loss_fn = nn.BCEWithLogitsLoss()  # 内置 sigmoid + BCE
optimizer = optim.SGD(model.parameters(), lr=lr)

for epoch in range(epochs):
    logits_pred = model(X_tensor) # forward pass to logits using just linear model
    loss = loss_fn(logits_pred, y_tensor) 

    loss.backward() #similar to numpy dw and db calculation
    optimizer.step() # similar to numpy update w and b by dw db lr
    optimizer.zero_grad() #clear the gradient

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"Pytorch solution: {model.weight.detach().numpy()} {model.bias.item()}")

w_true: [ 2 -3  4 -3 -1]
b_true: 0.5
Pytorch solution: [[ 1.3123004 -1.9089855  2.595153  -1.9481338 -0.6411347]] 0.28791528940200806


## 2.3 Sklearn

In [67]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(penalty=None, solver='lbfgs', max_iter=1000)
log_reg.fit(X, y)

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"Sklearn solution: {log_reg.coef_.ravel()} {log_reg.intercept_}")

w_true: [ 2 -3  4 -3 -1]
b_true: 0.5
Sklearn solution: [ 223.03793023 -334.9691114   445.44523275 -335.28515157 -111.5357643 ] [55.61447073]


  y = column_or_1d(y, warn=True)


In [69]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X, y)

print(f"w_true: {w_true.ravel()}")
print(f"b_true: {b_true}")
print(f"Sklearn solution: {log_reg.coef_.ravel()} {log_reg.intercept_}")

w_true: [ 2 -3  4 -3 -1]
b_true: 0.5
Sklearn solution: [ 5.92114355 -8.80560919 11.86826819 -8.93676922 -3.00842441] [1.44418849]


  y = column_or_1d(y, warn=True)
