# Dependencies

In [1]:
import torch
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

In [2]:
# Seed every random number generator the notebook uses (PyTorch and NumPy)
# with the same value, so a fresh "Restart & Run All" reproduces the results.
seed = 42
for seed_rng in (torch.manual_seed, np.random.seed):
    seed_rng(seed)

# Loss Function

## Mean Squared Error (MSE) loss function
   - ### $L(\hat{y}, y) = \frac{1}{N} \sum_{i=1}^{N} (\hat{y}_i - y_i)^2$

## Binary Cross-Entropy (BCE) loss function
   - ### $L(\hat{y}, y) = -\frac{1}{N} \sum_{i=1}^{N} \left[ y_i \log(\hat{y}_i) + (1 - y_i) \log(1 - \hat{y}_i) \right]$

In [3]:
def squared_error_loss(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    """Return the element-wise squared error between predictions and targets.

    No reduction is applied, so the result keeps the (broadcast) shape of the
    inputs: one loss value per prediction.

    Args:
        y_pred: Predicted values.
        y_true: Target values, broadcastable against ``y_pred``.

    Returns:
        ``(y_pred - y_true) ** 2`` for every element.
    """
    residual = y_pred - y_true
    return residual * residual

def mean_squared_error_loss(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    """Return the mean squared error (MSE) between predictions and targets.

    The squared differences are averaged over every element, so the result is
    a single NumPy scalar rather than one value per sample.

    Args:
        y_pred: Predicted values.
        y_true: Target values, broadcastable against ``y_pred``.

    Returns:
        The mean of ``(y_pred - y_true) ** 2`` over all elements.
    """
    squared_residuals = np.square(y_pred - y_true)
    return squared_residuals.mean()

In [4]:
def binary_cross_entropy_loss_per_sample(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    """Return the element-wise binary cross-entropy between predictions and targets.

    No reduction is applied: the result holds one loss value per prediction.

    Args:
        y_pred: Predicted probabilities, expected to lie in [0, 1].
        y_true: Target labels (0 or 1), broadcastable against ``y_pred``.

    Returns:
        A float64 array with ``-(y * log(p) + (1 - y) * log(1 - p))`` for every element.
    """
    # to avoid log(0)
    epsilon = 1e-15

    # Work in float64. In float32, 1 - epsilon rounds to exactly 1.0, so the
    # upper clip bound would be a no-op and a saturated prediction would still
    # reach log(0), yielding inf / nan losses.
    y_pred = np.asarray(y_pred, dtype=np.float64)
    y_true = np.asarray(y_true, dtype=np.float64)

    # clip predicted values to avoid taking the log of 0 or 1
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_loss(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    """Return the binary cross-entropy averaged over all elements.

    Delegates to ``binary_cross_entropy_loss_per_sample`` so the clipping and
    dtype handling live in one place, then reduces with the mean.

    Args:
        y_pred: Predicted probabilities, expected to lie in [0, 1].
        y_true: Target labels (0 or 1), broadcastable against ``y_pred``.

    Returns:
        The mean loss as a single NumPy scalar.
    """
    return np.mean(binary_cross_entropy_loss_per_sample(y_pred, y_true))

In [5]:
# sigmoid activation function
def sigmoid(x: np.ndarray) -> np.ndarray:
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Uses a numerically stable formulation: the exponential is only ever taken
    of a non-positive number, so large-magnitude inputs cannot overflow.

    Args:
        x: Input value(s); any array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x`` (a NumPy scalar for scalar input).
    """
    x = np.asarray(x)

    # exp(-|x|) lies in (0, 1], whereas exp(-x) overflows for large negative x
    decay = np.exp(-np.abs(x))

    # x >= 0: 1 / (1 + exp(-x))  (identical to the textbook formula)
    # x <  0: exp(x) / (1 + exp(x))  (algebraically the same value)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # [()] turns a 0-d result back into a scalar and leaves n-d arrays untouched
    return result[()]

In [6]:
# Three samples of a binary classification task, all labelled as class 0.
y_true = np.zeros((3, 1), dtype=int)

# Raw scores of a hypothetical model (model_1); the sigmoid maps them to
# probabilities that are increasingly wrong for a class-0 target.
output = np.array([0, 1.09864, 10]).reshape(-1, 1)
y_pred = sigmoid(output)

# log
print(f"y_true: {y_true.squeeze()}")
print(f"y_pred: {y_pred.squeeze()}")

y_true: [0 0 0]
y_pred: [0.5       0.7500052 0.9999546]


In [7]:
# Per-sample losses: one value per prediction. The trailing singleton axis is
# dropped so they print as flat vectors.
loss_1 = np.squeeze(squared_error_loss(y_pred, y_true))
loss_2 = np.squeeze(binary_cross_entropy_loss_per_sample(y_pred, y_true))

# Reduced losses: a single scalar averaged over all samples.
loss_3 = mean_squared_error_loss(y_pred, y_true)
loss_4 = binary_cross_entropy_loss(y_pred, y_true)

# log
print(f"SELoss  [per sample]: {loss_1}")
print(f"BCELoss [per sample]: {loss_2}")
print(f"MSELoss             : {loss_3:.5f}")
print(f"BCELoss             : {loss_4:.5f}")

SELoss  [per sample]: [0.25       0.56250779 0.99990921]
BCELoss [per sample]: [ 0.69314718  1.38631514 10.0000454 ]
MSELoss             : 0.60414
BCELoss             : 4.02650


## MSELoss & BCELoss Plot

In [None]:
# Sweep the predicted probability across (0, 1) for a fixed target of 0 and
# compare how strongly each loss penalises an increasingly wrong prediction.
y_true = np.zeros((100, 1))
y_pred = sigmoid(np.linspace(-10, 10, 100)[:, np.newaxis])
bce_loss = binary_cross_entropy_loss_per_sample(y_pred, y_true)
mse_loss = squared_error_loss(y_pred, y_true)

fig, ax = plt.subplots()
ax.plot(y_pred, bce_loss, label='BCELoss')
ax.plot(y_pred, mse_loss, label='MSELoss')
ax.set_title(f"y_true: {y_true[0, 0]}")
ax.set_xlabel("y_pred")
ax.set_ylabel("Loss")
ax.legend()
plt.show()

# Logistic Regression

In [None]:
n_samples, n_features = 10, 2

# Small two-class toy dataset with two informative features.
# random_state reuses the notebook-wide `seed` (set in the seeding cell) instead
# of repeating the literal, so changing the seed changes the data consistently.
x, y = datasets.make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=seed,
)

# One scatter call per class so each class gets its own colour and legend entry.
for class_label, class_color in ((0, 'b'), (1, 'r')):
    class_mask = y == class_label
    plt.scatter(x[class_mask, 0], x[class_mask, 1], color=class_color, label=f'Class {class_label}')
plt.title('Generated Data')
plt.legend()
plt.show()

In [10]:
# Wrap the NumPy data in torch tensors, cast to float32 first.
# The labels are reshaped into an (n_samples, 1) column so they line up with
# the single-output predictions of the model.
train_x = torch.from_numpy(x.astype(np.float32))
train_y = torch.from_numpy(y.astype(np.float32)).reshape(-1, 1)

In [11]:
# create a custom logistic regression model
class LogisticRegression(torch.nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        input_dim: Number of input features.
        output_dim: Number of outputs produced per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.classifier = torch.nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch of inputs to values in (0, 1) via linear layer + sigmoid."""
        logits = self.classifier(x)
        return torch.sigmoid(logits)

# Binary classifier: one output (passed through a sigmoid) per sample.
model = LogisticRegression(input_dim=n_features, output_dim=1)

# Display the module structure as the cell output.
model

LogisticRegression(
  (classifier): Linear(in_features=2, out_features=1, bias=True)
)

In [12]:
# create a custom BCELoss() function
class BCELoss(torch.nn.Module):
    """Binary cross-entropy criterion that delegates to PyTorch's implementation.

    The loss is computed with PyTorch's default settings (no per-element
    weights, mean reduction), so it returns a single scalar tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, y_pred, y_true):
        """Return the mean binary cross-entropy between probabilities and targets."""
        return torch.nn.functional.binary_cross_entropy(y_pred, y_true)

In [17]:
# Snapshots of the model parameters, one [weight, bias] pair per epoch, kept
# so the evolution of the parameters can be inspected after training.
state = []

# initial weight
# Start from fixed parameters (W = [-1, 1], b = 1) instead of the random
# initialisation, so re-running this cell restarts training from the same point.
with torch.no_grad():
    model.classifier.weight[0, 0].fill_(-1)
    model.classifier.weight[0, 1].fill_(1)
    model.classifier.bias[0].fill_(1)

# hyper parameters
epoch = 6  # number of passes over the whole training set
lr = .5  # SGD learning rate
criterion = BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr= lr)

# training loop
model.train()

for i in range(epoch):
    
    # forward (full batch: every training sample is used in each epoch)
    y_pred = model(train_x)

    # backward
    loss = criterion(y_pred, train_y)
    loss.backward()

    # save a copy of the current weight and bias on every epoch; this happens
    # before optimizer.step(), so entry i holds the parameters that produced
    # the loss logged for epoch i
    state.append([model.classifier.weight.detach().clone().numpy(), model.classifier.bias.detach().clone().numpy()])

    # update parameters, then clear the gradients for the next iteration
    optimizer.step()
    optimizer.zero_grad()

    # log
    print(f"epoch: {i} -> loss: {loss.item():>7.5f}")

epoch: 0 -> loss: 0.38442
epoch: 1 -> loss: 0.34141
epoch: 2 -> loss: 0.30671
epoch: 3 -> loss: 0.27825
epoch: 4 -> loss: 0.25456
epoch: 5 -> loss: 0.23458


In [None]:
# One panel per recorded epoch on a 3 x 2 grid (filled row by row), each
# showing the data and the decision boundary for that epoch's parameters.
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 16), layout='compressed')

# The axis limits and the x-range of the boundary line are the same for every panel.
x_limits = (x[:, 0].min() - 1, x[:, 0].max() + 1)
y_limits = (x[:, 1].min() - 1, x[:, 1].max() + 1)
x_plot = np.array([np.min(x[:, 0]), np.max(x[:, 0])])

for idx, ax in enumerate(axs.flat):
    w, b = state[idx]

    ax.scatter(x[y == 0, 0], x[y == 0, 1], color='b', label='Class 0')
    ax.scatter(x[y == 1, 0], x[y == 1, 1], color='r', label='Class 1')
    ax.set(title=f"epoch {idx}, W: {w.squeeze()}, b: {b.squeeze():.3f}", xlim=x_limits, ylim=y_limits)

    # decision boundary: w0 * x0 + w1 * x1 + b = 0  =>  x1 = -(w0 / w1) * x0 - b / w1
    slope = -w[0][0] / w[0][1]
    intercept = -b[0] / w[0][1]
    y_plot = slope * x_plot + intercept

    ax.plot(x_plot, y_plot, color='g', linestyle='--', label='Decision Boundary')
    ax.legend()

plt.show()