In [27]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import random
from tqdm import tqdm

[data sepecification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html)

In [28]:
cancer_set = load_breast_cancer()

In [29]:
class Dataset:
  def __init__(self, dataset, indices, transform=None, encoder=None):
    self.dataset, self.indices = dataset, indices
    self.transform, self.encoder = transform, encoder
  def __getitem__(self, item: int):
    idx = self.indices[item]
    feature, label = self.dataset.data[idx], self.dataset.target[idx]
    if self.transform: feature = self.transform(feature)
    if self.encoder: label = self.encoder(label)
    return feature, label
  def __len__(self): return len(self.indices)

In [30]:
indices = random.sample(range(cancer_set.data.__len__()), 100)

# init Datasets
support_set = Dataset(cancer_set, indices[:50])
query_set = Dataset(cancer_set, indices[50:])

## Logistic Regression
**Logistic Model (or Logit Model)** is a statistical method that predicts the log-odds of an event using a linear combination of variables. The most common measurement is **Cross-Entropy Loss ( or Log Loss)**, which differs from linear least squares but can still be explained as ordinary least squares.

### The Sigmoid/Logistic Function as an Activation Function
**Activation Function** is a mathematical function applied to the output. Its main purposes are adding non-linearity to the model and leveraging the output range to help make better decisions—most image recognition and NLP models cannot work without it.
$$ \text{sigmoid function} = \sigma(x) = \frac{1}{1 + e^{-x}} $$
- Although it introduces non-linearity to models, the activation function must be differentiable to calculate gradients.
- **Decision Boundary** is where the model changes its prediction. There are several types:
  - A point for a single feature $x$
  - A line for two features $x$
  - Hyperplane for higher dimensions $x$

In [31]:
def sigmoid(x): return 1 / (1 + np.exp(-1 * np.clip(x, -1e2, 1e2)))

### Formulation
$$h(x_i) = \sigma(z =\theta_0 + \theta_1 \cdot x_{(i,1)} + ... + \theta_n \cdot x_{(n, i)} + \epsilon_i) \quad\text{where is } \sigma(x) = \frac{1}{1 + e^{-z}}$$
- Input $x$ is called feature vector while output $h(x)$ is called label.
- $z$ represents the linear combination of inputs and weights
- while $z$ can be any real number, $\sigma$ (called **Sigmoid Function**) maps it to a probability space between $(0, 1)$.

In [32]:
class LogisticRegression:
  def __init__(self, n_inpt): self.weight = np.zeros(shape=(n_inpt))
  def forward(self, x): return sigmoid(np.dot(x, self.weight))
# LogisticRegression

### Cross-entropy/Log Loss
**Cross-Entropy** **Loss** is an algorithm that fits or evaluates the parameters $\theta$ as log-likelihood, which differs slightly from least squares. It ensures convexity during gradient descent and penalizes wrong predictions more heavily when the model is *"confident but wrong"*.
$$ J(\theta) = =\frac{1}{m}\sum^{m}_{i=1}{y^{(i)}\log{h(x^{i})} + (1 -y^i)\log{(1 - h(x^i))}} $$
- To minimize $J(\theta)$, update weights using the gradient: $\theta_j := \theta_j - \alpha\frac{\Delta{J(\theta)}}{\Delta{\theta_j}}$
    - Where the gradient is: $\frac{\Delta{J(\theta)}}{\Delta{\theta_j}} = \frac{1}{m} \cdot \sum^{m}_{i=1}{h_(x^i) - y^i} \cdot x_k^i$
- Vectorized update rule from the above: $\theta := \theta - \frac{\alpha}{m} \cdot x \cdot (h(x) - y)$

In [63]:
# define loss functions(MSE, MAE)
def cross_entropy_loss(independent, dependent, weight):
  probability = np.dot(independent, weight)
  return np.mean(dependent * np.log(np.clip(probability, 1e-6, 1.0)) + (1 - dependent) * np.log(np.clip(1 - probability, 1e-6, 1.0)))
# cross_entropy_loss

In [64]:
def update_rule(model, lr):
  def _update_rule(x, y):
    pred = model.forward(x)
    model.weight -= (lr / len(x)) * np.dot(x.T, (pred - y))
  return _update_rule

In [65]:
# init and train a model
model = LogisticRegression(30)
optimizer = update_rule(model, 0.001)

progress_bar = tqdm(range(100), desc="Training Logistic Regression", unit="epoch", leave=True, dynamic_ncols=True)
for _ in progress_bar:
  loss = 0.
  for feature, label in support_set:
    optimizer(feature, label)
    loss += cross_entropy_loss(feature, label, model.weight)
  progress_bar.set_postfix(loss=loss/len(support_set))

Training Logistic Regression: 100%|██████████| 100/100 [00:00<00:00, 323.57epoch/s, loss=-0.276]


In [62]:
count, n_samples = 0, len(query_set)
for feature, label in support_set:
  pred = model.forward(feature)
  if round(pred) == label: count += 1
print(f"accuracy: {count / n_samples:.2f}({count}/{n_samples})")

accuracy: 0.82(41/50)
