In [251]:
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd
from tqdm import tqdm

In [252]:
iris_set = load_iris()

[dataset specification](https://scikit-learn.org/1.5/modules/generated/sklearn.datasets.load_iris.html)
### Predict type of Iris by given Features
**class**: [setosa, versicolour, virginica]
**Number of Instances**: `150` (50 in each of three classes)
**Number of Attributes**: `4` numeric, predictive attributes and the class
**Attribute Information**:
* sepal length in `cm`
* sepal width in `cm`
* petal length in `cm`
* petal width in `cm`

In [253]:
class Dataset:
  def __init__(self, dataset, indices, transform=None, encoder=None):
    self.dataset, self.indices = dataset, indices
    self.transform, self.encoder = transform, encoder
  def __getitem__(self, item: int):
    idx = self.indices[item]
    feature, label = self.dataset.data[idx], self.dataset.target[idx]
    if self.transform: feature = self.transform(feature)
    if self.encoder: label = self.encoder(label)
    return feature, label
  def __len__(self): return len(self.indices)

### Loss Functions: MSE and MAE

Loss functions are a critical component in machine learning models, particularly for regression tasks. They quantify the difference between the predicted values and the actual target values, guiding the optimization process to minimize this error.

#### Mean Squared Error (MSE)
MSE calculates the average of the squared differences between the predicted and actual values. It is defined as:

$$\text{MSE} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$$

- **Penalizes larger errors more heavily**: Squaring the errors makes MSE more sensitive to outliers.
- **Continuous optimization**: Smooth gradients make it suitable for many gradient-based optimization algorithms.
- **Use Case**: Ideal when large errors need to be penalized significantly.

### Mean Absolute Error (MAE)
MAE calculates the average of the absolute differences between the predicted and actual values. It is defined as:
$$\text{MAE} = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|$$

- **Treats all errors equally**: MAE is less sensitive to outliers compared to MSE.
- **Robust to Outliers**: Does not disproportionately penalize larger deviations.
- **Use Case**: Ideal for datasets where outliers are present and need to be treated equally.

In [254]:
# define loss functions(MSE, MAE)
def mean_squared_error(independent, dependent, weight):
  probability = np.dot(independent, weight)
  return np.mean((probability - dependent) ** 2)
# mean_squared_error

def mean_absolute_error(independent, dependent, weight):
  probability = np.dot(independent, weight)
  return np.mean(abs(probability - dependent))
# mean_absolute_error

### Gradient Descent with Regularization (GDR)
Gradient Descent with Regularization (GDR) is an optimization technique used to minimize the loss function while controlling the complexity of the model. By incorporating regularization terms into the loss function, GDR helps to prevent overfitting and improves the generalization of the model to unseen data. Update rule:
$$\theta = \theta - \eta \cdot \nabla L(\theta)$$
Gradient Descent is an iterative optimization algorithm used to minimize the loss function by updating model parameters in the direction of the steepest descent of the gradient.

In [255]:
# define LinearRegression
class LinearRegression:
  def __init__(self, n_inpt): self.weight = np.zeros(shape=(n_inpt))
  def gdr(self, x, y, lr):
    indications = self.forward(x)
    self.weight -= (lr / x.shape[0]) * np.dot(x.T, (indications - y))
  # gdr
  def train(self, dataset, iters: int, lr=0.01):
    for _ in range(iters):
      for feature, label in dataset: self.gdr(feature, label, lr=lr)
  # train
  def forward(self, x): return np.dot(x, self.weight)
# LogisticRegression

In [256]:
def GDR(model, lr):
  def _GDR(x, y):
    pred = model.forward(x)
    model.weight -= lr * np.dot(x.T, (pred - y))
  return _GDR

In [257]:
progress_bar = tqdm(range(10))

# init and train a model
model = LinearRegression(4)
optimizer = GDR(model, 0.001)
for _ in progress_bar:
  loss = 0.
  for feature, label in support_set:
    optimizer(feature, label)
    loss += mean_squared_error(feature, label, model.weight)
  progress_bar.set_postfix(loss=loss/len(support_set))

100%|██████████| 10/10 [00:00<00:00, 182.31it/s, loss=0.0912]


### Ordinal Linear Regression
[wikipedia](https://en.wikipedia.org/wiki/Ordinal_regression)
Ordinal Linear Regression is a statistical model used to predict an ordinal variable based on a given feature vector. The model outputs a continuous value (a float) within specific ranges corresponding to the ordinal categories. These predicted values are then mapped to discrete ordinal labels to classify instances appropriately.
* **around 0 or below:** represents setosa
* **around 1:** represents versicolour
* **around 3 or above:** represents virginica

In [258]:
count, n_samples = 0, len(query_set)
for feature, label in support_set:
  pred = model.forward(feature)
  if round(pred) == label: count += 1
print(f"accuracy: {count / n_samples:.2f}({count}/{n_samples})")

accuracy: 0.92(46/50)
