# Load Dataset

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

# Linear Regression with Gradient Descent

In [2]:
import numpy as np
import pandas as pd

In [3]:
X_train_norm = (X_train - np.mean(X_train))/np.std(X_train)
Y_train_norm = (y_train - np.mean(y_train))/np.std(y_train)

Functions to fit the models automatically are convenient to use, but for an in-depth understanding of the model and the maths behind it is good to implement an algorithm by yourself. Let's try to find linear regression coefficients $m$ and $b$, by minimising the difference between original values $y^{(i)}$ and predicted values $\hat{y}^{(i)}$ with the **loss function** $L\left(w, b\right)  = \frac{1}{2}\left(\hat{y}^{(i)} - y^{(i)}\right)^2$ for each of the training examples. Division by $2$ is taken just for scaling purposes, you will see the reason below, calculating partial derivatives.

To compare the resulting vector of the predictions $\hat{Y}$ with the vector $Y$ of original values $y^{(i)}$, you can take an average of the loss function values for each of the training examples:

$$E\left(m, b\right) = \frac{1}{2n}\sum_{i=1}^{n} \left(\hat{y}^{(i)} - y^{(i)}\right)^2 =
\frac{1}{2n}\sum_{i=1}^{n} \left(mx^{(i)}+b - y^{(i)}\right)^2,\tag{1}$$

where $n$ is a number of data points. This function is called the sum of squares **cost function**. To use gradient descent algorithm, calculate partial derivatives as:

\begin{align}
\frac{\partial E }{ \partial m } &=
\frac{1}{n}\sum_{i=1}^{n} \left(mx^{(i)}+b - y^{(i)}\right)x^{(i)},\\
\frac{\partial E }{ \partial b } &=
\frac{1}{n}\sum_{i=1}^{n} \left(mx^{(i)}+b - y^{(i)}\right),
\tag{2}\end{align}

and update the parameters iteratively using the expressions

\begin{align}
m &= m - \alpha \frac{\partial E }{ \partial m },\\
b &= b - \alpha \frac{\partial E }{ \partial b },
\tag{3}\end{align}

where $\alpha$ is the learning rate.

In [4]:
def linear_reg(m, X, b):
  return np.dot(X, m) + b

def dEdm(m, b, X, Y):
  Y_pred = linear_reg(m, X, b)
  return 1/len(X) * np.dot(X.T, (Y_pred - Y))

def dEdb(m, b, X, Y):
  Y_pred = linear_reg(m, X, b)
  return 1/len(X) * np.sum(Y_pred - Y)

def gradient_descent(m, b, X, Y, learning_rate, num_iterations):
  for iteration in range(num_iterations):
      m_new = m - learning_rate * dEdm(m, b, X, Y)
      b_new = b - learning_rate * dEdb(m, b, X, Y)
  return m_new, b_new

In [5]:
m_initial = np.zeros(X.shape[1])
b_initial = 0
num_iterations = 10
learning_rate = 1

m_gd, b_gd = gradient_descent(m_initial, b_initial,
                              X_train_norm, Y_train_norm, learning_rate, num_iterations)

print(f"Gradient descent result: m_min, b_min = {m_gd}, {b_gd}")

Gradient descent result: m_min, b_min = [ 0.31729063 -0.09419191  0.82920667  0.35719057], 5.921189464667501e-17


In [7]:
# Linear Regression Model
y_test_lr = linear_reg_model.predict(X_test)

# Preprocess test data
X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)

y_test_gd_norm = np.dot(X_test_norm, m_gd) + b_gd
y_test_gd = y_test_gd_norm * np.std(y_train) + np.mean(y_train)

# MAE
mae_lr = np.mean(np.abs(y_test - y_test_lr))
mae_gd = np.mean(np.abs(y_test - y_test_gd))

print(f"Sklearn LR MAE: {mae_lr}")
print(f"Gradient Descent MAE: {mae_gd}")

Sklearn LR MAE: 0.16775968102146444
Gradient Descent MAE: 0.1996832049365863


# Training

In [6]:
# Linear Regression
from sklearn.linear_model import LinearRegression
linear_reg_model = LinearRegression()
linear_reg_model.fit(X_train, y_train)

# Logistic Regression
from sklearn.linear_model import LogisticRegression
logistic_reg_model = LogisticRegression()
logistic_reg_model.fit(X_train, y_train)

# Support Vector Classification (SVC)
from sklearn.svm import SVC
svc_model = SVC()
svc_model.fit(X_train, y_train)

# Decision Trees
from sklearn.tree import DecisionTreeClassifier
decision_tree_model = DecisionTreeClassifier()
decision_tree_model.fit(X_train, y_train)

# Random Forest
from sklearn.ensemble import RandomForestClassifier
random_forest_model = RandomForestClassifier()
random_forest_model.fit(X_train, y_train)

# Gradient Boosting
from sklearn.ensemble import GradientBoostingClassifier
gradient_boosting_model = GradientBoostingClassifier()
gradient_boosting_model.fit(X_train, y_train)

# K-Nearest Neighbors (KNN)
from sklearn.neighbors import KNeighborsClassifier
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Evaluation

In [8]:
from sklearn.metrics import mean_absolute_error, accuracy_score

# Evaluate models
regression_models = [linear_reg_model]

classification_models = [logistic_reg_model, svc_model, decision_tree_model,
          random_forest_model, gradient_boosting_model, knn_model, naive_bayes_model]

for model in regression_models:
  y_pred = model.predict(X_test)
  mae = mean_absolute_error(y_test, y_pred)
  model_name = type(model).__name__
  print(f"{model_name} MAE: {mae:.2f}")

  # Other metrics: MSE, RMSE, R2

for model in classification_models:
  y_pred = model.predict(X_test)
  accuracy = accuracy_score(y_test, y_pred)
  model_name = type(model).__name__
  print(f"{model_name} Accuracy: {accuracy*100:.2f}%")

  # Other metrics: Precision, Recall, F1, Confusion Matrix

LinearRegression MAE: 0.17
LogisticRegression Accuracy: 100.00%
SVC Accuracy: 96.67%
DecisionTreeClassifier Accuracy: 96.67%
RandomForestClassifier Accuracy: 96.67%
GradientBoostingClassifier Accuracy: 96.67%
KNeighborsClassifier Accuracy: 96.67%
GaussianNB Accuracy: 96.67%
