## Linear Regression model:

i choose the analytical solution with the psuedo inverse matrix so you won't see loss curve here because there is no learning process it's instant. used the model to classify MNIST dataset.

In [1]:
# initial constants

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml

WEIGHTS_SIZE=784
LABELS_SIZE=10

In [2]:
# Utility functions

def one_hot_encode(labels):
  """
    One hot encode labels 3=>[0,0,0,1,0,0,0,0,0,0]
  """
  one_hot_encoded=np.zeros((labels.size,LABELS_SIZE))
  one_hot_encoded[np.arange(labels.size),labels]=1

  return one_hot_encoded

def format_percent(num,denom):
  """
    format percentage for printing the predictions
    send numerator denominator get the percentage
  """
  frac=num/denom if denom!=0 else 1
  return '{0:.2f}'.format(frac*100 if frac<=1 and frac>=0 else frac)


In [3]:
class LinearRegression:

  def __init__(self,X_train,y_train,X_test,y_test):
    self.X_train=X_train
    self.y_train=y_train
    self.X_test=X_test
    self.y_test=y_test

    # W=PINV(X^t*X)*X^t*y
    self.weights=np.dot(np.linalg.pinv(np.dot(X_train.T,X_train)),np.dot(X_train.T,y_train)).T


  def predict(self,x):
    """
    Predict labels for given X vector
    """
    return np.argmax(np.dot(x,self.weights.T),axis=1)

  def compute_confusion_mat(self):
      """
      Compute and print the confusion matrixes on test data
      """

      train_label_preds = self.predict(self.X_train)
      train_zipped=list(zip(train_label_preds, np.argmax(self.y_train,axis=1)))

      test_label_preds = self.predict(self.X_test)
      test_zipped=list(zip(test_label_preds, np.argmax(self.y_test,axis=1)))

      total_test_acc=np.ones(self.X_test.shape[0])[test_label_preds==np.argmax(self.y_test,axis=1)].sum()
      total_train_acc=np.ones(self.X_train.shape[0])[train_label_preds==np.argmax(self.y_train,axis=1)].sum()

      # Confusion matrix for each classifier
      for i in range(len(self.weights)):

          test_tn = len(list(filter(lambda l: l[0] != i and l[1] != i, test_zipped)))
          test_fn = len(list(filter(lambda l: l[0] == i and l[1] != i, test_zipped)))
          test_tp = len(list(filter(lambda l: l[0] == i and l[1] == i, test_zipped)))
          test_fp = len(list(filter(lambda l: l[0] != i and l[1] == i, test_zipped)))

          train_tn = len(list(filter(lambda l: l[0] != i and l[1] != i, train_zipped)))
          train_fn = len(list(filter(lambda l: l[0] == i and l[1] != i, train_zipped)))
          train_tp = len(list(filter(lambda l: l[0] == i and l[1] == i, train_zipped)))
          train_fp = len(list(filter(lambda l: l[0] != i and l[1] == i, train_zipped)))

          print("------------------------------------------------------------------------------------------------------------------")
          print(f"--------\tConfusion Matrix for digit {i}\t--------")
          print("------------------------------------------------------------------------------------------------------------------")
          print("\t|\tTrue\t|\tFalse")
          print("---------------------------------------------")
          print(f"Positive|\t{test_tp}\t|\t{test_fp}")
          print(f"Negative|\t{test_tn}\t|\t{test_fn}")
          print("------------------------------------------------------------------------------------------------------------------")
          print(f"Train Accuracy: {format_percent(train_tp+train_tn,train_tp+train_tn+train_fp+train_fn)}\t|\tTest Accuracy: {format_percent(test_tp+test_tn,test_tp+test_tn+test_fp+test_fn)}\t|\tSensitivity: {format_percent(test_tp,test_tp+test_fn)}\t|\tSelectivity: {format_percent(test_tn,test_tn+test_fp)}")
          print("------------------------------------------------------------------------------------------------------------------\n\n")

      print("------------------------------------------------------------------------------------------------------------------")
      print(f"Total model Accuracy:")
      print("------------------------------------------------------------------------------------------------------------------")
      print(f"Train Accuracy:{format_percent(total_train_acc,self.X_train.shape[0])}\t|\tTest Accuracy:{format_percent(total_test_acc,self.X_test.shape[0])}")
      print("------------------------------------------------------------------------------------------------------------------\n")
      # Compute confusion matrix for multi label model
      confusion_matrix = [[0] * len(self.weights) for _ in range(len(self.weights))]
      for true_label, predicted_label in test_zipped:
          confusion_matrix[predicted_label][true_label] += 1
      # Print matrix
      print("Confusion Matrix:")
      print("Predicted\Actual", end="\t")
      for i in range(len(confusion_matrix)):
          print(f"Digit {i}", end="\t")
      print()

      for i in range(len(confusion_matrix)):
          print(f"Digit {i}", end="\t\t\t")
          for j in range(len(confusion_matrix[i])):
              print(confusion_matrix[i][j], end="\t")
          print()

In [5]:
# Fetch MNIST dataset
mnist = fetch_openml('mnist_784', version=1, parser='auto')
# Access features (pixel values) and labels
X, y = mnist['data'], mnist['target']

# Convert X,y to numpy arrays and normalize X
X, y = X.to_numpy(dtype=float)/255, y.to_numpy(dtype=int)

# One hot encode y
y = one_hot_encode(y)

# Append 1 to each sample for the bias
X = np.array(list(map(lambda sample: np.append([1], sample), X)))

# Split dataset into train and test sets 60k train and 10k test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 7)


In [6]:
l=LinearRegression(X_train,y_train,X_test,y_test)
l.compute_confusion_mat()

------------------------------------------------------------------------------------------------------------------
--------	Confusion Matrix for digit 0	--------
------------------------------------------------------------------------------------------------------------------
	|	True	|	False
---------------------------------------------
Positive|	941	|	56
Negative|	8907	|	96
------------------------------------------------------------------------------------------------------------------
Train Accuracy: 98.58	|	Test Accuracy: 98.48	|	Sensitivity: 90.74	|	Selectivity: 99.38
------------------------------------------------------------------------------------------------------------------


------------------------------------------------------------------------------------------------------------------
--------	Confusion Matrix for digit 1	--------
------------------------------------------------------------------------------------------------------------------
	|	True	|	False
----------


# Conclusion

##comparing all three models :

- **perceptron** => got second best results so far i didnt have much time working on the linear regression one but i belive if i did it the iterative way i could beat the perceptron accuracy.

- **linear regression** => the quickest at training and nice results too, considering it's training time it's the best "value for money". i believe i could get better result while using GD but didn't had the time play with that.

- **logistic regression** => the best performance so far it's very logical cause it's suited to this kind of problem while the other ones don't and we used them in a kind of unconventional way to make them function as a multi label classifiers.