<a href="https://colab.research.google.com/github/mahroo12/COMP-551-Assignment-2/blob/main/Comp551_Project2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import necessary Libraries**

In [1]:
import numpy as np
import pandas as pd
import math
%matplotlib inline
import matplotlib.pyplot as plt
import scipy.sparse
from IPython.core.debugger import set_trace
import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'numpy'

**Part 1: Read in and Process Data**


1.   Import Digits Dataset
[digits_dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html)
2.   Import Iris Dataset
[iris_dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html)





In [None]:
# Load the two datasets used in this project from scikit-learn.
from sklearn.datasets import load_digits, load_iris

digits = load_digits()   # digits dataset
iris = load_iris()       # Iris dataset (the additional dataset)

**Part 2: Softmax Regression**



In [None]:
##########################################
# Gradient Descent w/ Momentum Optimizer #
##########################################

In [None]:
class MiniBatchGradientDescent:
  """Mini-batch gradient descent with momentum.

  One "iteration" is a full pass (epoch) over the data, split into consecutive,
  non-overlapping batches of `batch_size` rows. The last batch of an epoch may
  be smaller when the number of samples is not a multiple of `batch_size`.

  PARAMETERS:
  learning_rate: step size (alpha)
  max_iters: maximum number of epochs (passes through the data set)
  epsilon: stop once the norm of the last batch gradient is <= epsilon
  record_history: if True, every updated weight is appended to `w_history`
  batch_size: rows per batch; 1 gives SGD, n_samples gives full-batch GD
  momentum: weight (beta) given to the previous update direction
  """

  def __init__(self,learning_rate=.001,max_iters=1e4,epsilon=1e-8, record_history=False,batch_size=32,momentum=0.1):
    if batch_size < 1:
      raise ValueError("batch_size must be a positive integer")

    self.learning_rate = learning_rate      # alpha value
    self.max_iters = max_iters              # epochs = passes through data set
    self.record_history = record_history
    self.epsilon = epsilon                  # termination condition
    self.batch_size = int(batch_size)       # if 1, then SGD
    self.momentum = momentum                # beta in the momentum update

    # Always present so callers can read it; only filled when record_history is True.
    self.w_history = []

  #################################################
  # METHOD: Get Batch from Data Set
  #################################################
  # PARAMETERS:
  # X: training vectors, shape = [n_samples,n_features] = NxD
  # y: target values (classes), shape = [n_samples] = N
  # i: zero-based index of the batch to get
  # RETURNS:
  # X: data for this batch
  # y: classes for this batch
  #################################################
  def get_batches(self,X,y,i):
    # Batch i covers rows [i*batch_size, (i+1)*batch_size); slicing clips the
    # final batch at the end of the data.
    start = i * self.batch_size
    stop = start + self.batch_size
    return X[start:stop], y[start:stop]


  #################################################
  # METHOD: Mini-Batch Gradient Descent
  #################################################
  # PARAMETERS:
  # gradient_fn: callable (X_batch, y_batch, w) -> gradient; the gradient is
  #              transposed before being subtracted from w, so it must have
  #              the shape of w.T
  # X: training vectors, shape = [n_samples,n_features]
  # y: target values, shape = [n_samples]
  # w: initial weight used in calculation
  # RETURNS:
  # w: final weight after optimization
  #################################################
  def run(self,gradient_fn,X,y,w):
    n_samples = X.shape[0]
    # Ceiling division: keep the trailing partial batch, and get exactly one
    # batch when batch_size exceeds the number of samples.
    num_batches = -(-n_samples // self.batch_size)
    if num_batches == 0:
      return w                                       # no data, nothing to optimise

    grad = np.inf                                    # forces the first epoch to run
    delta_w = 0                                      # running momentum term

    epoch = 0
    while np.linalg.norm(grad) > self.epsilon and epoch < self.max_iters:

      for t in range(num_batches):
        X_batch, y_batch = self.get_batches(X,y,t)

        grad = gradient_fn(X_batch,y_batch,w)

        # Exponentially weighted average of past gradients.
        delta_w = self.momentum * delta_w + (1-self.momentum) * grad
        w = w - self.learning_rate * np.transpose(delta_w)

        if self.record_history:
          self.w_history.append(w)

      epoch += 1
    return w

**Multi-class Logistic Regression**

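$\sigma(z) = \frac{1}{1+e^{-z}}$ is the logistic function, also called the sigmoid function. For $K$ classes it generalizes to the softmax function, $\mathrm{softmax}(z)_k = \frac{e^{z_k}}{\sum_{j=1}^{K} e^{z_j}}$, which is what the model below uses.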

In [None]:
class MultiClassLogisticRegression:
  """Multi-class (softmax) logistic regression.

  The weights are learned by an optimizer object passed to `fit`; that object
  must expose `run(gradient_fn, X, y, w0)` and return the final weights.
  After fitting, `w` has shape [n_classes, n_features (+1 if add_bias)] and
  `classes_` holds the sorted unique labels; column k of `predict`'s output
  corresponds to `classes_[k]`.
  """

  # initialize model parameters
  def __init__(self, add_bias=True, learning_rate=.1, epsilon=1e-4, max_iters=1e5):
    self.add_bias = add_bias
    # Stored but not read anywhere in this class: the optimizer passed to
    # fit() carries its own learning rate / stopping settings.
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.max_iters = max_iters
    self.w = []
    self.classes_ = None

  # softmax function takes an array of scores whose last axis indexes classes
  # and turns each row into a valid probability distribution
  def softmax(self, X):
    X = np.asarray(X, dtype=float)
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing.
    shifted = X - np.max(X, axis=-1, keepdims=True)
    exps = np.exp(shifted)                            # exponentiates, forces positive values
    norms = np.sum(exps, axis=-1, keepdims=True)      # per-sample normaliser, so each row sums to 1
    return exps / norms


  # encode integer class indices as a one-hot matrix of shape [n_samples, n_classes]
  # n_classes: number of columns; defaults to max(y)+1. Pass it explicitly when
  # y is a batch that may not contain the highest class.
  def one_hot_encoding(self, y, n_classes=None):
    y = np.asarray(y, dtype=int).ravel()
    m = y.shape[0]
    if n_classes is None:
      n_classes = int(y.max()) + 1 if m else 0
    OHX = np.zeros((m, n_classes))
    OHX[np.arange(m), y] = 1.0
    return OHX

  # cross-entropy loss between predicted probabilities yhat and one-hot targets y
  # (the small constant avoids log(0))
  @staticmethod
  def crossEntropy(yhat,y):
    return - np.sum(y * np.log(yhat+1e-6))

  #################################################
  # METHOD: learn model from training data
  #################################################
  # PARAMETERS:
  # X: training vectors, shape = [n_samples,n_features]
  # y: target values (one label per sample), shape = [n_samples]
  # optimizer: object whose run(gradient_fn, X, y, w0) returns the fitted weights
  # RETURNS:
  # self: object
  # RAISES:
  # ValueError: if X and y do not have the same number of samples
  #################################################
  def fit(self, X, y, optimizer):
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
      X = X[:, None]
    y = np.asarray(y).ravel()
    if y.shape[0] != X.shape[0]:
      raise ValueError(
        "X and y must have the same number of samples, got %d and %d"
        % (X.shape[0], y.shape[0]))
    if self.add_bias:
      X = np.column_stack([X,np.ones(X.shape[0])])  # add vector of 1s
    D = X.shape[1]                                  # D features (including bias)

    # Map arbitrary labels to indices 0..K-1 so they can be one-hot encoded.
    self.classes_, y_idx = np.unique(y, return_inverse=True)
    y_idx = np.asarray(y_idx).ravel()
    n_classes = len(self.classes_)

    def gradient(X, y, w):
      # Gradient of the mean cross-entropy w.r.t. w, returned with shape
      # [features, classes] (i.e. the shape of w.T).
      N = X.shape[0]
      yh = self.softmax(np.dot(X,w.T))                # softmax on scores to get probabilities
      y_mat = self.one_hot_encoding(y, n_classes)     # convert classes to one-hot representation
      return np.dot(X.T,(yh-y_mat)) / N

    w0 = np.zeros([n_classes,D])                    # initialize the weights to [classes, features]
    self.w = optimizer.run(gradient, X, y_idx, w0)  # run the optimizer to get the optimal weights

    return self

  #####################################################
  # METHOD: Predict class probabilities for X
  #####################################################
  # PARAMETERS:
  # X: vectors to predict for, shape = [n_samples,n_features]
  # RETURNS:
  # yh: predicted class probabilities, shape = [n_samples,n_classes];
  #     take argmax over axis 1 to get the most likely class index
  # RAISES:
  # RuntimeError: if called before fit()
  #####################################################
  def predict(self, X):
    if np.size(self.w) == 0:
      raise RuntimeError("predict() called before fit()")
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
      X = X[:, None]
    N = X.shape[0]
    if self.add_bias:
      X = np.column_stack([X,np.ones(N)])           # add bias vector
    yh = self.softmax(np.dot(X,self.w.T))           # predict output yh = softmax(xW + b)
    return yh

In [None]:
# training data: the digits data set
m_digits = digits.data.shape[0]  # number of samples
X_digits = digits.data           # shape=[1797,64]=[n_samples,n_features]
# Per-sample class labels. `digits.target_names` only lists the class names
# (0,1...9) once each, so it cannot be used as the training target.
y_digits = digits.target

# define an optimizer (batch_size = number of samples, i.e. full-batch gradient descent)
optimizer = MiniBatchGradientDescent(learning_rate=.005, max_iters=100, record_history=True, batch_size=m_digits)

# define a model
model = MultiClassLogisticRegression()

# pass an instance of the optimizer to the model, fit() method will run this to fit the data
model.fit(X_digits,y_digits,optimizer)

model.predict(X_digits)
#print('w history:', optimizer.w_history)


**Train Model**

**Part 3: Testing of Softmax**

In [None]:
#

**Part 4: Alternative Algorithm**

In [None]:
#