<a href="https://colab.research.google.com/github/sthalles/logistic-regression/blob/master/Softmax_Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import tensorflow as tf
from numpy.linalg import pinv, inv

In [0]:
class DataSet:
  """
  Groups flattened samples by class label, keeping only a chosen set of classes.

  Attributes:
    valid_classes: labels that are kept (every distinct target when not given).
    number_of_classes: len(valid_classes).
    data: dict mapping label -> array with one flattened sample per row.
  """
  def __init__(self, data, targets, valid_classes=None):
    """
    data: iterable of sample arrays; each sample is flattened to one row.
    targets: iterable of labels, aligned with data.
    valid_classes: labels to keep; defaults to np.unique(targets).

    Prints the number of records per class and the overall total.
    """
    if valid_classes is None:
      self.valid_classes = np.unique(targets)
    else:
      self.valid_classes = valid_classes
    self.number_of_classes = len(self.valid_classes)
    self.data = self.to_dict(data, targets)

    total = 0
    for i in self.data.keys():
      print("Class {0} # of records: {1}".format(i,len(self.data[i])))
      total += len(self.data[i])
    print("Total:",total)

  def to_dict(self, data, targets):
    """
    Build the label -> samples dictionary.

    Samples whose label is not in valid_classes are dropped. Keys keep the
    order in which each label first appears in targets; valid classes that
    have no sample at all are appended afterwards with an empty array, so
    every valid class is always present in the result.
    return: dict mapping label -> array of shape [n_samples_of_label, n_features]
    """
    allowed = set(self.valid_classes)
    grouped = {}
    n_features = 0
    for x, y in zip(data, targets):
      if y in allowed:
        row = np.asarray(x).flatten()
        n_features = row.size
        grouped.setdefault(y, []).append(row)

    data_dict = {label: np.asarray(rows) for label, rows in grouped.items()}

    # A requested class without samples used to raise KeyError here;
    # give it an empty block with the same number of columns instead.
    for label in self.valid_classes:
      if label not in data_dict:
        data_dict[label] = np.empty((0, n_features))

    return data_dict

  def get_data_by_class(self, class_id):
    """
    Return the array of flattened samples stored for class_id.

    Raises:
      ValueError: if class_id is not one of valid_classes.
    """
    if class_id in self.valid_classes:
      return self.data[class_id]
    # Raising a bare string is itself a TypeError in Python 3; raise a real exception.
    raise ValueError("Class not found.")

  def get_data_as_dict(self):
    """Return the internal label -> samples dictionary (not a copy)."""
    return self.data

  def get_all_data(self):
    """
    Concatenate every class into one data array and one label array.

    Rows are grouped by class, in the key order of the internal dictionary.
    return: (data, labels) where labels[i] is the class of data[i].
    """
    data = []
    labels = []
    for label, class_i_data in self.data.items():
      data.extend(class_i_data)
      labels.extend([label] * class_i_data.shape[0])
    return np.asarray(data), np.asarray(labels)

In [3]:
# Load the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide by 255.0 (presumably maps 8-bit pixel values into [0, 1] - confirm).
x_train = x_train / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Keep only classes 0-4 in both splits. get_all_data() returns the samples
# flattened to one row each and grouped by class, with a matching label array.
train_dataset = DataSet(data=x_train, targets=y_train, valid_classes=[0, 1, 2, 3, 4])
test_dataset = DataSet(data=x_test, targets=y_test, valid_classes=[0, 1, 2, 3, 4])

x_train, y_train = train_dataset.get_all_data()
x_test, y_test = test_dataset.get_all_data()

Class 0 # of records: 5923
Class 4 # of records: 5842
Class 1 # of records: 6742
Class 2 # of records: 5958
Class 3 # of records: 6131
Total: 30596
Class 2 # of records: 1032
Class 1 # of records: 1135
Class 0 # of records: 980
Class 4 # of records: 982
Class 3 # of records: 1010
Total: 5139


In [0]:
# Force a 2-D [n_samples, n_features] layout. DataSet already flattens each
# sample, so this should leave the arrays unchanged and acts as a guard.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

In [0]:
def to_one_hot(targets,k):
  """
  One-hot encode integer class labels.

  targets: array of labels used directly as column indices. shape [N]
  k: number of columns in the encoding.
  return: float array of shape [N,k] with a single 1 per row.
  """
  n_samples = targets.shape[0]
  encoded = np.zeros((n_samples, k))
  # Each `row` is a view into `encoded`, so writing to it fills the matrix.
  for row, label in zip(encoded, targets):
    row[label] = 1
  return encoded

In [0]:
class LogisticRegression:
  """
  Multiclass (softmax) logistic regression fitted with Newton-style updates.

  Attributes:
    fit_intercept: when True a column of ones is prepended to every input.
    W: weight matrix of shape [D,K] (D counts the intercept column when it
       is used); None until fit() is called or W is assigned.
  """
  def __init__(self,fit_intercept=True):
    self.fit_intercept = fit_intercept
    self.W = None

  def score(self,X,y):
    """
    Accuracy of the model on (X, y).
    X: raw inputs, without the intercept column. shape: [N,D]
    y: true targets. shape [N]
    return: fraction of rows whose predicted class index equals y.
    """
    return np.sum(self.predict(X) == y) / len(y)

  def predict(self,X):
    """
    Predict a class index for every row of X.
    X: raw inputs, without the intercept column. shape: [N,D]
    return: index of the most probable class per row. shape [N]
    """
    if self.fit_intercept:
      X = self.add_intercept(X)

    probabilities = self.forward(X)
    return np.argmax(probabilities,axis=1)

  def _softmax(self, a):
    """
    a: Linear combination of inputs and weights: shape: [N,K]
    return: exp(a) normalised so that every row sums to 1.
    """
    exp_a = np.exp(a)
    return exp_a / np.sum(exp_a, axis=-1, keepdims=True)

  def forward(self,X):
    """
    Model the posterior probability P(Ck|x) as a softmax function on the linear combination of inputs X and weights W.
    X: inputs with the intercept column already added when it is used, shape: [N,D]
    return: softmax transformation probabilities, shape: [N,K]
    """
    logits = np.dot(X,self.W)

    # Softmax is unchanged by subtracting a per-row constant; shifting the row
    # maximum to 0 keeps np.exp from overflowing to inf (which gave NaN rows).
    safe_logits = logits - np.max(logits, axis=-1, keepdims=True)
    return self._softmax(safe_logits)

  def add_intercept(self,x):
    """Return x with a leading column of ones. shape: [N,D] -> [N,D+1]"""
    const = np.ones((x.shape[0],1))
    return np.concatenate((const,x),axis=1)

  def fit(self,X,y,iterations=2):
    """
    Fit one weight column per class to the input data X.
    X: raw inputs, without the intercept column. shape: [N,D]
    y: true targets. shape [N]. They are passed to to_one_hot, which uses them
       as column indices, so they are expected to be the integers 0..K-1.
    iterations: number of update steps to run.

    Calling fit again continues from the current W instead of restarting.
    """
    if self.fit_intercept:
      X = self.add_intercept(X)

    # get the number of classes
    k = len(np.unique(y))

    # get the data dimensionality
    d = X.shape[1]

    # compute the dimension of the Hessian
    dk = d*k

    print("Input shape:",X.shape)
    print("# classes: {0} features dim:{1}".format(k,d))

    # convert the labels to one-hot encoding
    y = to_one_hot(y, k)

    # Hessian stored as a 4-D tensor indexed [feature, class, feature, class]
    HT = np.zeros((d,k,d,k))

    if self.W is None:
      self.W = np.zeros([d, k])
    # Read the shape unconditionally so a repeated fit() call also has it.
    W_shape = self.W.shape

    for _ in range(iterations):
      probabilities = self.forward(X)
      for i in range(k):
        for j in range(k):
          # r can be negative when i != j, so the block is not written as a
          # product of square-root weighted matrices.
          r = np.multiply(probabilities[:,i],((i==j)-probabilities[:,j]))
          HT[:,i,:,j] = np.dot(np.multiply(X.T,r),X) # 4.110

      G = np.dot(X.T,(probabilities-y))
      H = np.reshape(HT,(dk,dk))
      print("Hessian:",H.shape)
      # Flattening W, G and H with the same [feature, class] ordering keeps
      # the three of them aligned for the update below.
      self.W = self.W.reshape(-1) - np.dot(pinv(H), G.reshape(-1)) # 4.92
      self.W = np.reshape(self.W,W_shape)

In [10]:
# Train the softmax classifier on the filtered training split
# (fit() runs its default number of update steps).
clf = LogisticRegression(fit_intercept=True)
clf.fit(X=x_train, y=y_train)

Input shape: (30596, 785)
# classes: 5 features dim:785
Hessian: (3925, 3925)
Hessian: (3925, 3925)


In [12]:
# Accuracy on the held-out split restricted to the same classes.
test_accuracy = clf.score(x_test, y_test)
print("Test accuracy:", test_accuracy)

Test accuracy: 0.9673088149445418
