Reusing previous Assignment's IELM code to perform classification on MNIST data set. 

Also, a genetic algorithm is used as the optimizer.

In [36]:
# importing necessary libraries to run the model
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import random
import time
from sklearn.model_selection import train_test_split
from functools import partial

In [37]:
# defining necessary functions 

# defining function to get the optimized value
def _best(populace, funcFitness, best, bestFitness):

    # best = None, bestFitness = None
    for i in range(populace[0].shape[0]):
        
        if best is None or valueTmp < bestFitness:
            bestFitness = valueTmp
            best = populace[0][i][np.newaxis, :]
            
        if populace[1][i] > -1.0:
            valueTmp = populace[1][i]
        else:
            valueTmp = funcFitness(populace[0][i][np.newaxis, :])
            populace[1][i] = valueTmp

    return best, bestFitness

# defining a function to provide individual score
def _candidate(size):
    return np.random.randint(2, size=size)

# defining a function to provide score for collection of individuals
def _select_family(populace, funcFitness):

    pop_id = np.random.permutation(np.arange(populace[0].shape[0]))

    parent1 = populace[0][pop_id[0], :][np.newaxis, :]
    parent2 = populace[0][pop_id[1], :][np.newaxis, :]

    # condition for parent fitness in the populace
    if populace[1][pop_id[0]] < -1.0:
        parent1Fitness = funcFitness(parent1)
        populace[1][pop_id[0]] = parent1Fitness
    else:
        parent1Fitness = populace[1][pop_id[0]]

    if populace[1][pop_id[1]] < -1.0:
        parent2Fitness = funcFitness(parent2)
        populace[1][pop_id[1]] = parent2Fitness
    else:
        parent2Fitness = populace[1][pop_id[1]]

    return parent1 if parent1Fitness < parent2Fitness else parent2

# defining the genetic algorithm that will provide the optimized score
def genetic_algorithm(funcFitness, dim, n_candidate=10, epochs=50, hybrid_rate=0.9, mutation_rate=0.1):
    
    assert n_candidate % 2 == 0
    
    populace = [np.array([_candidate(dim) for _ in range(n_candidate)]),
                  np.zeros(n_candidate) - 1.0]

    children = np.zeros((n_candidate, dim))

    best, bestFitness = None, None

    return best, bestFitness

# defining a Function that evolves a candidate
def _evolve(candidate):

    candidate_id = np.random.randint(high=candidate.shape[0], low = 0)

    # flipping a bit
    candidate[candidate_id] = 1 - candidate[candidate_id] 
    return candidate

def _hybrid(male, female):
    candidate_id = np.random.randint(0, male.shape[1], 2)
    candidate_id.sort()

    child2 = np.hstack((female[0, :candidate_id[0]], male[0, candidate_id[0]:candidate_id[1]], female[0, candidate_id[1]:]))[np.newaxis, :]
    child1 = np.hstack((male[0, :candidate_id[0]], female[0, candidate_id[0]:candidate_id[1]], male[0, candidate_id[1]:]))[np.newaxis, :]

    return child1, child2   

# defining a sigmoid function as the activation function
def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

# defining a fourier function as the activation function
def _fourier(x):
    return np.sin(x)

# initialising a simple identity function for the value inputed
def _identity(x):
    return x

# initialising a function to compute the activation of the model, sigmoid is used for now. Fourier and Hard limit can be used as well
def activation_fn_compute(name):
    return {
        'sigmoid': _sigmoid,
        'fourier': _fourier,
    }[name]

# a function to compute mean square error
def _mean_squared_error(y, pred):
    return 0.5 * np.mean((y - pred) ** 2)

# intialising a function to compute the loss of the model, mse is used for now
def loss_compute(name):
    return {
        'mse': _mean_squared_error,
    }[name]

# compute number of neurons
def compute_number_nodes(input_nodes, output_nodes):
    return int(np.ceil(np.sqrt(input_nodes * output_nodes)))

def _wrapper_FitnessFunc(funcFitness, _input_nodes, _output_nodes, X, Y, mask):

  X_Train = mask_input(X, mask)
  omega = 0.1
  train_score = funcFitness(input_nodes, output_nodes, X_train, Y_train)
  score = rho * (1.0 - train_score) + omega * mask.sum().astype(np.float) / mask.shape[1]
  return score

# Incremental Extreme machine learning class is initialised so that model can be run
class IELM:
  
  # defining a function to initialise all the necessary variables
  def __init__(self, input_nodes, hidden_nodes, output_nodes, activation='sigmoid',
                loss='mse', beta_init=None, w_init=None, bias_init=None):
    # declaring the values for the input, hidden and output nodes for the model
    # w is the weight vector
      self._input_nodes = input_nodes
      self._hidden_nodes = 1
      self._output_nodes = output_nodes

    # declaring activation and loss for the model
      self._activation = activation_fn_compute(activation)
      self._loss = loss_compute(loss)

    # computing alpha value from input and hidden nodes
      self._w = np.random.uniform(-1, 1, size=(self._input_nodes, self._hidden_nodes))
    
    # computing bias value from hidden nodes
      self._bias = np.zeros(shape=(self._hidden_nodes,))

    # computing beta value from hidden and output nodes
      self._beta = np.random.uniform(-1., 1., size=(self._hidden_nodes, self._output_nodes))

  def sigmoid(self, x):
      return 1. / (1. + np.exp(-x))

  # function to place them in lists
  def predict(self, X):
      return list(self(X))

  def genetic_algorthm(self, X, Y, mask, best):
    pred = self.predict(X)
    # compute Loss
    loss = self._loss(Y, pred)

    # compute Accuracy
    acc = np.sum(np.argmax(pred, axis=-1) == np.argmax(Y, axis=-1)) / len(Y)

    return loss, acc

  # a function to run the exact model which takes parameters 
  def fit(self, X, Y, LMax, display_time=False):
      
      # computing the value with sigmoid activation function 
      # H is the hidden layer output matrix
      H = self.sigmoid(X.dot(self._w))

      # pseudoinverse of the hidden layer matrix
      H_pinv = np.linalg.pinv(H)

      # beta gets updated
      self._beta = H_pinv.dot(Y)

      # looping it within the range of LMax which will be initialised later
      for i in range(2,LMax):
        # initialising random for beta and w(alpha)
          beta_random = np.random.uniform(-1.,1.,size=(1, self._output_nodes))
          alpha_random = np.random.uniform(-1.,1.,size=(self._input_nodes, 1))
          self._w=np.hstack([self._w,alpha_random])

          # update the shape everytime the loop runs
          
          self._beta = np.vstack([self._beta,beta_random])
          H = self.sigmoid(X.dot(self._w))
          
          # pseudoinverse 
          H_pinv = np.linalg.pinv(H)
          
          # below step updates beta
          self._beta = H_pinv.dot(Y)

      # to provide the time taken for training 
      if display_time:
          start = time.time()
      H_pinv = np.linalg.pinv(H)
      if display_time:
          stop = time.time()
          print(f'Train time: {stop-start}')

      self._beta = H_pinv.dot(Y)

  def __call__(self, X):
      H = self._activation(X.dot(self._w) + self._bias)
      return H.dot(self._beta)

  # initialising a function to evaluate the loss and accuracy of the model
  def compute_loss_acc(self, X, Y):
      pred = self.predict(X)

      # compute Loss
      loss = self._loss(Y, pred)

      # compute Accuracy
      acc = np.sum(np.argmax(pred, axis=-1) == np.argmax(Y, axis=-1)) / len(Y)

      return loss, acc

In [38]:
# Initialising model variables and declaring them
classes = 10
hidden_layers = 512

# input lenght is 28**2 as it is 28x28 
input_length = 28**2

LMax=100

In [39]:
# Loading MNIST Dataset using the library functionality of Keras
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Processing each image which is a 28*28 pixel into input vectors between 0 and 255 for training
x_train = x_train.astype(np.float32) / 255.
x_train = x_train.reshape(-1, input_length)

# Processing each image which is a 28*28 pixel into input vectors between 0 and 255 for testing
x_test = x_test.astype(np.float32) / 255.
x_test = x_test.reshape(-1, input_length)

# converting the value to categorical
y_train = to_categorical(y_train, classes).astype(np.float32)
y_test = to_categorical(y_test, classes).astype(np.float32)

In [40]:
# create instance of our model
model = IELM(
    input_length,
    hidden_layers,
    classes
)

In [41]:
# Train model and compute accuracy and loss
model.fit(x_train, y_train, LMax, display_time=True)

# computing time taken
timeTaken_train = time.time()

# loss and accuracy from the GA optimizer
optimizedGA_loss, accuracy_train = model.genetic_algorthm(x_train, y_train, mask=None, best=None)
final_timeTaken_train = time.time()
print('Training Loss:', optimizedGA_loss)
print('GAs best accuracy:', accuracy_train)
print('Total Time require for Training is (in Seconds):', (final_timeTaken_train-timeTaken_train))

Train time: 0.9560821056365967
Training Loss: 0.023392366898266015
GAs best accuracy: 0.8033166666666667
Total Time require for Training is (in Seconds): 0.8445286750793457


In [42]:
#  Test model and compute accuracy and loss
timeTaken_test = time.time()

# loss and accuracy from the GA optimizer
optimizedGA_loss_test, accuracy_test = model.genetic_algorthm(x_test, y_test, mask=None, best=None)
final_timeTaken_test = time.time()
print('GAs optimized Testing loss:', optimizedGA_loss_test)
print('Testing accuracy:', accuracy_test)
print('Total Time required for Testing is (in seconds):', (final_timeTaken_test-timeTaken_test))

GAs optimized Testing loss: 0.023046650914068156
Testing accuracy: 0.8103
Total Time required for Testing is (in seconds): 0.1635141372680664
