In [1]:
# importing necessary libraries to run the model
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import random
import time

In [2]:
# defining necessary functions 

# defining a sigmoid function as the activation function
def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

# defining a fourier function as the activation function
def _fourier(x):
    return np.sin(x)

# initialising a simple identity function for the value inputed
def _identity(x):
    return x

# initialising a function to compute the activation of the model, sigmoid is used for now. Fourier and Hard limit can be used as well
def activation_fn_compute(name):
    return {
        'sigmoid': _sigmoid,
        'fourier': _fourier,
    }[name]

# a function to compute mean square error
def _mean_squared_error(y, pred):
    return 0.5 * np.mean((y - pred) ** 2)

# intialising a function to compute the loss of the model, mse is used for now
def loss_compute(name):
    return {
        'mse': _mean_squared_error,
    }[name]

# Incremental Extreme machine learning class is initialised so that model can be run
class IELM:
  
  # defining a function to initialise all the necessary variables
  def __init__(self, input_nodes, hidden_nodes, output_nodes, activation='sigmoid',
                loss='mse', beta_init=None, w_init=None, bias_init=None):
    # declaring the values for the input, hidden and output nodes for the model
    # w is the weight vector
      self._input_nodes = input_nodes
      self._hidden_nodes = 1
      self._output_nodes = output_nodes

    # declaring activation and loss for the model
      self._activation = activation_fn_compute(activation)
      self._loss = loss_compute(loss)

    # computing alpha value from input and hidden nodes
      self._w = np.random.uniform(-1, 1, size=(self._input_nodes, self._hidden_nodes))
    
    # computing bias value from hidden nodes
      self._bias = np.zeros(shape=(self._hidden_nodes,))

    # computing beta value from hidden and output nodes
      self._beta = np.random.uniform(-1., 1., size=(self._hidden_nodes, self._output_nodes))

  def sigmoid(self, x):
      return 1. / (1. + np.exp(-x))

  # function to place them in lists
  def predict(self, X):
      return list(self(X))

  # a function to run the exact model which takes parameters 
  def fit(self, X, Y, LMax, display_time=False):
      
      # computing the value with sigmoid activation function 
      # H is the hidden layer output matrix
      H = self.sigmoid(X.dot(self._w))

      # pseudoinverse of the hidden layer matrix
      H_pinv = np.linalg.pinv(H)

      # beta gets updated
      self._beta = H_pinv.dot(Y)

      # looping it within the range of LMax which will be initialised later
      for i in range(2,LMax):
        # initialising random for beta and w(alpha)
          beta_random = np.random.uniform(-1.,1.,size=(1, self._output_nodes))
          alpha_random = np.random.uniform(-1.,1.,size=(self._input_nodes, 1))
          self._w=np.hstack([self._w,alpha_random])

          # print the shape everytime the loop runs
          print(self._beta.shape,beta_random.shape)
          self._beta = np.vstack([self._beta,beta_random])
          H = self.sigmoid(X.dot(self._w))
          
          # pseudoinverse 
          H_pinv = np.linalg.pinv(H)
          
          # below step updates beta
          self._beta = H_pinv.dot(Y)

      # to provide the time taken for training 
      if display_time:
          start = time.time()
      H_pinv = np.linalg.pinv(H)
      if display_time:
          stop = time.time()
          print(f'Train time: {stop-start}')

      self._beta = H_pinv.dot(Y)

  def __call__(self, X):
      H = self._activation(X.dot(self._w) + self._bias)
      return H.dot(self._beta)

  # initialising a function to evaluate the loss and accuracy of the model
  def compute_loss_acc(self, X, Y):
      pred = self.predict(X)

      # compute Loss
      loss = self._loss(Y, pred)

      # compute Accuracy
      acc = np.sum(np.argmax(pred, axis=-1) == np.argmax(Y, axis=-1)) / len(Y)

      return loss, acc

In [3]:
# Initialising model variables and declaring them
classes = 10
hidden_layers = 512

# input lenght is 28**2 as it is 28x28 
input_length = 28**2

LMax=100

In [4]:
# Loading MNIST Dataset using the library functionality of Keras
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Processing each image which is a 28*28 pixel into input vectors between 0 and 255 for training
x_train = x_train.astype(np.float32) / 255.
x_train = x_train.reshape(-1, input_length)

# Processing each image which is a 28*28 pixel into input vectors between 0 and 255 for testing
x_test = x_test.astype(np.float32) / 255.
x_test = x_test.reshape(-1, input_length)

# converting the value to categorical
y_train = to_categorical(y_train, classes).astype(np.float32)
y_test = to_categorical(y_test, classes).astype(np.float32)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# create instance of our model
model = IELM(
    input_length,
    hidden_layers,
    classes
)

In [6]:
# Train model and compute accuracy and loss
model.fit(x_train, y_train, LMax, display_time=True)

# computing time taken
timeTaken_train = time.time()
loss_train, accuracy_train = model.compute_loss_acc(x_train, y_train)
final_timeTaken_train = time.time()
print('training loss:', loss_train)
print('training accuracy:', accuracy_train)
print('Total Time require for Training is (in Seconds):', (final_timeTaken_train-timeTaken_train))

(1, 10) (1, 10)
(2, 10) (1, 10)
(3, 10) (1, 10)
(4, 10) (1, 10)
(5, 10) (1, 10)
(6, 10) (1, 10)
(7, 10) (1, 10)
(8, 10) (1, 10)
(9, 10) (1, 10)
(10, 10) (1, 10)
(11, 10) (1, 10)
(12, 10) (1, 10)
(13, 10) (1, 10)
(14, 10) (1, 10)
(15, 10) (1, 10)
(16, 10) (1, 10)
(17, 10) (1, 10)
(18, 10) (1, 10)
(19, 10) (1, 10)
(20, 10) (1, 10)
(21, 10) (1, 10)
(22, 10) (1, 10)
(23, 10) (1, 10)
(24, 10) (1, 10)
(25, 10) (1, 10)
(26, 10) (1, 10)
(27, 10) (1, 10)
(28, 10) (1, 10)
(29, 10) (1, 10)
(30, 10) (1, 10)
(31, 10) (1, 10)
(32, 10) (1, 10)
(33, 10) (1, 10)
(34, 10) (1, 10)
(35, 10) (1, 10)
(36, 10) (1, 10)
(37, 10) (1, 10)
(38, 10) (1, 10)
(39, 10) (1, 10)
(40, 10) (1, 10)
(41, 10) (1, 10)
(42, 10) (1, 10)
(43, 10) (1, 10)
(44, 10) (1, 10)
(45, 10) (1, 10)
(46, 10) (1, 10)
(47, 10) (1, 10)
(48, 10) (1, 10)
(49, 10) (1, 10)
(50, 10) (1, 10)
(51, 10) (1, 10)
(52, 10) (1, 10)
(53, 10) (1, 10)
(54, 10) (1, 10)
(55, 10) (1, 10)
(56, 10) (1, 10)
(57, 10) (1, 10)
(58, 10) (1, 10)
(59, 10) (1, 10)
(60, 1

In [7]:
#  Test model and compute accuracy and loss
timeTaken_test = time.time()
loss_test, accuracy_test = model.compute_loss_acc(x_test, y_test)
final_timeTaken_test = time.time()
print('Testing loss:', loss_test)
print('Testing accuracy:', accuracy_test)
print('Total Time required for Testing is (in seconds):', (final_timeTaken_test-timeTaken_test))

Testing loss: 0.023630283982512396
Testing accuracy: 0.8035
Total Time required for Testing is (in seconds): 0.1660621166229248
