In [None]:
import numpy as np

In [None]:
class Linear_layer:
  """ Fully connected layer holding a weight matrix of shape (n_inputs, n_neurons) and a bias vector of shape (n_neurons,). """
  def __init__(self,n_inputs,n_neurons):
    """ Draws the weights from a standard normal distribution and starts every bias at zero. """
    weight_shape = (n_inputs,n_neurons)
    self.weights = np.random.randn(*weight_shape)
    self.bias = np.zeros(n_neurons)

  def forward(self,inputs):
    """ Forward pass: computes inputs . weights + bias. The inputs are remembered because backward() needs them, and the result is both stored in self.output and returned. """
    self.inputs = inputs
    weighted_sum = np.dot(inputs,self.weights)
    self.output = weighted_sum + self.bias
    return self.output

  def backward(self,dvalues):
    """ Backward pass. dvalues is the gradient arriving from the following layer. Returns the tuple (dinputs, dweights, dbias), i.e. the gradients with respect to this layer's inputs, weights and bias. """
    # Gradient w.r.t. the weights uses the inputs cached by forward().
    self.dweights = np.dot(self.inputs.T,dvalues)
    # keepdims=True keeps the summed axis, so for a 2-D dvalues the result has
    # one leading row rather than the flat shape of self.bias.
    self.dbias = np.sum(dvalues,axis=0,keepdims=True)
    # Gradient handed back to the preceding layer.
    self.dinputs = np.dot(dvalues,self.weights.T)
    return self.dinputs,self.dweights,self.dbias


In [None]:
class ReLU_activation:
  """ Rectified linear unit activation layer. """
  def __init__(self):
    pass

  def forward(self,inputs):
    """ Element-wise max(0, input): negative entries become 0, the rest pass through unchanged. """
    self.inputs = inputs
    rectified = np.maximum(0,self.inputs)
    self.output = rectified
    return rectified

  def backward(self,dvalues):
    """ dvalues is the gradient arriving from the following layer. It is passed through where the cached input was strictly positive and zeroed everywhere else. """
    # Work on a copy so the caller's gradient array is left untouched.
    grad = dvalues.copy()
    positive_mask = self.inputs > 0
    np.multiply(grad,positive_mask,out=grad)
    self.dinputs = grad
    return self.dinputs


In [None]:
class sigmoid_activation:
  """ This creates a sigmoid activation layer. """
  def __init__(self):
    pass

  def forward(self,inputs):
    """ Returns the logistic sigmoid of the inputs, a value between 0 and 1.

    The naive form 1/(1+exp(-x)) overflows inside exp for large negative x.
    Here exp is only ever applied to a non-positive number, so it cannot
    overflow:
      x >= 0 :  1 / (1 + exp(-x))
      x <  0 :  exp(x) / (1 + exp(x))
    Both branches are the same function, just written so the exponent is -|x|.
    The result is stored in self.output and returned.
    """
    self.inputs = inputs
    # decay = exp(-|x|) always lies in (0, 1].
    decay = np.exp(-np.abs(self.inputs))
    non_negative = np.asarray(self.inputs) >= 0
    self.output = np.where(non_negative, 1/(1+decay), decay/(1+decay))
    return self.output

  def backward(self,dvalues):
    """ dvalues represents the gradients of the next layer. They are multiplied by the sigmoid derivative output*(1-output), computed from the output cached by forward(). Returns the resulting gradient (also stored in self.dinputs). """
    # Copy first so the caller's array is not modified by the in-place multiply.
    self.dinputs = dvalues.copy()
    self.dinputs *= self.output*(1-self.output)
    return self.dinputs

In [None]:
class tanh_activation:
  """ Hyperbolic tangent activation layer. """
  def __init__(self):
    pass

  def forward(self,inputs):
    """ Applies tanh element-wise, giving values between -1 and 1. The result is cached in self.output and returned. """
    self.inputs = inputs
    squashed = np.tanh(self.inputs)
    self.output = squashed
    return squashed

  def backward(self,dvalues):
    """ dvalues is the gradient arriving from the following layer. It is scaled by the tanh derivative 1 - output**2, using the output cached by forward(). """
    # Copy so the in-place multiply does not touch the caller's array.
    grad = dvalues.copy()
    local_slope = 1-self.output**2
    np.multiply(grad,local_slope,out=grad)
    self.dinputs = grad
    return self.dinputs

In [None]:
class softmax_activation:
  """ This creates a softmax activation function. It turns each row of scores into a probability distribution over the classes. """
  def __init__(self):
    pass

  def forward(self,inputs):
    """ Returns the row-wise softmax of inputs (each row sums to 1). The result is cached in self.output for backward(). """
    self.inputs = inputs
    # Subtracting the row maximum leaves the softmax unchanged but keeps every
    # exponent <= 0, which prevents overflow in np.exp.
    exp_values = np.exp(self.inputs - np.max(self.inputs, axis=1, keepdims=True))
    self.output = exp_values/ np.sum(exp_values,axis=1,keepdims=True)
    return self.output

  def backward(self,dvalues):
    """ dvalues represents the gradients of the next layer. Returns those gradients multiplied by the softmax Jacobian of each sample.

    For one sample with output s and incoming gradient d the Jacobian is
    diag(s) - s s^T, so the product is s * (d - sum(s * d)). Computing that
    closed form for the whole batch at once avoids building a
    (classes x classes) matrix per sample in a Python loop.
    """
    # Per-row dot product s . d, kept as a column so it broadcasts over classes.
    weighted_sum = np.sum(self.output*dvalues,axis=1,keepdims=True)
    self.dinputs = self.output*(dvalues - weighted_sum)
    return self.dinputs




In [None]:
class Cross_entropy_loss:
  """ Cross entropy loss between predicted probabilities and label arrays. """
  def __init__(self):
    pass

  def forward(self,probs,labels):
    """ Computes -sum(labels * log(probs)) divided by the number of rows in labels. The clipped probabilities are kept in self.clip so backward() can reuse them. """
    self.probs, self.labels = probs, labels

    batch_size = self.labels.shape[0]
    # Clipping into [1e-7, 1-1e-7] keeps log() away from 0.
    self.clip = np.clip(self.probs,1e-7,1-1e-7)
    weighted_logs = self.labels*np.log(self.clip)
    self.log_loss = -np.sum(weighted_logs)/batch_size
    return self.log_loss

  def backward(self):
    """ Gradient of the loss with respect to the probabilities passed to forward(): -(labels / clipped probs) / batch size. Must be called after forward(). """
    batch_size = self.labels.shape[0]
    scale = -(1/batch_size)
    self.dinputs = scale*(self.labels/self.clip)
    return self.dinputs

In [None]:
class MSE_loss:
  """ Squared error loss between predictions and labels. """
  def __init__(self):
    pass

  def forward(self,probs,labels):
    """ Returns the sum of squared differences between probs and labels, divided by the number of rows in labels. """
    self.probs, self.labels = probs, labels
    batch_size = self.labels.shape[0]
    residual = self.probs-self.labels
    self.loss = np.sum(residual**2)/batch_size
    return self.loss


  def backward(self):
    """ Gradient of the loss with respect to the predictions passed to forward(): 2 * (probs - labels) / batch size. Must be called after forward(). """
    batch_size = self.labels.shape[0]
    residual = self.probs-self.labels
    self.dinputs = 2*residual/batch_size
    return self.dinputs

In [None]:
class SGD_optimizer:
  """ Plain gradient descent optimizer with a fixed learning rate. """
  def __init__(self,learning_rate=1e-3):
    self.learning_rate = learning_rate

  def step(self,layers,dvalues):
    """ Runs the backward pass through layers from last to first, starting from dvalues, and updates the weights and biases of every Linear_layer in place as it goes. Returns nothing. """
    upstream = dvalues
    for layer in reversed(layers):
      if not isinstance(layer,Linear_layer):
        # Layers without parameters only transform the gradient.
        upstream = layer.backward(upstream)
        continue
      upstream,weight_grad,bias_grad = layer.backward(upstream)
      layer.weights -= self.learning_rate*weight_grad
      # The bias gradient is reshaped to the bias shape before the update.
      layer.bias -= self.learning_rate*bias_grad.reshape(layer.bias.shape)

In [None]:
import pickle

class Model():
    """ A sequential model: an ordered list of layers plus an optimizer and a loss set up by compile(). """
    def __init__(self, layers=None):
        """ Creates the model.

        layers: optional list of already-built layer objects. When omitted a
        fresh empty list is created for this instance; a literal `[]` default
        would be shared by every Model and grow whenever any of them called
        add_layer(). A list that is passed in is stored as-is (not copied).
        """
        self.layers = [] if layers is None else layers

    def add_layer(self, layer_type, *args):
        """ Appends a layer to the model.

        layer_type: one of 'linear', 'relu', 'sigmoid', 'tanh', 'softmax'.
        args: for 'linear', exactly two values (n_inputs, n_neurons); the
        activation layers take no arguments.
        Raises ValueError for an unknown layer_type or a wrong argument count
        for 'linear'.
        """
        if layer_type == 'linear':
            if len(args) != 2:
                raise ValueError('Linear_layer requires exactly two parameters')
            self.layers.append(Linear_layer(args[0], args[1]))
        elif layer_type == 'relu':
            self.layers.append(ReLU_activation())
        elif layer_type == 'sigmoid':
            # The class defined in this file is lower-case `sigmoid_activation`.
            self.layers.append(sigmoid_activation())
        elif layer_type == 'tanh':
            # The class defined in this file is lower-case `tanh_activation`.
            self.layers.append(tanh_activation())
        elif layer_type == 'softmax':
            self.layers.append(softmax_activation())
        else:
            raise ValueError('Invalid layer type')

    def compile(self):
        """ Compiles the model: attaches an SGD optimizer and a cross-entropy loss. Cross-entropy is used instead of MSE because MSE doesn't penalize misclassification as well. """
        self.optimizer = SGD_optimizer()
        self.loss = Cross_entropy_loss()

    def train(self, x_train, y_train, epochs, batch_size, x_test, y_test):
        """ Trains the model in mini-batches.

        For every epoch the training data is walked in consecutive slices of
        batch_size rows; each slice goes through a forward pass, the loss, and
        an optimizer step. The loss of the last batch is printed once per
        epoch. x_test and y_test are only stored on the instance. compile()
        must have been called first.
        """
        self.x_test = x_test
        self.y_test = y_test
        for epoch in range(epochs):
            # Stays None when x_train is empty so the print below cannot hit
            # an unbound local.
            loss = None
            for batch in range(0, len(x_train), batch_size):
                x_batch = x_train[batch:batch+batch_size]
                y_batch = y_train[batch:batch+batch_size]
                # Forward pass
                activations = x_batch
                for layer in self.layers:
                    activations = layer.forward(activations)

                # Compute loss
                loss = self.loss.forward(activations, y_batch)

                # First grad
                dvalues = self.loss.backward()

                # Backward pass
                self.optimizer.step(self.layers, dvalues)

            # Print loss for monitoring
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss}')

    def predict(self, x_test):
        """ Runs x_test through every layer in order and returns the final layer's output. """
        activations = x_test
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def evaluate(self, x_test, y_test):
        """ Returns the loss of the model's predictions for x_test against y_test. compile() must have been called first. """
        predictions = self.predict(x_test)
        return self.loss.forward(predictions, y_test)

    def save(self, filename):
        """ Pickles the layers, optimizer and loss to filename. compile() must have been called first. """
        model_data = {
            'layers': self.layers,
            'optimizer': self.optimizer,
            'loss': self.loss
        }
        with open(filename, 'wb') as file:
            pickle.dump(model_data, file)
        print(f"Model saved to {filename}")

    def load(self, filename):
        """ Restores layers, optimizer and loss from a file written by save(). """
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only load files that come from a trusted source.
        with open(filename, 'rb') as file:
            model_data = pickle.load(file)
        self.layers = model_data['layers']
        self.optimizer = model_data['optimizer']
        self.loss = model_data['loss']
        print(f"Model loaded from {filename}")

