In [None]:
import numpy as np

In [None]:
class Tensor:
  """Minimal reverse-mode autodiff node wrapping a NumPy value.

  Every operation returns a new ``Tensor`` that remembers its operands in
  ``_parent`` (always a tuple) and installs a ``_backward`` closure which
  pushes ``out.grad`` one step back to those operands. ``backward()`` then
  runs the closures once each, in reverse topological order, so a node that
  is used several times receives its complete gradient before it propagates
  further.

  Gradients accumulate: reset ``grad`` between passes when reusing a node
  (the layer classes in this notebook do so in ``forward``).
  """

  def __init__(self, value, _parent = None, _op = None, grad = 0, _label = None):
    """Store the value and graph bookkeeping.

    Args:
      value: number or ndarray held by this node.
      _parent: tuple of operand Tensors that produced this node, or None.
      _op: short string naming the producing operation.
      grad: initial gradient; 0 means "nothing accumulated yet".
      _label: optional name shown by ``repr``.
    """
    self.value = value
    self._parent = _parent
    self._op = _op
    # Honour the caller-supplied gradient (it used to be discarded).
    self.grad = grad
    self._label = _label

  def __repr__(self):
    return f'Label: {self._label}; Value: {self.value}; Grad: {self.grad}'

  def _backward(self):
    """Local backward step; a no-op for leaves, replaced per op result."""
    pass

  def _parents(self):
    """Return the operands as a tuple, tolerating None or a bare Tensor."""
    parent = self._parent
    if parent is None:
      return ()
    if isinstance(parent, Tensor):
      return (parent,)
    return tuple(parent)

  @staticmethod
  def _unbroadcast(grad, shape):
    """Reduce (or expand) ``grad`` so that it has exactly ``shape``.

    Axes that NumPy broadcasting stretched in the forward pass are summed
    here, which is the gradient of broadcasting.
    """
    grad = np.asarray(grad)
    if grad.ndim < len(shape):
      # e.g. a scalar seed flowing into an array-valued operand
      return np.broadcast_to(grad, shape)
    extra = grad.ndim - len(shape)
    if extra > 0:
      grad = grad.sum(axis=tuple(range(extra)))
    for axis, size in enumerate(shape):
      if size == 1 and grad.shape[axis] != 1:
        grad = grad.sum(axis=axis, keepdims=True)
    return grad

  def _accumulate(self, update):
    """Add ``update`` to ``self.grad``, matching the shape of ``self.value``."""
    # Out-of-place add: works when grad is still the int 0 placeholder and
    # never hits in-place broadcasting/casting errors.
    self.grad = self.grad + self._unbroadcast(update, np.shape(self.value))

  def backward(self):
    """Back-propagate from this node through the whole graph behind it.

    Seed ``self.grad`` before calling to choose the upstream gradient. If it
    is still the untouched Python ``0`` default, it is seeded with ones.
    """
    if type(self.grad) in (int, float) and self.grad == 0:
      self.grad = np.ones_like(self.value)

    # Iterative post-order DFS -> topological order (operands first).
    order, seen = [], set()
    stack = [(self, False)]
    while stack:
      node, expanded = stack.pop()
      if expanded:
        order.append(node)
        continue
      if id(node) in seen:
        continue
      seen.add(id(node))
      stack.append((node, True))
      for parent in node._parents():
        if id(parent) not in seen:
          stack.append((parent, False))

    # Consumers run before the operands they feed.
    for node in reversed(order):
      node._backward()

  def __add__(self, other):
    """Element-wise (broadcasting) sum of two Tensors."""
    v = self.value + other.value
    out = Tensor(v, (self, other), '+')

    def _backward():
      # d(a+b)/da = d(a+b)/db = 1; _accumulate undoes any broadcasting,
      # e.g. a (n, 1) bias added to a (n, batch) activation.
      self._accumulate(out.grad)
      other._accumulate(out.grad)
    out._backward = _backward
    return out

  def matmul(self, other):
    """Matrix product ``self @ other`` for operands of rank 1 or 2.

    Typical use: ``self`` is an (n, d) weight matrix and ``other`` a (d, k)
    input. Vector dot products are the 1xn @ nx1 special case.
    """
    v = np.matmul(self.value, other.value)
    out = Tensor(v, (self, other), '@')

    def _backward():
      a = np.asarray(self.value)
      b = np.asarray(other.value)
      # Promote vectors the way np.matmul does: left -> row, right -> column.
      a2 = a.reshape(1, -1) if a.ndim == 1 else a
      b2 = b.reshape(-1, 1) if b.ndim == 1 else b
      g = np.broadcast_to(out.grad, np.shape(v)).reshape(a2.shape[0], b2.shape[1])
      # For C = A @ B: dL/dA = G @ B^T and dL/dB = A^T @ G, for every shape.
      self._accumulate((g @ b2.T).reshape(a.shape))
      other._accumulate((a2.T @ g).reshape(b.shape))
    out._backward = _backward
    return out

  def relu(self):
    """Element-wise ``max(0, x)``."""
    v = np.maximum(0, self.value)
    out = Tensor(v, (self,), 'ReLU')

    def _backward():
      # Chain rule: upstream gradient passes only where the input was > 0.
      self._accumulate(out.grad * (np.asarray(self.value) > 0))
    out._backward = _backward
    return out

  def mse(self, y):
    """Mean squared error between this Tensor's value and the target ``y``."""
    diff = self.value - y
    v = np.mean(np.power(diff, 2))
    out = Tensor(v, (self,), 'MSE')

    def _backward():
      # d mean(diff^2) / d value = 2 * diff / N, scaled by the upstream grad.
      self._accumulate(out.grad * 2 * diff / np.size(diff))
    out._backward = _backward
    return out













In [None]:
class Dense_Layer:
  """Fully connected layer computing ``weights @ input + bias``.

  ``weights`` has shape (output_size, input_size) and ``bias`` has shape
  (output_size, 1); both are stored as Tensors.
  """

  def __init__(self, input_size, output_size):
    """Draw normally distributed weights and start with a zero bias."""
    weight_shape = (output_size, input_size)
    bias_shape = (output_size, 1)
    self.weights = Tensor(np.random.normal(size=weight_shape).astype(np.float64))
    self.bias = Tensor(np.zeros(bias_shape).astype(np.float64))

  def forward(self, input):
    """Apply the affine map to ``input`` and return the resulting Tensor.

    The gradients of every Tensor touched here are reset to zero arrays so
    that values left over from an earlier pass are not reused.
    """
    self.input = input
    projected = self.weights.matmul(input)
    shifted = projected + self.bias

    # Clear stale gradients on the operands and on both results.
    for node in (input, self.weights, self.bias, projected, shifted):
      node.grad = np.zeros_like(node.value)
    return shifted

  def backward(self, learning_rate):
    """Take one gradient-descent step on the weights and the bias."""
    for param in (self.weights, self.bias):
      param.value -= learning_rate * param.grad






In [None]:
class ReLU_Layer:
    """Activation layer that applies the input Tensor's ``relu`` method."""

    def __init__(self):
        """Nothing to configure: the layer has no parameters."""

    def forward(self, input):
        """Return ``input.relu()`` after resetting both gradients to zeros."""
        self.input = input
        activated = input.relu()

        # Gradients from a previous forward pass must not leak into this one.
        for node in (input, activated):
            node.grad = np.zeros_like(node.value)
        return activated

    def backward(self, learning_rate):
        """No trainable parameters, so there is nothing to update."""
        return None


In [None]:
class MSE_Layer:
    """Loss layer that applies the input Tensor's ``mse`` method."""

    def __init__(self):
        """Nothing to configure: the layer has no parameters."""

    def forward(self, input, y):
        """Return ``input.mse(y)`` after resetting both gradients to zeros."""
        self.input = input
        loss = input.mse(y)

        # Gradients from a previous forward pass must not leak into this one.
        for node in (input, loss):
            node.grad = np.zeros_like(node.value)
        return loss

    def backward(self, learning_rate):
        """No trainable parameters, so there is nothing to update."""
        return None

In [None]:
class MLP:
    """Sequential network: every layer but the last transforms the
    activation; the last layer is the loss and also receives the target."""

    def __init__(self, layers):
        """Store ``layers``; the final entry must be the loss layer."""
        self.layers = layers

    def forward(self, input, y):
        """Run ``input`` through the network and score it against ``y``.

        Stores the prediction in ``self.output`` and the loss node in
        ``self.loss``.

        Returns:
            The prediction, i.e. the output of the last non-loss layer.
        """
        activation = input
        for layer in self.layers[:-1]:
            activation = layer.forward(activation)
        self.output = activation
        self.loss = self.layers[-1].forward(activation, y)
        return activation

    def backward(self, learning_rate):
        """Back-propagate from the stored loss, then update every layer."""
        # dL/dL = 1, so the seed takes the shape of the loss itself rather
        # than the shape of the prediction.
        self.loss.grad = np.ones_like(self.loss.value)
        self.loss.backward()

        for layer in reversed(self.layers):
            layer.backward(learning_rate)

    def train(self, epochs, learning_rate, data_loader):
        """Train for ``epochs`` passes over ``data_loader``.

        ``data_loader`` must yield ``(input, target)`` pairs and be
        re-iterable. After each epoch the mean loss over its batches is
        printed, together with the last batch's input and prediction.
        """
        for epoch in range(epochs):
            total_loss = 0.0
            batch_count = 0
            input_data = None
            for input_data, desired_output in data_loader:
                self.forward(input_data, desired_output)
                total_loss += float(np.mean(self.loss.value))
                batch_count += 1
                self.backward(learning_rate)

            if batch_count == 0:
                # An empty loader used to crash with NameError below.
                print(f"Error for epoch {epoch}: no batches to train on")
                continue

            error = total_loss / batch_count
            print(f"Error for epoch {epoch}: {error} ")
            print(f'input: {input_data}')
            print(f'prediction: {self.output}')


In [None]:
# Network layout: 2 inputs -> 3 hidden units -> 1 output, with a ReLU after
# each dense layer and an MSE loss as the final layer.
layer_list = []
layer_list.append(Dense_Layer(2, 3))
layer_list.append(ReLU_Layer())
layer_list.append(Dense_Layer(3, 1))
layer_list.append(ReLU_Layer())
layer_list.append(MSE_Layer())

network = MLP(layer_list)




In [None]:
#testing XOR
# NOTE: `dataloader` is created in the "Example usage" cell further down;
# that cell must be run before this one.
network.train(epochs=100, learning_rate=0.1, data_loader=dataloader)

In [None]:

class DataLoader:
    """Iterate over a dataset in mini-batches, optionally shuffled.

    Each batch is returned column-major to match the network's layout:
    inputs as a ``Tensor`` of shape (features, batch) and targets as an
    array of shape (outputs, batch). Every ``iter()`` call restarts (and,
    if enabled, reshuffles) the pass; the loader is its own iterator, so
    only one pass can be active at a time.
    """

    def __init__(self, inputs, desired_outputs, batch_size, shuffle=True):
        """Store the dataset.

        Args:
            inputs: array-like of shape (n_samples, n_features).
            desired_outputs: array-like with n_samples rows.
            batch_size: samples per batch, at least 1.
            shuffle: reshuffle the sample order at the start of each pass.

        Raises:
            ValueError: if ``batch_size`` is below 1 (iteration would never
                finish) or the two arrays have different lengths.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        inputs = np.asarray(inputs)
        desired_outputs = np.asarray(desired_outputs)
        if len(inputs) != len(desired_outputs):
            raise ValueError(
                f"inputs and desired_outputs differ in length: "
                f"{len(inputs)} != {len(desired_outputs)}"
            )
        self.inputs = inputs
        self.desired_outputs = desired_outputs
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.n_samples = self.inputs.shape[0]
        # Empty until iter() is called, so a stray next() stops cleanly.
        self.indices = np.arange(0)

    def __iter__(self):
        """Start a new pass over the data and return the loader itself."""
        # Get the total number of data points
        self.n_samples = self.inputs.shape[0]

        # Create an array of indices
        self.indices = np.arange(self.n_samples)

        # Shuffle if required
        if self.shuffle:
            np.random.shuffle(self.indices)

        return self

    def __next__(self):
        """Return the next ``(inputs, targets)`` batch."""
        # If all data has been seen, stop the iteration
        if len(self.indices) == 0:
            raise StopIteration

        # Select indices for the current batch
        current_indices = self.indices[:self.batch_size]
        self.indices = self.indices[self.batch_size:]

        # Transpose both so that samples run along the columns; leaving the
        # targets as (batch, outputs) would broadcast against the
        # (outputs, batch) predictions in the loss for batch_size > 1.
        batch_inputs = Tensor(self.inputs[current_indices].T)
        batch_outputs = self.desired_outputs[current_indices].T

        return batch_inputs, batch_outputs



In [None]:
# Example usage: four samples with two features each, one target per sample
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float64)
print(inputs)
desired_outputs = np.array([[0], [1], [1], [0]], dtype=np.float64)
batch_size = 1

dataloader = DataLoader(inputs, desired_outputs, batch_size)

# Sanity check: print every batch of one pass over the data.
for batch_inputs, batch_outputs in dataloader:
    print("Batch inputs:", batch_inputs)
    print("Batch outputs:", batch_outputs)


[[0. 0.]
 [0. 1.]
 [1. 0.]
 [1. 1.]]
Batch inputs: Label: None; Value: [[0.]
 [0.]]; Grad: 0
Batch outputs: [[0.]]
Batch inputs: Label: None; Value: [[1.]
 [0.]]; Grad: 0
Batch outputs: [[1.]]
Batch inputs: Label: None; Value: [[0.]
 [1.]]; Grad: 0
Batch outputs: [[1.]]
Batch inputs: Label: None; Value: [[1.]
 [1.]]; Grad: 0
Batch outputs: [[0.]]
