In [1]:
import numpy as np
import math

class Tensor:
    """Minimal value wrapper that records the local derivatives of `*` and `+`.

    Attributes:
        val: the wrapped value (anything supporting `*` and `+`).
        grad: local partial derivative written by the `backward()` closure of
            the most recent op this tensor was an operand of; None until then.
        backward: on a tensor produced by `*` or `+`, a zero-argument closure
            that fills in `grad` on both operands; None on a leaf tensor.

    Note: `backward()` writes only the local partial derivatives of the single
    op that produced the result. It does not multiply by an upstream gradient
    and does not walk a graph.
    """

    def __init__(self, val):
        self.val = val
        self.grad = None
        self.backward = None

    def __repr__(self):
        return f'{self.val}'

    # c = a * b
    # dc/da = b
    # dc/db = a
    def __mul__(self, tensor):
        """Return a new Tensor holding `self.val * tensor.val`.

        Calling `.backward()` on the result sets `self.grad = tensor.val` and
        `tensor.grad = self.val` (or `2 * val` when both operands are the same
        object).
        """
        new = Tensor(self.val * tensor.val)

        def backward():
            # Reset, then add each operand's contribution. Plain assignment
            # would make `a * a` report `a` instead of `2 * a`, because the
            # second write would overwrite the first on the shared object.
            self.grad = 0
            tensor.grad = 0
            self.grad = self.grad + tensor.val
            tensor.grad = tensor.grad + self.val

        new.backward = backward
        return new

    # c = a + b
    # dc/da = 1
    # dc/db = 1
    def __add__(self, tensor):
        """Return a new Tensor holding `self.val + tensor.val`.

        Calling `.backward()` on the result sets both operands' `grad` to 1
        (or 2 when both operands are the same object).
        """
        new = Tensor(self.val + tensor.val)

        def backward():
            # Same reset-then-accumulate scheme as __mul__ so `a + a` gives 2.
            self.grad = 0
            tensor.grad = 0
            self.grad = self.grad + 1
            tensor.grad = tensor.grad + 1

        new.backward = backward
        return new

class Sigmoid:
    """Logistic activation that caches its last output for the backward pass.

    Attributes:
        a: output of the most recent `forward` call; None before the first call.
    """

    def __init__(self):
        self.a = None

    def forward(self, x):
        """Return `1 / (1 + exp(-x))` element-wise and cache it in `self.a`."""
        self.a = 1 / (1 + np.exp(-x))
        return self.a

    def __call__(self, x):
        """Alias for `forward`, so an instance can be used as a plain callable."""
        return self.forward(x)

    def backward(self):
        """Return the derivative `a * (1 - a)` at the last forward input.

        Must be called after `forward`; before that `self.a` is None.
        """
        return self.a * (1 - self.a)

def softmax(x):
    """Return the softmax of `x` normalised along axis 0.

    For a 1-D input this is the usual softmax of a vector; for a 2-D input
    each column is normalised independently.

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps `exp` from overflowing to
    `inf` (and the quotient from becoming NaN) for large inputs.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

class Layer:
    """Fully connected layer `z = activation(w @ [x; 1])`.

    The bias is folded into the weight matrix: `forward` appends a row of ones
    to the input, and the last column of `w` multiplies that row.

    Attributes:
        w: weights of shape (output_size, input_size + 1).
        activation: None for identity, or an object exposing `backward()` and
            either `forward(h)` or `__call__(h)`.
        dw: gradient of the loss w.r.t. `w`, set by `backward`; None before.
        x: bias-augmented input cached by the last `forward`; None before.
    """

    def __init__(self, input_size, output_size, activation=None):
        # np.random.rand takes the dimensions as separate ints; passing a
        # single tuple raises TypeError.
        self.w = np.random.rand(output_size, input_size + 1)
        self.activation = activation
        self.dw = None
        self.x = None

    def _activate(self, h):
        """Apply the activation to pre-activations `h` (identity when None)."""
        if self.activation is None:
            return h
        # Accept both activation objects with a `forward` method and plain
        # callables.
        apply = getattr(self.activation, "forward", self.activation)
        return apply(h)

    # input must be of size (input_size, _)
    def forward(self, x):
        """Return the activations for `x` of shape (input_size, batch).

        The bias-augmented input is cached in `self.x` for `backward`.
        """
        x_biases = np.ones((1, x.shape[1]))
        x = np.vstack([x, x_biases])
        self.x = x
        h = self.w @ x
        return self._activate(h)

    # z = activation(h)
    # h = self.w @ x
    def backward(self, dLdz):
        """Back-propagate `dLdz` (shape (output_size, batch)) through the layer.

        Stores the weight gradient, summed over the batch, in `self.dw` and
        returns the gradient w.r.t. the layer input with shape
        (input_size, batch). Must be called after `forward`.
        """
        dzdh = 1.0 if self.activation is None else self.activation.backward()
        dLdh = dLdz * dzdh            # element-wise: (output_size, batch)
        self.dw = dLdh @ self.x.T     # (output_size, input_size + 1)
        dLdx = self.w.T @ dLdh        # (input_size + 1, batch)
        # The last row belongs to the constant bias input added in forward,
        # not to anything the previous layer produced.
        return dLdx[:-1]

    def step(self, lr=1.0):
        """Apply one gradient-descent update `w -= lr * dw`.

        `lr` defaults to 1.0, which reproduces the plain `w -= dw` update.
        """
        self.w -= lr * self.dw
        

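# Shape notes (one sample per column, as Layer.forward expects):
# x  (input_size, batch)
# W1 (h1_size, input_size + 1)   -- the extra column multiplies the bias row of ones
# W2 (h2_size, h1_size + 1)
# W3 (output_size, h2_size + 1)
# out = W3 @ [f(W2 @ [f(W1 @ [x; 1]); 1]); 1]
#   where [v; 1] appends a row of ones to v and f is the layer's activation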
 
class _Identity:
    """Pass-through activation so the output layer emits raw logits."""

    def forward(self, h):
        return h

    # Usable both as an object with `.forward` and as a plain callable.
    __call__ = forward

    def backward(self):
        # d(h)/dh = 1; a scalar broadcasts against any gradient shape.
        return 1.0


class MLP:
    """Three-layer perceptron: two sigmoid hidden layers and a softmax output.

    Inputs are expected with one sample per column, shape (input_size, batch),
    which is the layout `Layer.forward` requires.
    """

    def __init__(self, input_size, h1_size, h2_size, output_size):
        # Each layer gets its own Sigmoid instance because the activation
        # caches its last output for the backward pass.
        self.input_layer = Layer(input_size, h1_size, Sigmoid())
        self.h1 = Layer(h1_size, h2_size, Sigmoid())
        # The last layer produces logits; softmax is applied in forward().
        self.h2 = Layer(h2_size, output_size, _Identity())

    def forward(self, x):
        """Return class probabilities of shape (output_size, batch) for `x`."""
        a1 = self.input_layer.forward(x)
        a2 = self.h1.forward(a1)
        z = self.h2.forward(a2)
        return softmax(z)

    def backward(self, dLdz):
        """Back-propagate the loss gradient w.r.t. the logits through all layers.

        For softmax followed by cross-entropy, `dLdz` is `s - y`, where `s` is
        the output of `forward` and `y` the one-hot targets. Each layer stores
        its own weight gradient; the gradient w.r.t. the network input is
        returned.
        """
        dLda2 = self.h2.backward(dLdz)
        dLda1 = self.h1.backward(dLda2)
        dLdx = self.input_layer.backward(dLda1)
        return dLdx

    def step(self):
        """Apply the stored gradients to every layer; call after `backward`."""
        self.h2.step()
        self.h1.step()
        self.input_layer.step()




    


        


In [3]:

        
import numpy as np

X = np.random.rand(10, 1)
Y = np.random.rand(5, 5)

# Give both arrays four axes so they can be stacked along axis 1.
# Y holds 25 values, so it must keep both 5-long axes: reshaping it to
# (1, 1, 1, 5) fails with "cannot reshape array of size 25 ...".
X = np.reshape(X, (10, 1, 1, 1))  # X will have shape (10, 1, 1, 1)
Y = np.reshape(Y, (1, 1, 5, 5))   # Y will have shape (1, 1, 5, 5)

# np.concatenate does not broadcast: every axis except the concatenation axis
# must already match, so expand each operand to a common shape first.
plane_shape = (10, 1, 5, 5)

# Concatenate Y to X along axis 1
result = np.concatenate(
    (np.broadcast_to(X, plane_shape), np.broadcast_to(Y, plane_shape)),
    axis=1,
)  # result will have shape (10, 2, 5, 5)

# Add another array Z of shape (5, 5) to result
Z = np.random.rand(5, 5)
Z = Z[np.newaxis, np.newaxis, :, :]  # Z will have shape (1, 1, 5, 5)

result = np.concatenate(
    (result, np.broadcast_to(Z, plane_shape)), axis=1
)  # result will have shape (10, 3, 5, 5)