In [1]:
import numpy as np
import torch
from typing import Iterator

In [2]:
class Tensor(np.ndarray):
    def __new__(cls, input_array, info=None):
        # Input array is an already formed ndarray instance
        # We first cast to be our class type
        obj = np.asarray(input_array, dtype=np.float32).view(cls)
        # add the new attribute to the created instance
        obj.grad = np.zeros_like(input_array)
        # Finally, we must return the newly created object:
        return obj

    def __array_finalize__(self, obj):
        # see InfoArray.__array_finalize__ for comments
        if obj is None: return
        self.grad = getattr(obj, 'grad', None)

class NeuralModule:
    def __init__(self):
        pass
    def backward(self, x, y):
        pass
    def update_params(self, lr):
        pass
    def zero_grads(self):
        pass

class NeuralModulesList:
    modules : dict[int, NeuralModule]
    def __init__(self,):
        self.modules = dict()
        self.n = 0
        pass

    def __iter__(self) -> Iterator[NeuralModule]:
        return iter(self.modules.values())

    def __next__(self) -> NeuralModule: 
        return iter(self.modules.values())

    def __getitem__(self, index) -> NeuralModule:
        return self.modules[index]

    def append(self, module:NeuralModule):
        self.modules[self.n] = module
        self.n += 1

    def extend(self, lst_modules:list[NeuralModule]):
        for module in lst_modules:
            self.modules[self.n] = module
            self.n += 1        

    def __len__(self):
        return self.n


In [3]:
class NeuralLayer(NeuralModule):
    def __init__(self, in_dim, out_dim):
        # layers
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.W = Tensor( np.random.randn(in_dim, out_dim) )
        self.B = Tensor( np.zeros(out_dim) )
        self.train = True
        self.zero_grads()
        self.init_weights()
        
    def init_weights(self):
        fan_in = self.W.shape[-1]
        std = np.sqrt(2/fan_in)
        self.W = Tensor( np.random.randn(*self.W.shape) * std )

    def __call__(self, x):
        return x @ self.W + self.B
    
    def backward(self, x:Tensor, h:Tensor):
        if self.train:
            self.W.grad += x.T @ h.grad
            self.B.grad += np.sum(h.grad, axis=0)
            x.grad += h.grad @ self.W.T
        return 
    
    def zero_grads(self):
        # derivatives
        self.W.grad = np.zeros(shape=(self.in_dim, self.out_dim))
        self.B.grad = np.zeros(shape=(self.out_dim))

    def update_params(self, lr):
        self.W = self.W - lr*self.W.grad
        self.B = self.B - lr*self.B.grad
    
    def __matmul__(self, Y:np.array):
        return Y @ self.W

In [4]:
x = np.ones((32,5))
layer = NeuralLayer(5,10)

layer.W + (layer.W**2).sum()

Tensor([[12.76087 , 12.266198, 12.111102, 13.734438, 13.307162,
         13.426407, 12.06189 , 11.747089, 12.82989 , 13.590902],
        [12.473997, 13.470976, 13.014597, 12.078024, 12.612325,
         12.683188, 12.716412, 12.716234, 12.727184, 12.759898],
        [12.702137, 12.517447, 12.75696 , 13.467454, 13.302441,
         11.92293 , 12.729667, 13.514252, 12.757411, 12.621441],
        [11.877745, 12.505779, 12.904266, 13.450518, 12.509647,
         13.214955, 12.807601, 13.287572, 12.546663, 13.669604],
        [12.956601, 12.944936, 12.455944, 13.808249, 13.227751,
         12.823907, 13.032972, 12.653656, 13.48045 , 12.279826]],
       dtype=float32)

In [5]:
class Dropout(NeuralModule):
    def __init__(self, p=0.8):
        self.p = p
        if self.p == 0:
            self.p += 1e-6
        if self.p == 1:
            self.p -= 1e-6
    
    def __call__(self, x:Tensor):
        self.mask = (np.random.rand(*x.shape) < self.p) / self.p 
        return x * self.mask
    
    def backward(self, x:Tensor, h:Tensor, weight_decay:float):
        x.grad = h.grad * self.mask
        return

In [6]:
x = np.ones((1,5))
dp = Dropout()
dp(x)

array([[0.  , 0.  , 1.25, 1.25, 1.25]])

In [7]:
class AvgPool(NeuralModule):
    def __init__(self, k_size, stride=(1,1), padding=(0,0)):
        super().__init__()
        self.k_size_h, self.k_size_w = k_size
        self.stride = stride
        self.pad_h, self.pad_w = padding

    def __call__(self, x:Tensor) -> Tensor:
        # pad_x = np.pad(x, pad_width=[(0,0),(0,0),(self.pad_h, self.pad_h),(self.pad_w, self.pad_w)], mode='constant', constant_values=(0))
        # # 'strided_matrices' is the strided input image, shape = (batch, out_c, out_w, out_h, kx, ky)
        # strided_matrices = self.get_strided_matrices(pad_x, self.k_size_h, self.k_size_w, self.stride)

        n, c, h, w = x.shape
        out_h = (h + 2* self.pad_h - self.k_size_h) // self.stride[0] + 1
        out_w = (w + 2* self.pad_w - self.k_size_w) // self.stride[1] + 1

        windows = self.get_windows(x, (n, c, out_h, out_w), self.k_size_h, self.pad_h, self.stride[0])

        # Mean of every sub matrix, computed without considering the padd(np.nan)
        out_x = Tensor( np.nanmean(windows, axis=(4, 5)) )

        return out_x
    
    def get_windows(self,x:Tensor, output_size, kernel_size, padding=0, stride=1, dilate=0):
        working_input = x
        working_pad = padding
        # dilate the input if necessary
        if dilate != 0:
            working_input = np.insert(working_input, range(1, x.shape[2]), 0, axis=2)
            working_input = np.insert(working_input, range(1, x.shape[3]), 0, axis=3)

        # pad the input if necessary
        if working_pad != 0: 
            working_input = np.pad(working_input, pad_width=((0,), (0,), (working_pad,), (working_pad,)), mode='constant', constant_values=(0.,))

        in_b, in_c, out_h, out_w = output_size
        out_b, out_c, _, _ = x.shape
        batch_str, channel_str, kern_h_str, kern_w_str = working_input.strides

        return np.lib.stride_tricks.as_strided(
            working_input,
            (out_b, out_c, out_h, out_w, kernel_size, kernel_size),
            (batch_str, channel_str, stride * kern_h_str, stride * kern_w_str, kern_h_str, kern_w_str)
        )   
    
    def get_strided_matrices(self, x:Tensor, k_h, k_w, s):
        B, n_c, size_h, size_w = x.shape # B = batch, n_c = num. input channels, size_h = height, size_w = width
        sh, sw = s
        out_h = (size_h - k_h)//sh + 1
        out_w = (size_w - k_w)//sw + 1
        strides = (n_c*size_h*size_w, size_w*size_h, size_w*sh, sw, size_w, 1)
        strides = tuple(i * x.itemsize for i in strides)
        stride_matrices = np.lib.stride_tricks.as_strided(x, 
                                            shape=(B, n_c, out_h, out_w, k_h, k_w),
                                            strides=strides)
        return stride_matrices

    def backward(self, x:Tensor, y:Tensor):
        n, c, out_h, out_w = y.shape
        _, _, in_h, in_w = x.shape
        
        windows = self.get_windows(y.grad, output_size=(n, c, in_h, in_w), kernel_size=self.k_size_h, padding=self.pad_h, dilate=self.stride[0])
        print(f"windows {windows.shape}   x {x.shape}")
        grad_per_pixel = 1 / (self.k_size_h * self.k_size_w)
        x.grad += np.einsum( 'bchwkl->bchw', windows ) * grad_per_pixel



In [8]:
class ConvolutionalLayer(NeuralModule):
    def __init__(self, in_ch, out_ch, k_size, stride=(1,1), padding=(0,0)):
        self.in_ch = in_ch
        self.out_ch = out_ch

        self.k_size_h, self.k_size_w = k_size
        self.stride = stride

        n = out_ch * self.k_size_h * self.k_size_w
        self.pad_h, self.pad_w = padding

        self.train = True

        # for each output channel, there is a 3D matrix of size  in_ch x k_size x k_size 
        self.K = Tensor( np.random.randn(out_ch, in_ch, self.k_size_h, self.k_size_w) * np.sqrt(2/n) )
        self.B = Tensor( np.zeros( (1, out_ch, 1, 1)) )

        # einstein summation indices for convolutions
        # b : batch dimension
        # o : output channels
        # c : input channels
        # i : kernel size on H
        # j : kernel size on W
        # h : size image on H
        # w : size image on W
        self.conv_modes = {'front':'bihwkl,oikl->bohw',  # 'bihwkl,oikl->bohw'    'ochw,bcijhw->boij'
                           'back': 'fdkl,mcijkl->mdij',  
                           'param':'bohw,bchwij->ocij' } 


    def convolution(self, x:Tensor, kernels:Tensor, s=(1,1), mode='front'):

        stride_matrices = self.get_strided_matrices(x, self.k_size_h, self.k_size_w, s)
        #print(f"dilate x {x.shape}  kernel {kernels.shape} stride matrices {stride_matrices.shape}")
        return np.einsum(self.conv_modes[mode], kernels, stride_matrices)


    def __call__(self, x:Tensor):
        n, c, h, w = x.shape
        out_h = (h - self.k_size_h + 2 * self.pad_h) // self.stride[0] + 1
        out_w = (w - self.k_size_w + 2 * self.pad_w) // self.stride[1] + 1

        windows = self.get_windows(x, (n, c, out_h, out_w), self.k_size_h, self.pad_h, self.stride[0])

        out = np.einsum(self.conv_modes['front'], windows, self.K)

        # add bias to kernels
        out += self.B

        self.cache = windows

        return Tensor(out)

    def backward(self, x:Tensor, h:Tensor):
        if self.train:
            windows = self.cache

            padding = self.k_size_h - 1 if self.pad_h == 0 else self.pad_h

            hgrad_windows = self.get_windows(h.grad, x.shape, self.k_size_h, padding=padding, stride=1, dilate=self.stride[0] - 1)
            rot_kern = np.rot90(self.K, 2, axes=(2, 3))

            self.B.grad += np.sum(h.grad, axis=(0, 2, 3)).reshape(self.B.grad.shape)
            self.K.grad += np.einsum('bihwkl,bohw->oikl', windows, h.grad)
            x.grad += np.einsum('bohwkl,oikl->bihw', hgrad_windows, rot_kern)

        return 


    def get_windows(self,x:Tensor, output_size, kernel_size, padding=0, stride=1, dilate=0):
        working_input = x
        working_pad = padding
        
        # dilate the input if necessary
        if dilate != 0:
            working_input = np.insert(working_input, range(1, x.shape[2]), 0, axis=2)
            working_input = np.insert(working_input, range(1, x.shape[3]), 0, axis=3)

        # pad the input if necessary
        if working_pad != 0: 
            working_input = np.pad(working_input, pad_width=((0,), (0,), (working_pad,), (working_pad,)), mode='constant', constant_values=(0.,))

        in_b, in_c, out_h, out_w = output_size
        out_b, out_c, _, _ = x.shape
        batch_str, channel_str, kern_h_str, kern_w_str = working_input.strides

        return np.lib.stride_tricks.as_strided(
            working_input,
            (out_b, out_c, out_h, out_w, kernel_size, kernel_size),
            (batch_str, channel_str, stride * kern_h_str, stride * kern_w_str, kern_h_str, kern_w_str)
        )


Test forward pass conv layer and avg pool layer

In [9]:
# utility function we will use later when comparing manual gradients to PyTorch gradients
def cmp(s, dt, t):
  ex = torch.all(dt == t).item()
  app = torch.allclose(dt, t)
  maxdiff = (dt - t).abs().max().item()
  print(f'{s:15s} | exact: {str(ex):5s} | approximate: {str(app):5s} | maxdiff: {maxdiff}')

torch_conv = torch.nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, padding=2)
my_conv = ConvolutionalLayer(in_ch=3, out_ch=6, k_size=(5,5), padding=(2,2))

k,b = torch_conv.parameters()
my_conv.K = Tensor( k.detach().numpy() )
my_conv.B = Tensor( b.detach().numpy().reshape(1,6,1,1) )


x = Tensor(np.ones((32,3,28,28), dtype=np.float32))
# forward pass
my_y = my_conv(x)
torch_y = torch_conv(torch.tensor(x))

cmp('forward conv', torch.tensor(my_y), torch_y)


forward conv    | exact: False | approximate: False | maxdiff: 2.384185791015625e-07


In [10]:
my_avgpool = AvgPool(k_size=(2,2), stride=(2,2))
torch_avg_pool = torch.nn.AvgPool2d(kernel_size=(2,2), stride=(2,2))

my_out_avg = my_avgpool(my_y)
torch_out_avg = torch_avg_pool( torch.tensor(my_y) )

cmp('forward avg pool', torch.tensor(my_out_avg), torch_out_avg)

forward avg pool | exact: False | approximate: True  | maxdiff: 5.960464477539063e-08


In [11]:
conv1 = ConvolutionalLayer(in_ch=3, out_ch=6, k_size=(5,5), padding=(2,2), stride=(2,2))
conv2 = ConvolutionalLayer(in_ch=6, out_ch=16, k_size=(5,5), padding=(0,0), stride=(1,1))
conv3 = ConvolutionalLayer(in_ch=16, out_ch=16, k_size=(5,5), padding=(2,2), stride=(2,2))
x = Tensor(np.ones((32,3,28,28)))
out_1 = conv1(x)
print(f"out 1 {out_1.shape}")
out_2 = conv2(out_1)
print(f"out 2 {out_2.shape}")
out_3 = conv3(out_2)
print(f"out 3 {out_3.shape}")


out 1 (32, 6, 14, 14)
out 2 (32, 16, 10, 10)
out 3 (32, 16, 5, 5)


In [12]:
conv1 = ConvolutionalLayer(in_ch=6, out_ch=16, k_size=(5,5), padding=(0,0))
avgpool = AvgPool(k_size=(2,2), stride=(2,2))

In [13]:
x = Tensor(np.ones((32,6,14,14)))
out_h = conv1(x)
print(f"out h {out_h.shape}")
out_avg = avgpool(out_h)
print(f"out avg {out_avg.shape}")
avgpool.backward(out_h, out_avg)
conv1.backward(x,out_h)
# 'param':'bohw,bcijhw->ocij'   '32 x 6 x 28 x 28'  '32 x 3 x 28 x 28 x 5 x 5' -> '6 x 3 x 5 x 5' 
# 'mfkl, mcijkl->fcij'

out h (32, 16, 10, 10)
out avg (32, 16, 5, 5)
windows (32, 16, 10, 10, 2, 2)   x (32, 16, 10, 10)


In [14]:
print(out_h.shape, out_h.grad.shape)
print(out_avg.shape, out_avg.grad.shape)

(32, 16, 10, 10) (32, 16, 10, 10)
(32, 16, 5, 5) (32, 16, 5, 5)


In [15]:
print(conv1.B.grad.shape)

(1, 16, 1, 1)


In [16]:
out_h.grad.shape

(32, 16, 10, 10)

In [17]:
conv2 = ConvolutionalLayer(in_ch=6, out_ch=16, k_size=(5,5), padding=(0,0))
conv2(np.ones((32,6,14,14))).shape

(32, 16, 10, 10)

In [18]:
class Relu(NeuralModule):
    def __init__(self):
        pass
    def __call__(self, x:Tensor):
        x[x<0] = 0
        return x
    def backward(self, x:Tensor, y:Tensor):
        x.grad += y.grad * (x>0).astype(float)
        return 

In [19]:
class Sigmoid(NeuralModule):
    def __init__(self):
        self.x = None
    def __call__(self, logits):
        exp_logits = np.exp(logits - np.max(logits))  # Subtract max for stability
        probabilities = exp_logits / exp_logits.sum(axis=-1, keepdims=True)
        return probabilities

In [20]:
class CrossEntropyLoss:
    def __init__(self):
        pass

    def __call__(self, logits:Tensor, target:Tensor)->Tensor:
        exp_logits = np.exp(logits - np.max(logits))  # Subtract max for stability
        probabilities = exp_logits / exp_logits.sum(axis=-1, keepdims=True)
        real = np.clip(target, 1e-9, 1.0)  # Avoid log(0)
        return  - np.sum(real * np.log(probabilities))/real.shape[0]
    
    def backward(self, logits, y):
        exp_logits = np.exp(logits - np.max(logits))  # Subtract max for stability
        probabilities = exp_logits / exp_logits.sum(axis=-1, keepdims=True)
        #print(grad.shape, y.grad.shape)
        logits.grad += probabilities-y
        return


In [21]:
class ANN:
    def __init__(self,loss):
        self.loss = loss
        self.layers = NeuralModulesList()
        self.train = True
    
    def __call__(self, x:np.array):
        if self.train:
            self.intermediates = [Tensor(np.copy(x))]
        for layer in self.layers:
            x = layer(x)
            if self.train: self.intermediates.append(Tensor(np.copy(x)))
        return x
    
    def get_loss(self, y_hat:Tensor, y:Tensor):
        loss_value = self.loss(y_hat, y)
        if self.train: self.y = y
        return loss_value
    
    def backward(self):
        self.y.grad = np.ones_like(self.y.grad)
        self.loss.backward(self.intermediates[-1], self.y)
        for i in reversed(range(len(self.layers))):
            self.layers[i].backward(self.intermediates[i], self.intermediates[i+1])

    def step(self, lr):
        for layer in self.layers:
            layer.update_params(lr)

    def zero_grads(self):
        for layer in self.layers:
            layer.zero_grads()        


In [22]:
np.random.seed(42)
x = Tensor( np.random.randn(10, 4) )  # 100 samples, 4 features
y_true = Tensor( np.eye(3)[np.random.choice(3, 10)] )  # One-hot labels (3 classes)

loss = CrossEntropyLoss()

nn = ANN(loss)
nn.layers.extend([
    NeuralLayer(4, 5),
    Relu(),
    NeuralLayer(5, 3),
]
)

for epoch in range(3):
    print(f"epoch {epoch}")
    print()
    nn.zero_grads()
    y_hat = nn(x)
    loss = nn.get_loss(y_hat, y_true)
    nn.backward()
    nn.step(0.01)

epoch 0

epoch 1

epoch 2



### Testing implementation on IRIS dataset

In [23]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()

# Access the features and target variable
x = iris.data  # Features (sepal length, sepal width, petal length, petal width)
y = iris.target  # Target variable (species: 0 for setosa, 1 for versicolor, 2 for virginica)

In [24]:
encoded_y = np.zeros((y.shape[0], len(np.unique(y))), dtype=np.float32)
for i in range(y.shape[0]):
    encoded_y[i, y[i]] = 1.0

In [25]:
np.random.seed(42)
train_X, test_X, train_y, test_y = train_test_split(x, encoded_y, test_size=0.2, random_state=42)

In [26]:
scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)

In [27]:
class DataSet:
    def __init__(self, X, y, batch_size=32, shuffle=True):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_samples = X.shape[0]
        self.indices = np.arange(self.num_samples)
        self.current_idx = 0  # Keeps track of batch index

        if self.shuffle:
            np.random.shuffle(self.indices)  # Shuffle data at the start

    def __iter__(self):
        self.current_idx = 0
        if self.shuffle:
            np.random.shuffle(self.indices)
        return self

    def __next__(self):
        if self.current_idx >= self.num_samples:
            raise StopIteration  # Stop when all batches are processed

        # Get batch indices
        batch_indices = self.indices[self.current_idx:self.current_idx + self.batch_size]
        
        # Slice data for the batch
        X_batch = self.X[batch_indices]
        y_batch = self.y[batch_indices]

        # Move to the next batch
        self.current_idx += self.batch_size
        
        return Tensor(X_batch), Tensor(y_batch)


In [28]:
num_epochs = 100
lr = 1e-3
data_train = DataSet(train_X, train_y, batch_size=16)   
data_test = DataSet(test_X, test_y, batch_size=16)   

In [29]:
loss = CrossEntropyLoss()
in_dim = train_X.shape[1]
out_dim = train_y.shape[1]

nn = ANN(loss)
nn.layers.extend([
    NeuralLayer(in_dim, 32),
    Relu(),
    NeuralLayer(32, 16),
    Relu(),
    NeuralLayer(16, out_dim),
]
)

In [30]:
def accuracy(logits, y):
    sigma = Sigmoid()
    probs = sigma(logits)
    predictions = np.argmax(probs, axis=-1)
    reals = np.argmax(y, axis=-1)
    correct = 0
    for i in range(reals.shape[0]):
        if reals[i] == predictions[i]:
            correct += 1
    return correct

In [31]:
train_losses, test_losses, test_accuracies = [], [], []

for epoch in range(num_epochs):
    train_loss, test_loss, test_accuracy = 0,0,0
    # train
    nn.train = True
    i = 0
    for batch in data_train:
        i += 1
        nn.zero_grads()
        x, y = batch
        y_hat = nn(x)
        train_loss += nn.get_loss(y_hat, y)
        nn.backward()
        nn.step(lr)
    train_losses.append(train_loss/i)
    
    nn.train = False
    i = 0
    for batch in data_test:
        i += 1
        x, y = batch
        y_hat = nn(x)
        test_accuracy += accuracy(y_hat, y)
        test_loss += nn.get_loss(y_hat, y)
    test_losses.append(test_loss/i)
    test_accuracies.append( test_accuracy/data_test.X.shape[0] )

    print(f"epoch {epoch+1:2.0f} \t train: {train_losses[-1]:3.3f}\t test: {test_losses[-1]:3.3f} \t accuracy {100*test_accuracies[-1]:3.3f}%")


epoch  1 	 train: 1.000	 test: 0.740 	 accuracy 70.000%
epoch  2 	 train: 0.713	 test: 0.572 	 accuracy 86.667%
epoch  3 	 train: 0.609	 test: 0.490 	 accuracy 90.000%
epoch  4 	 train: 0.523	 test: 0.423 	 accuracy 93.333%
epoch  5 	 train: 0.469	 test: 0.356 	 accuracy 93.333%
epoch  6 	 train: 0.423	 test: 0.321 	 accuracy 93.333%
epoch  7 	 train: 0.390	 test: 0.295 	 accuracy 93.333%
epoch  8 	 train: 0.361	 test: 0.274 	 accuracy 93.333%
epoch  9 	 train: 0.336	 test: 0.250 	 accuracy 93.333%
epoch 10 	 train: 0.318	 test: 0.235 	 accuracy 93.333%
epoch 11 	 train: 0.300	 test: 0.225 	 accuracy 93.333%
epoch 12 	 train: 0.293	 test: 0.203 	 accuracy 96.667%
epoch 13 	 train: 0.292	 test: 0.195 	 accuracy 96.667%
epoch 14 	 train: 0.261	 test: 0.183 	 accuracy 96.667%
epoch 15 	 train: 0.262	 test: 0.183 	 accuracy 96.667%
epoch 16 	 train: 0.257	 test: 0.172 	 accuracy 96.667%
epoch 17 	 train: 0.232	 test: 0.167 	 accuracy 96.667%
epoch 18 	 train: 0.235	 test: 0.167 	 accuracy 

In [53]:
class ANN:
    def __init__(self,loss):
        self.loss = loss
        self.layers = NeuralModulesList()
        self.train = True
    
    def __call__(self, x:np.array):
        if self.train:
            self.intermediates = [Tensor(np.copy(x))]
        for layer in self.layers:
            x = layer(x)
            if self.train: self.intermediates.append(Tensor(np.copy(x)))
        return x
    
    def get_loss(self, y_hat:Tensor, y:Tensor):
        loss_value = self.loss(y_hat, y)
        if self.train: self.y = y
        return loss_value
    
    def backward(self):
        self.y.grad = np.ones_like(self.y.grad)
        self.loss.backward(self.intermediates[-1], self.y)
        for i in reversed(range(len(self.layers))):
            print(f"x {self.intermediates[i].grad.shape}  y {self.intermediates[i+1].grad.shape}")
            self.layers[i].backward(self.intermediates[i], self.intermediates[i+1])
            print(f"mean {np.mean(x.grad)}")

    def step(self, lr):
        for layer in self.layers:
            layer.update_params(lr)

    def zero_grads(self):
        for layer in self.layers:
            layer.zero_grads()    

In [54]:

class Flatten(NeuralModule):
    def __init__(self):
        super().__init__()
    
    def __call__(self, x:Tensor):
        self.x_shape = x.shape
        return x.reshape(self.x_shape[0],-1)
    
    def backward(self, x:Tensor, y:Tensor, weight_decay=0):
        x.grad += y.grad.reshape(self.x_shape)
        return

In [55]:
nn = ANN(loss,)

conv = ConvolutionalLayer(in_ch=1, out_ch=3, k_size=(3,3), padding=(1,1))
pool = AvgPool(k_size=(3,3), padding=(0,0))

nn.layers.extend([
    ConvolutionalLayer(in_ch=1, out_ch=3, k_size=(3,3), padding=(1,1)),
    Relu(),
    AvgPool(k_size=(3,3), padding=(0,0)),
    Flatten(),
    NeuralLayer(in_dim=12, out_dim=1),
]
)

In [57]:
x = Tensor( np.ones((32,1,4,4)) )
y = Tensor( np.ones((32,1)) )
y_hat = nn(x)
train_loss = nn.get_loss(y_hat, y)
nn.backward()

x (32, 12)  y (32, 1)
mean 0.0
x (32, 3, 2, 2)  y (32, 12)
mean 0.0
x (32, 3, 4, 4)  y (32, 3, 2, 2)
windows (32, 3, 4, 4, 3, 3)   x (32, 3, 4, 4)
mean 0.0
x (32, 3, 4, 4)  y (32, 3, 4, 4)
mean 0.0
x (32, 1, 4, 4)  y (32, 3, 4, 4)
mean 0.0


In [58]:
train_loss

Tensor(-0.)