In [4]:
import numpy as np

# Seed NumPy's legacy global RNG so the random image below is reproducible.
np.random.seed(0)

# One random 8x8 image with three channels, laid out as (batch, channel, height, width).
img_rgb = np.random.randint(0, 256, size=(1, 3, 8, 8), dtype=np.uint8)

print("Shape:", img_rgb.shape)

# Channel axis order is Red, Green, Blue; print each 8x8 plane in turn.
for channel_index, channel_name in enumerate(("Red", "Green", "Blue")):
    print(f"{channel_name} channel:\n", img_rgb[0, channel_index])


Shape: (1, 3, 8, 8)
Red channel:
 [[172  10 127 140  47 170 196 151]
 [117 166  22 183 192 204  33 216]
 [ 67 179  78 154 251  82 162 219]
 [195 118 125 139 103 125 229 216]
 [  9 164 116 108 211 222 161 159]
 [ 21  81  89 165 242 214 102  98]
 [ 36 183   5 112  87  58  43  76]
 [ 70  60  75 228 216 189 132  14]]
Green channel:
 [[ 88 154 178 246 140 205 204  69]
 [ 58  57  41  98 193  66  72 122]
 [230 125 174 202  39  74 234 207]
 [ 87 168 101 135 174 200 223 122]
 [ 88  94 107 145  81 139 141 100]
 [165 230 243 236  25  66   9 214]
 [ 77 107  47  18  72 152  95  86]
 [  9  27  78  22 148 151 238 165]]
Blue channel:
 [[115   8  45   5 208  19  69  94]
 [243 146  38 213 197  30   8 245]
 [254  71  53 199  79   7 238  35]
 [175  29 185 222 192   9 190 222]
 [ 82 187 134 250  99  96  62 121]
 [216 167 149 204 177 124   8 205]
 [243 130  35 118  29   3  62 133]
 [147 194 208 199 147  12 203 173]]


In [None]:
def im2col(self):
    """Placeholder for an im2col routine; currently does nothing.

    The body is intentionally empty, so calling it always yields ``None``.
    NOTE(review): the ``self`` parameter suggests this was meant to become a
    method of a layer class — confirm before implementing.
    """
    # TODO: not implemented yet.
    return None



In [3]:
def conv2d_forward(x, weight, bias=None, stride=1, padding=0):
    """Forward pass of a 2-D convolution (no kernel flip, i.e. cross-correlation).

    Parameters
    ----------
    x : np.ndarray, shape (N, Cin, H, W)
        Batch of input images.
    weight : np.ndarray, shape (Cout, Cin, kh, kw)
        Convolution filters.
    bias : np.ndarray of shape (Cout,), or None
        Optional bias added to every position of each output channel.
    stride : int
        Step size the kernel moves each time (same for rows and columns); >= 1.
    padding : int
        Number of zeros padded around each spatial edge of the input; >= 0.

    Returns
    -------
    np.ndarray, shape (N, Cout, Hout, Wout)
        Output feature maps, where ``Hout = (H + 2*padding - kh) // stride + 1``
        (and likewise for ``Wout``). The dtype is the NumPy promotion of the
        dtypes of ``x``, ``weight`` and ``bias``; pure integer/bool inputs are
        widened to at least int64 so that sums of products cannot wrap around
        in a narrow type such as uint8.

    Raises
    ------
    AssertionError
        If the channel counts of ``x`` and ``weight`` disagree, ``stride`` or
        ``padding`` is out of range, the output would be empty, or ``bias``
        has the wrong shape.
    """
    # Unpack shapes
    N, Cin, H, W = x.shape                # N=batch size, Cin=input channels, H/W=input height/width
    Cout, Cin_w, kh, kw = weight.shape    # Cout=output channels, Cin_w=weight input channels
    assert Cin == Cin_w, f"Input channels ({Cin}) != filter channels ({Cin_w})"

    # Reject values that would otherwise fail obscurely (stride=0 divides by
    # zero below) or silently produce wrong output sizes (negative padding).
    assert stride >= 1, f"stride must be >= 1, got {stride}"
    assert padding >= 0, f"padding must be >= 0, got {padding}"

    # Calculate output feature map dimensions (integer division)
    Hout = (H + 2*padding - kh) // stride + 1
    Wout = (W + 2*padding - kw) // stride + 1

    # Sanity checks
    assert Hout > 0 and Wout > 0, "Output size <= 0 — check stride/padding/kernel."
    if bias is not None:
        assert bias.shape == (Cout,), f"bias should be ({Cout},)"

    # Pick a dtype wide enough for the result. Using x.dtype (e.g. uint8 for
    # image data) would truncate float weights and wrap around on overflow.
    out_dtype = np.result_type(x.dtype, weight.dtype)
    if bias is not None:
        out_dtype = np.result_type(out_dtype, bias.dtype)
    if not np.issubdtype(out_dtype, np.inexact):
        # Integer/bool inputs: accumulate in a wide type instead of the input's.
        out_dtype = np.result_type(out_dtype, np.int64)

    x_pad = x.astype(out_dtype, copy=False)
    filters = weight.astype(out_dtype, copy=False)

    # Apply zero-padding to input if requested
    # np.pad arguments: ((pad_before, pad_after), ...) for each dimension
    # Here: batch axis (0,0), channel axis (0,0), height axis (padding,padding), width axis (padding,padding)
    if padding > 0:
        x_pad = np.pad(x_pad, ((0, 0), (0, 0), (padding, padding), (padding, padding)), mode='constant')

    # Allocate output array (every element is written in the loop below)
    y = np.empty((N, Cout, Hout, Wout), dtype=out_dtype)

    # Slide the kernel over every output location; the batch and output-channel
    # dimensions are handled together by one einsum per location.
    for i in range(Hout):          # For each output row
        r = i * stride             # Row index in input where kernel starts
        for j in range(Wout):      # For each output column
            c = j * stride         # Col index in input where kernel starts

            # Input patches the same size as the kernel, for all images at once
            # Shape: (N, Cin, kh, kw)
            patch = x_pad[:, :, r:r+kh, c:c+kw]

            # Multiply each patch elementwise with each filter and sum over
            # (Cin, kh, kw): result has shape (N, Cout)
            y[:, :, i, j] = np.einsum('nchw,ochw->no', patch, filters)

    # Add the per-output-channel bias if provided (broadcast over N, Hout, Wout)
    if bias is not None:
        y += bias.astype(out_dtype, copy=False).reshape(1, Cout, 1, 1)

    # Return the output tensor: (N, Cout, Hout, Wout)
    return y


In [12]:
def ReLU(self, x):
    """Rectified linear unit: elementwise maximum of 0 and ``x``.

    ``self`` is accepted but not used. ``x`` may be anything
    ``np.maximum`` accepts (scalar or array).
    """
    floor = 0
    activated = np.maximum(floor, x)
    return activated

In [None]:
def BatchNorm2D(self, x, eps=1e-5):
    """Normalize each channel of a batch to zero mean and unit variance.

    Statistics are computed per channel over the batch and both spatial axes
    (axes 0, 2 and 3 of an ``(N, C, H, W)`` array). No learnable scale/shift
    and no running statistics are applied.

    Parameters
    ----------
    self : unused
    x : np.ndarray, shape (N, C, H, W)
        Batch of feature maps. A floating-point (or complex) array is
        normalized **in place**; any other dtype is left untouched and a
        float64 copy is normalized instead, because integer arrays cannot
        hold the normalized values.
    eps : float, default 1e-5
        Added to the variance before the square root so that a constant
        channel (zero variance) yields zeros rather than NaN/inf.

    Returns
    -------
    np.ndarray, shape (N, C, H, W)
        The normalized array (``x`` itself when it was normalized in place).

    Raises
    ------
    ValueError
        If ``x`` is not 4-dimensional.
    """
    if x.ndim != 4:
        raise ValueError(f"expected a 4-D (N, C, H, W) array, got shape {x.shape}")

    # In-place arithmetic with float statistics is only valid on inexact dtypes.
    out = x if np.issubdtype(x.dtype, np.inexact) else x.astype(np.float64)

    # Per-channel statistics across all N images and all pixels.
    reduce_axes = (0, 2, 3)
    mean = out.mean(axis=reduce_axes, keepdims=True)
    var = out.var(axis=reduce_axes, keepdims=True)

    # keepdims=True makes mean/var shape (1, C, 1, 1), so they broadcast per channel.
    out -= mean
    out /= np.sqrt(var + eps)
    return out

