In [153]:
import numpy as np
import tensorflow as tf
import os
import cv2

In [154]:
#import numpy as np

def conv2d_valid_forward(x, w, stride):
    """
    Forward pass for 2D convolution (valid padding, no kernel flip).

    Args:
        x: Input array of shape (N, C_in, H_in, W_in)
        w: Weights array of shape (C_out, C_in, kH, kW)
        stride: Tuple/list [s_h, s_w]

    Returns:
        out: Output array of shape (N, C_out, H_out, W_out). Its dtype is the
            NumPy promotion of the dtypes of x and w, so an integer input
            combined with float weights is not truncated.

    Raises:
        ValueError: If the channel count of x does not match that of w.
    """
    x = np.asarray(x)
    w = np.asarray(w)
    N, C_in, H_in, W_in = x.shape
    C_out, C_w, kH, kW = w.shape
    if C_in != C_w:
        raise ValueError(
            f"Channel mismatch: x has {C_in} channels but w expects {C_w}"
        )
    s_h, s_w = stride

    # Compute output dimensions
    H_out = (H_in - kH) // s_h + 1
    W_out = (W_in - kW) // s_w + 1
    out = np.zeros((N, C_out, H_out, W_out), dtype=np.result_type(x, w))

    # One einsum per output position handles every sample and every output
    # channel at once; only the spatial positions are looped in Python.
    for i in range(H_out):
        i0 = i * s_h
        for j in range(W_out):
            j0 = j * s_w
            patches = x[:, :, i0:i0 + kH, j0:j0 + kW]      # (N, C_in, kH, kW)
            out[:, :, i, j] = np.einsum('nchw,ochw->no', patches, w)
    return out

def conv2d_valid_backward(x, w, stride, dout):
    """
    Backward pass for 2D convolution (valid padding).

    Args:
        x: Input array of shape (N, C_in, H_in, W_in)
        w: Weights array of shape (C_out, C_in, kH, kW)
        stride: Tuple/list [s_h, s_w]
        dout: Upstream gradient of shape (N, C_out, H_out, W_out)

    Returns:
        dx: Gradient w.r.t. x, shape (N, C_in, H_in, W_in)
        dw: Gradient w.r.t. w, shape (C_out, C_in, kH, kW)

        Both gradients use the NumPy promotion of the dtypes of x, w and dout,
        so an integer-typed x no longer breaks the in-place accumulation.
    """
    x = np.asarray(x)
    w = np.asarray(w)
    dout = np.asarray(dout)
    kH, kW = w.shape[2:]
    s_h, s_w = stride
    H_out, W_out = dout.shape[2:]

    grad_dtype = np.result_type(x, w, dout)
    dx = np.zeros(x.shape, dtype=grad_dtype)
    dw = np.zeros(w.shape, dtype=grad_dtype)

    for i in range(H_out):
        i0 = i * s_h
        for j in range(W_out):
            j0 = j * s_w
            upstream = dout[:, :, i, j]                    # (N, C_out)
            patches = x[:, :, i0:i0 + kH, j0:j0 + kW]      # (N, C_in, kH, kW)
            # Gradient w.r.t. weights: each patch weighted by its upstream value.
            dw += np.einsum('no,nchw->ochw', upstream, patches)
            # Gradient w.r.t. input: scatter the kernels back onto the window.
            dx[:, :, i0:i0 + kH, j0:j0 + kW] += np.einsum('no,ochw->nchw', upstream, w)

    return dx, dw


In [155]:
import tensorflow as tf

def deconv2d(x, w, stride):
    """Valid-padding 2D transposed convolution on channels-first data.

    NOTE(review): this name is defined again later in the file; when the code
    runs top to bottom the later definition replaces this one.

    Args:
        x: [batch, in_channels, h, w]
        w: [out_channels, in_channels, k_h, k_w]
        stride: indexable pair [s_h, s_w]

    Returns:
        Tensor of shape [batch, out_channels, out_h, out_w] with
        out_h = (h - 1) * s_h + k_h and out_w = (w - 1) * s_w + k_w.
    """
    # Channels-last view of the input and a [k_h, k_w, out_c, in_c] kernel,
    # which is the layout passed to tf.nn.conv2d_transpose below.
    inputs_nhwc = tf.transpose(x, [0, 2, 3, 1])
    kernel = tf.transpose(w, [2, 3, 0, 1])

    n, rows, cols, _ = tf.unstack(tf.shape(inputs_nhwc))
    k_rows, k_cols, out_ch, _ = tf.unstack(tf.shape(kernel))
    target_shape = [
        n,
        (rows - 1) * stride[0] + k_rows,
        (cols - 1) * stride[1] + k_cols,
        out_ch,
    ]

    upsampled = tf.nn.conv2d_transpose(
        inputs_nhwc,
        kernel,
        output_shape=target_shape,
        strides=[1, stride[0], stride[1], 1],
        padding='VALID',
        data_format='NHWC',
    )
    # Back to channels-first.
    return tf.transpose(upsampled, [0, 3, 1, 2])

# Backward pass using GradientTape
def deconv2d_backward(x, w, stride, grad_y):
    """Gradients of `deconv2d` w.r.t. its input and kernel via autodiff.

    NOTE(review): this name is defined again later in the file; when the code
    runs top to bottom the later definition replaces this one.

    Args:
        x: Input passed to `deconv2d`.
        w: Kernel passed to `deconv2d`.
        stride: Stride passed to `deconv2d`.
        grad_y: Upstream gradient with the shape of the `deconv2d` output.

    Returns:
        (grad_x, grad_w): gradients with respect to x and w.
    """
    x_t = tf.convert_to_tensor(x)
    w_t = tf.convert_to_tensor(w)
    upstream = tf.convert_to_tensor(grad_y)

    # A single gradient call covers both sources, so the tape does not need
    # to be persistent.
    with tf.GradientTape() as tape:
        tape.watch([x_t, w_t])
        y = deconv2d(x_t, w_t, stride)
    grad_x, grad_w = tape.gradient(y, [x_t, w_t], output_gradients=upstream)
    return grad_x, grad_w


import tensorflow as tf

def deconv2d(x, w, stride):
    """Transposed 2D convolution with VALID padding for NCHW tensors.

    NOTE(review): another definition of this name follows later in the file
    and takes precedence once it has been executed.

    Args:
        x: [batch, in_channels, h, w]
        w: [out_channels, in_channels, k_h, k_w]
        stride: [s_h, s_w]

    Returns:
        Tensor of shape [batch, out_channels, out_h, out_w].
    """
    strides_nhwc = [1, stride[0], stride[1], 1]

    # NCHW -> NHWC for the input, [O, I, kH, kW] -> [kH, kW, O, I] for the kernel.
    x_last = tf.transpose(x, [0, 2, 3, 1])
    w_last = tf.transpose(w, [2, 3, 0, 1])

    batch, height, width, _ = tf.unstack(tf.shape(x_last))
    ker_h, ker_w, channels_out, _ = tf.unstack(tf.shape(w_last))

    # VALID transposed convolution grows each spatial dimension to (size - 1) * stride + kernel.
    grown_h = (height - 1) * stride[0] + ker_h
    grown_w = (width - 1) * stride[1] + ker_w

    y_last = tf.nn.conv2d_transpose(
        x_last,
        w_last,
        output_shape=[batch, grown_h, grown_w, channels_out],
        strides=strides_nhwc,
        padding='VALID',
        data_format='NHWC',
    )
    return tf.transpose(y_last, [0, 3, 1, 2])

def deconv2d_backward(x, w, stride, grad_y):
    """Analytic gradients of the valid-padding transposed convolution `deconv2d`.

    NOTE(review): another definition of this name follows later in the file
    and takes precedence once it has been executed.

    With y[n, o, i*s_h + a, j*s_w + b] += x[n, c, i, j] * w[o, c, a, b]:
      * grad_x is a strided VALID convolution of grad_y with the same kernel;
      * grad_w[o, c, a, b] = sum over n, i, j of
        grad_y[n, o, i*s_h + a, j*s_w + b] * x[n, c, i, j].

    The filter gradient is built from static spatial sizes, so x and w need
    fully defined shapes.

    Args:
        x: [batch, in_channels, h, w]
        w: [out_channels, in_channels, k_h, k_w]
        stride: [s_h, s_w]
        grad_y: [batch, out_channels, out_h, out_w]

    Returns:
        grad_x: [batch, in_channels, h, w]
        grad_w: [out_channels, in_channels, k_h, k_w]
    """
    s_h, s_w = stride[0], stride[1]
    x = tf.convert_to_tensor(x)
    w = tf.convert_to_tensor(w)
    grad_y = tf.convert_to_tensor(grad_y)

    # Gradient w.r.t. input (x): standard convolution of grad_y with the kernel
    # laid out as [k_h, k_w, out_c, in_c].
    kernel = tf.transpose(w, [2, 3, 0, 1])
    grad_y_nhwc = tf.transpose(grad_y, [0, 2, 3, 1])
    grad_x_nhwc = tf.nn.conv2d(
        grad_y_nhwc,
        kernel,
        strides=[1, s_h, s_w, 1],
        padding='VALID',
        data_format='NHWC'
    )
    grad_x = tf.transpose(grad_x_nhwc, [0, 3, 1, 2])

    # Gradient w.r.t. filter (w): for every kernel offset (a, b), correlate x
    # with the strided window of grad_y that this offset wrote to.
    in_h, in_w = int(x.shape[2]), int(x.shape[3])
    k_h, k_w = int(w.shape[2]), int(w.shape[3])
    per_row = []
    for a in range(k_h):
        per_col = []
        for b in range(k_w):
            window = grad_y[:, :,
                            a:a + (in_h - 1) * s_h + 1:s_h,
                            b:b + (in_w - 1) * s_w + 1:s_w]  # [batch, out_c, h, w]
            per_col.append(tf.einsum('nohw,nchw->oc', window, x))  # [out_c, in_c]
        per_row.append(tf.stack(per_col, axis=-1))  # [out_c, in_c, k_w]
    grad_w = tf.stack(per_row, axis=2)  # [out_c, in_c, k_h, k_w]

    return grad_x, grad_w




def deconv2d(x, w, stride):
    """
    Valid-padding 2D transposed convolution for channels-first (NCHW) input.

    Args:
      x: 4-D Tensor, shape [batch, in_channels, height, width].
      w: 4-D Tensor, shape [out_channels, in_channels, k_h, k_w].
      stride: 1-D Python list or tuple of 2 ints [s_h, s_w].

    Returns:
      4-D Tensor of shape [batch, out_channels, out_h, out_w], where
      out_h = (height - 1) * s_h + k_h and out_w = (width - 1) * s_w + k_w.
    """
    # Layout changes: x goes NCHW -> NHWC; w goes [O, I, k_h, k_w] -> [k_h, k_w, O, I].
    x_channels_last = tf.transpose(x, [0, 2, 3, 1])
    filters = tf.transpose(w, [2, 3, 0, 1])

    # Dynamic shapes, so the output shape can be built from tensor scalars.
    n_items, in_rows, in_cols, _ = tf.unstack(tf.shape(x_channels_last))
    f_rows, f_cols, n_out, _ = tf.unstack(tf.shape(filters))
    out_rows = (in_rows - 1) * stride[0] + f_rows
    out_cols = (in_cols - 1) * stride[1] + f_cols
    nhwc_out_shape = [n_items, out_rows, out_cols, n_out]

    y_channels_last = tf.nn.conv2d_transpose(
        x_channels_last,
        filters,
        output_shape=nhwc_out_shape,
        strides=[1, stride[0], stride[1], 1],
        padding='VALID',
        data_format='NHWC'
    )

    # NHWC -> NCHW for the caller.
    return tf.transpose(y_channels_last, [0, 3, 1, 2])


def deconv2d_backward(x, w, stride, grad_y):
    """
    Computes gradients w.r.t. the input x and the filter w for the
    deconv2d op defined above.

    deconv2d is the input-gradient of an ordinary convolution that maps the
    large map (out_c channels) to the small map (in_c channels) with a
    [k_h, k_w, out_c, in_c] filter. For the filter gradient, grad_y therefore
    plays the role of that convolution's input and x plays the role of its
    output gradient.

    Args:
      x: Same as in deconv2d.
      w: Same as in deconv2d.
      stride: Same as in deconv2d.
      grad_y: 4-D Tensor, upstream gradient of shape [batch, out_ch, out_h, out_w].

    Returns:
      grad_x: Gradient w.r.t. x, same shape as x.
      grad_w: Gradient w.r.t. w, same shape as w.
    """
    strides_nhwc = [1, stride[0], stride[1], 1]

    # 1) Transform to NHWC; the kernel becomes [k_h, k_w, out_c, in_c]
    x_nhwc = tf.transpose(x, [0, 2, 3, 1])
    w_hwoi = tf.transpose(w, [2, 3, 0, 1])
    grad_y_nhwc = tf.transpose(grad_y, [0, 2, 3, 1])

    # 2) Gradient w.r.t. input: conv2d on grad_y with same filter
    grad_x_nhwc = tf.nn.conv2d(
        grad_y_nhwc,
        w_hwoi,
        strides=strides_nhwc,
        padding='VALID',
        data_format='NHWC'
    )  # [batch, h, w, in_c]

    # 3) Gradient w.r.t. filter. The filter layout is [k_h, k_w, out_c, in_c]:
    #    its "input" channels are the out_c channels of grad_y and its
    #    "output" channels are the in_c channels of x.
    k_h, k_w = int(w.shape[2]), int(w.shape[3])
    in_c = int(x.shape[1])
    out_c = int(w.shape[0])
    filter_sizes = [k_h, k_w, out_c, in_c]

    # NOTE(review): the compat.v1 endpoint is used on the assumption that
    # TF 2.x does not export `tf.nn.conv2d_backprop_filter` -- confirm against
    # the installed TensorFlow version.
    grad_w_hwoi = tf.compat.v1.nn.conv2d_backprop_filter(
        input=grad_y_nhwc,
        filter_sizes=filter_sizes,
        out_backprop=x_nhwc,
        strides=strides_nhwc,
        padding='VALID',
        data_format='NHWC'
    )  # [k_h, k_w, out_c, in_c]

    # 4) Convert gradients back to original layouts
    grad_x = tf.transpose(grad_x_nhwc, [0, 3, 1, 2])  # -> [batch, in_c, h, w]
    grad_w = tf.transpose(grad_w_hwoi, [2, 3, 0, 1])  # -> [out_c, in_c, k_h, k_w]

    return grad_x, grad_w


In [156]:
#import numpy as np

def batch_norm_forward(x, gamma, beta, running_mean, running_var, momentum=0.99, epsilon=1e-3, training=True):
    """
    Per-channel batch normalization for 4D channels-first input.

    Inputs:
    - x: Input data, shape (N, C, H, W)
    - gamma: Scale parameter, shape (C,)
    - beta: Shift parameter, shape (C,)
    - running_mean: Running mean (for inference), shape (C,)
    - running_var: Running variance (for inference), shape (C,)
    - momentum: Weight kept on the old running statistics when they are updated.
    - epsilon: Small float added to the variance before the square root.
    - training: True normalizes with the statistics of x and updates the
      running statistics; False normalizes with the running statistics.

    Returns:
    - out: Batch-normalized output, same shape as x
    - cache: (x, x_hat, mean, var, gamma, beta, epsilon) in training mode, else None
    - updated_running_mean, updated_running_var: New running statistics in
      training mode; the unchanged inputs in inference mode
    """
    N, C, H, W = x.shape
    channel_shape = (1, C, 1, 1)
    stat_axes = (0, 2, 3)

    if not training:
        # Inference: normalize with the stored statistics and pass them through.
        mean = running_mean.reshape(channel_shape)
        var = running_var.reshape(channel_shape)
        updated_running_mean = running_mean
        updated_running_var = running_var
    else:
        # Training: statistics over batch, height and width for every channel.
        mean = np.mean(x, axis=stat_axes, keepdims=True)
        var = np.var(x, axis=stat_axes, keepdims=True)
        keep = momentum
        updated_running_mean = keep * running_mean + (1 - keep) * mean.squeeze()
        updated_running_var = keep * running_var + (1 - keep) * var.squeeze()

    # The normalization and affine transform are the same in both modes.
    x_hat = (x - mean) / np.sqrt(var + epsilon)
    out = gamma.reshape(channel_shape) * x_hat + beta.reshape(channel_shape)

    cache = (x, x_hat, mean, var, gamma, beta, epsilon) if training else None
    return out, cache, updated_running_mean, updated_running_var


In [157]:
def batch_norm_backward(dout, cache):
    """
    Gradient of training-mode batch normalization (channels first).

    Inputs:
    - dout: Upstream gradient, shape (N, C, H, W)
    - cache: Tuple (x, x_hat, mean, var, gamma, beta, epsilon) from the forward pass

    Returns:
    - dx: Gradient w.r.t. input x, shape (N, C, H, W)
    - dgamma: Gradient w.r.t. gamma, shape (C,)
    - dbeta: Gradient w.r.t. beta, shape (C,)
    """
    x, x_hat, mean, var, gamma, beta, epsilon = cache
    N, C, H, W = dout.shape
    size = N * H * W            # number of values reduced per channel
    axes = (0, 2, 3)

    centered = x - mean
    std = np.sqrt(var + epsilon)

    # Parameter gradients reduce over everything except the channel axis.
    dgamma = np.sum(dout * x_hat, axis=axes)
    dbeta = np.sum(dout, axis=axes)

    # Gradient reaching the normalized activations.
    dx_hat = dout * gamma.reshape(1, C, 1, 1)

    # Contributions flowing through the batch variance and the batch mean.
    dvar = np.sum(dx_hat * centered * -0.5 * np.power(var + epsilon, -1.5), axis=axes, keepdims=True)
    dmean = np.sum(-dx_hat / std, axis=axes, keepdims=True)
    dmean = dmean + dvar * np.sum(-2 * centered, axis=axes, keepdims=True) / size

    # Direct path plus the two statistic paths.
    dx = dx_hat / std + dvar * 2 * centered / size + dmean / size

    return dx, dgamma, dbeta


In [158]:
def relu(data):
    """Element-wise rectified linear unit: the larger of 0.0 and each value."""
    floor = 0.0
    rectified = np.maximum(floor, data)
    return rectified

In [159]:
def relu_backward(data):
    """Return a float mask that is 1.0 where data is strictly positive, else 0.0.

    This is the derivative of ReLU evaluated at `data`; it does not multiply
    by any upstream gradient.
    """
    is_positive = data > 0
    return is_positive.astype(float)

In [160]:
def square_error(IP,OP):
    """Element-wise squared difference (IP - OP) ** 2."""
    difference = IP - OP
    return difference ** 2

In [161]:
# Raw string: the Windows backslashes are kept verbatim instead of relying on
# unrecognised escape sequences such as "\R", "\P" and "\d".
dataset_path=r"D:\REQS\PROJECT\dataset\train"

#Hyperparameters
epochs=10               # passes of the training loop
back_spin=5             # NOTE(review): not referenced in the visible code
learning_rate=0.001     # step size handed to update_params
e=0.00001               # NOTE(review): not referenced in the visible code
batch_size=1            # images drawn per call to get_batch
image_size=[3,32,32]    # only mentioned in a commented-out resize in load_image

In [162]:
#MODEL:2 -- THE BASE
# Every bn_fN array is structured as rows [gamma, beta, running_mean, running_var].
# The "#CxHxW" comments give the activation shape entering the next layer
# for a 3x32x32 input with stride (2, 2) everywhere.


def _he_init(out_c, in_c, k_h, k_w):
    """Draw an (out_c, in_c, k_h, k_w) kernel scaled by sqrt(2 / fan_in).

    fan_in is taken from the kernel's own shape (in_c * k_h * k_w) so the
    scale always matches the layer it belongs to.
    """
    fan_in = in_c * k_h * k_w
    return np.random.randn(out_c, in_c, k_h, k_w) * np.sqrt(2.0 / fan_in)


def _bn_init(channels):
    """Batch-norm parameter block: gamma = 1, beta = 0, running mean = 0, running var = 1."""
    params = np.zeros((4, channels), dtype=np.float32)
    params[0, :] = 1
    params[3, :] = 1
    return params


s_all=[2,2]
#3x32x32
conv_f1=_he_init(8, 3, 4, 4)
bn_f1=_bn_init(8)
#8x15x15
conv_f2=_he_init(16, 8, 3, 3)
bn_f2=_bn_init(16)
#16x7x7
deconv_f3=_he_init(10, 16, 5, 5)
bn_f3=_bn_init(10)
#10x17x17
deconv_f4=_he_init(6, 10, 4, 4)
bn_f4=_bn_init(6)
#6x36x36
deconv_f5=_he_init(3, 6, 5, 5)
bn_f5=_bn_init(3)
#3x75x75
conv_f6=_he_init(6, 3, 5, 5)
bn_f6=_bn_init(6)
#6x36x36
conv_f7=_he_init(10, 6, 4, 4)
bn_f7=_bn_init(10)
#10x17x17
conv_f8=_he_init(16, 10, 5, 5)
bn_f8=_bn_init(16)
#16x7x7
deconv_f9=_he_init(8, 16, 3, 3)
bn_f9=_bn_init(8)
#8x15x15
deconv_f10=_he_init(3, 8, 4, 4)
bn_f10=_bn_init(3)
#3x32x32

In [163]:
#flow 1,2,3,4,5,6,7,8,9,10
def forward(IP):
    """Run IP through the ten conv/deconv -> batch-norm -> ReLU stages.

    Every stage uses stride (2, 2) and training-mode batch normalization with
    momentum 0.99 and epsilon 0.001. The running mean and variance returned
    by batch_norm_forward are written back into rows 2 and 3 of the matching
    module-level bn_fN array.

    Args:
        IP: Input batch for the first convolution (conv_f1 has 3 input channels).

    Returns:
        A flat 40-item tuple ordered from the last stage to the first; for
        each stage the four items are (activation, batch-norm output,
        batch-norm cache, pre-normalization output), i.e.
        act_a10, bn_a10, cache_a10, a10, act_a9, ..., cache_a1, a1.
    """
    stride = (2, 2)
    # Built on every call so the current module-level parameters are used.
    stages = (
        (conv2d_valid_forward, conv_f1, bn_f1),
        (conv2d_valid_forward, conv_f2, bn_f2),
        (deconv2d, deconv_f3, bn_f3),
        (deconv2d, deconv_f4, bn_f4),
        (deconv2d, deconv_f5, bn_f5),
        (conv2d_valid_forward, conv_f6, bn_f6),
        (conv2d_valid_forward, conv_f7, bn_f7),
        (conv2d_valid_forward, conv_f8, bn_f8),
        (deconv2d, deconv_f9, bn_f9),
        (deconv2d, deconv_f10, bn_f10),
    )

    records = []
    signal = IP
    for layer_op, kernel, bn in stages:
        pre_norm = layer_op(signal, kernel, stride)
        normed, cache, bn[2], bn[3] = batch_norm_forward(
            pre_norm, bn[0], bn[1], bn[2], bn[3], 0.99, 0.001, True
        )
        signal = relu(normed)
        records.append((signal, normed, cache, pre_norm))

    # Flatten with the last stage first, matching the documented order.
    flattened = []
    for record in reversed(records):
        flattened.extend(record)
    return tuple(flattened)

In [164]:
def m1_backward(IP, layers, losse):
    """Backpropagate through the ten stages produced by `forward`.

    Args:
        IP: The batch that was fed to `forward`.
        layers: The 40-item tuple returned by `forward`.
        losse: Upstream gradient with respect to the network output act_a10.
            It is consumed as a gradient, so callers should pass the
            derivative of their loss rather than the loss values.

    Returns:
        A 20-item tuple (d_conv_f1, d_bn_f1, d_conv_f2, d_bn_f2, ...,
        d_deconv_f10, d_bn_f10). Each d_bn_fN is a float32 (4, C) array with
        the gamma gradient in row 0, the beta gradient in row 1 and zeros in
        rows 2 and 3.
    """
    act_a10, bn_a10, cache_a10, a10, act_a9, bn_a9, cache_a9, a9, act_a8, bn_a8, cache_a8, a8, act_a7, bn_a7, cache_a7, a7, act_a6, bn_a6, cache_a6, a6, act_a5, bn_a5, cache_a5, a5, act_a4, bn_a4, cache_a4, a4, act_a3, bn_a3, cache_a3, a3, act_a2, bn_a2, cache_a2, a2, act_a1, bn_a1, cache_a1, a1=layers

    def relu_bn_backward(d_act, bn_out, cache):
        """Backpropagate one stage's ReLU followed by its batch norm."""
        # ReLU passes the upstream gradient only where its input (the
        # batch-norm output) was positive.
        d_bn_out = np.asarray(d_act) * relu_backward(np.asarray(bn_out))
        d_pre, dgamma, dbeta = batch_norm_backward(d_bn_out, cache)
        d_bn = np.zeros((4, np.shape(dgamma)[0]), dtype=np.float32)
        d_bn[0] = dgamma
        d_bn[1] = dbeta
        return d_pre, d_bn

    d_a10, d_bn_f10 = relu_bn_backward(losse, bn_a10, cache_a10)
    d_act_a9, d_deconv_f10 = deconv2d_backward(act_a9, deconv_f10, (2,2), d_a10)

    d_a9, d_bn_f9 = relu_bn_backward(d_act_a9, bn_a9, cache_a9)
    d_act_a8, d_deconv_f9 = deconv2d_backward(act_a8, deconv_f9, (2,2), d_a9)

    d_a8, d_bn_f8 = relu_bn_backward(d_act_a8, bn_a8, cache_a8)
    d_act_a7, d_conv_f8 = conv2d_valid_backward(act_a7, conv_f8, (2,2), d_a8)

    d_a7, d_bn_f7 = relu_bn_backward(d_act_a7, bn_a7, cache_a7)
    d_act_a6, d_conv_f7 = conv2d_valid_backward(act_a6, conv_f7, (2,2), d_a7)

    d_a6, d_bn_f6 = relu_bn_backward(d_act_a6, bn_a6, cache_a6)
    d_act_a5, d_conv_f6 = conv2d_valid_backward(act_a5, conv_f6, (2,2), d_a6)

    d_a5, d_bn_f5 = relu_bn_backward(d_act_a5, bn_a5, cache_a5)
    d_act_a4, d_deconv_f5 = deconv2d_backward(act_a4, deconv_f5, (2,2), d_a5)

    d_a4, d_bn_f4 = relu_bn_backward(d_act_a4, bn_a4, cache_a4)
    d_act_a3, d_deconv_f4 = deconv2d_backward(act_a3, deconv_f4, (2,2), d_a4)

    d_a3, d_bn_f3 = relu_bn_backward(d_act_a3, bn_a3, cache_a3)
    d_act_a2, d_deconv_f3 = deconv2d_backward(act_a2, deconv_f3, (2,2), d_a3)

    d_a2, d_bn_f2 = relu_bn_backward(d_act_a2, bn_a2, cache_a2)
    d_act_a1, d_conv_f2 = conv2d_valid_backward(act_a1, conv_f2, (2,2), d_a2)

    d_a1, d_bn_f1 = relu_bn_backward(d_act_a1, bn_a1, cache_a1)
    # The gradient w.r.t. the input batch is computed but not returned.
    d_IP, d_conv_f1 = conv2d_valid_backward(IP, conv_f1, (2,2), d_a1)

    return d_conv_f1, d_bn_f1, d_conv_f2, d_bn_f2, d_deconv_f3, d_bn_f3, d_deconv_f4, d_bn_f4, d_deconv_f5, d_bn_f5, d_conv_f6, d_bn_f6, d_conv_f7, d_bn_f7, d_conv_f8, d_bn_f8, d_deconv_f9, d_bn_f9, d_deconv_f10, d_bn_f10

In [165]:
#Update Parameters
def update_params(diffs, lr, conv_f1, conv_f2, deconv_f3, deconv_f4, deconv_f5, conv_f6, conv_f7, conv_f8, deconv_f9, deconv_f10, bn_f1, bn_f2, bn_f3, bn_f4, bn_f5, bn_f6, bn_f7, bn_f8, bn_f9, bn_f10):
    """Apply one in-place gradient-descent step to every parameter array.

    Args:
        diffs: 20 gradients ordered (d_conv_f1, d_bn_f1, d_conv_f2, d_bn_f2,
            ..., d_deconv_f10, d_bn_f10), i.e. the kernel gradient and the
            batch-norm gradient of each layer interleaved.
        lr: Learning rate multiplied into every gradient.
        conv_f1 ... deconv_f10: Kernel arrays, modified in place.
        bn_f1 ... bn_f10: Batch-norm parameter arrays, modified in place.

    Returns:
        None. Callers observe the update through the arrays they passed in,
        so every update is done in place, including the one for conv_f1.

    Raises:
        ValueError: If diffs does not contain exactly 20 gradients.
    """
    kernels = (conv_f1, conv_f2, deconv_f3, deconv_f4, deconv_f5,
               conv_f6, conv_f7, conv_f8, deconv_f9, deconv_f10)
    bn_blocks = (bn_f1, bn_f2, bn_f3, bn_f4, bn_f5,
                 bn_f6, bn_f7, bn_f8, bn_f9, bn_f10)

    diffs = tuple(diffs)
    if len(diffs) != 2 * len(kernels):
        raise ValueError(
            f"expected {2 * len(kernels)} gradients, got {len(diffs)}"
        )

    for layer, (kernel, bn) in enumerate(zip(kernels, bn_blocks)):
        # np.asarray lets gradients that arrive as non-NumPy array-likes
        # take part in the in-place subtraction.
        kernel -= lr * np.asarray(diffs[2 * layer])
        bn -= lr * np.asarray(diffs[2 * layer + 1])
    return

In [166]:
def load_image_paths():
    """Return the paths of the entries in `dataset_path` that end with a supported image extension.

    The extension match is case-sensitive and the order is whatever
    os.listdir yields.
    """
    supported_formats = ('.png', '.jpg', '.jpeg', '.bmp')
    image_paths = []
    for entry in os.listdir(dataset_path):
        if entry.endswith(supported_formats):
            image_paths.append(os.path.join(dataset_path, entry))
    return image_paths
    
def load_image(image_path):
    """Read an image with cv2.imread and scale its values by 1/255 as float64.

    No resizing is performed; the image keeps the size stored on disk.

    Raises:
        ValueError: If cv2.imread returns None for `image_path`.
    """
    pixels = cv2.imread(image_path)
    if pixels is not None:
        # Divide by 255.0 after widening to float64.
        return pixels.astype(np.float64) / 255.0
    raise ValueError(f"Could not load image: {image_path}")


In [None]:
def get_batch(image_paths):
    """Draw `batch_size` distinct paths at random and return their images stacked in one array."""
    chosen = np.random.choice(image_paths, batch_size, replace=False)
    loaded = []
    for path in chosen:
        loaded.append(load_image(path))
    return np.array(loaded)
    
image_paths = load_image_paths()
num_batches = len(image_paths) // batch_size
if num_batches == 0:
    # Without this guard the average-loss division below fails with a
    # ZeroDivisionError that does not say what is wrong.
    raise ValueError(
        f"No full batch available: found {len(image_paths)} image(s) for batch_size={batch_size}"
    )

for epoch in range(epochs):
    total_loss = 0

    for batch_index in range(num_batches):
        batch = get_batch(image_paths)
        batch = np.transpose(batch, (0,3,1,2))  # Proper channel-first format
        # Forward pass through the model
        results = forward(batch)
        # Unpacked into module-level names, as the original cell did.
        act_a10, bn_a10, cache_a10, a10, act_a9, bn_a9, cache_a9, a9, act_a8, bn_a8, cache_a8, a8, act_a7, bn_a7, cache_a7, a7, act_a6, bn_a6, cache_a6, a6, act_a5, bn_a5, cache_a5, a5, act_a4, bn_a4, cache_a4, a4, act_a3, bn_a3, cache_a3, a3, act_a2, bn_a2, cache_a2, a2, act_a1, bn_a1, cache_a1, a1=results

        # Reconstruction error between the network output and its input
        losse = square_error(act_a10, batch)
        loss =np.mean(losse)

        # Backpropagation starts from the derivative of the error, not from
        # the error values. This is the gradient of the summed squared
        # error, which equals the gradient of the printed mean loss times
        # the constant act_a10.size.
        d_act_a10 = 2.0 * (np.asarray(act_a10) - batch)
        box=m1_backward(batch, results, d_act_a10)

        # In-place gradient-descent step on every parameter array
        update_params(box, learning_rate, conv_f1, conv_f2, deconv_f3, deconv_f4, deconv_f5, conv_f6, conv_f7, conv_f8, deconv_f9, deconv_f10, bn_f1, bn_f2, bn_f3, bn_f4, bn_f5, bn_f6, bn_f7, bn_f8, bn_f9, bn_f10)

        total_loss += loss

        if (batch_index + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_index+1}/{num_batches}], Loss: {loss:.4f}")

    avg_loss = total_loss / num_batches
    print(f"Epoch [{epoch+1}/{epochs}] completed. Average Loss: {avg_loss:.4f}")

Epoch [1/10], Batch [10/50000], Loss: 0.1487
Epoch [1/10], Batch [20/50000], Loss: 0.2041
Epoch [1/10], Batch [30/50000], Loss: 0.3072
Epoch [1/10], Batch [40/50000], Loss: 0.3022
Epoch [1/10], Batch [50/50000], Loss: 0.2100
Epoch [1/10], Batch [60/50000], Loss: 0.2134
Epoch [1/10], Batch [70/50000], Loss: 0.3472
Epoch [1/10], Batch [80/50000], Loss: 0.1127
Epoch [1/10], Batch [90/50000], Loss: 0.2754
Epoch [1/10], Batch [100/50000], Loss: 0.2661
Epoch [1/10], Batch [110/50000], Loss: 0.5352
Epoch [1/10], Batch [120/50000], Loss: 0.1995
Epoch [1/10], Batch [130/50000], Loss: 0.2199
Epoch [1/10], Batch [140/50000], Loss: 0.3306
Epoch [1/10], Batch [150/50000], Loss: 0.2423
Epoch [1/10], Batch [160/50000], Loss: 0.2561
Epoch [1/10], Batch [170/50000], Loss: 0.3639
Epoch [1/10], Batch [180/50000], Loss: 0.2307
Epoch [1/10], Batch [190/50000], Loss: 0.2748
Epoch [1/10], Batch [200/50000], Loss: 0.0986
Epoch [1/10], Batch [210/50000], Loss: 0.3191
Epoch [1/10], Batch [220/50000], Loss: 0.23