In [21]:
import numpy as np
import os
import cv2
from numpy.lib.stride_tricks import sliding_window_view
import warnings
import tensorflow as tf
from tensorflow import keras

CONVOLUTION:

In [22]:
def batch_convolution(data, filters, stride, padding='valid'):
    """
    Optimized batch convolution operation with 4D input support
    """
    # Input validation code remains unchanged
        # Input validation
    if not isinstance(data, np.ndarray) or data.ndim != 4:
        raise ValueError("Input data must be 4D numpy array [B, C, H, W]")
    
    if not isinstance(filters, np.ndarray) or filters.ndim != 4:
        raise ValueError("Filters must be 4D numpy array [N, C, FH, FW]")
    
    if filters.shape[1] != data.shape[1]:
        raise ValueError(f"Filter channels ({filters.shape[1]}) must match data channels ({data.shape[1]})")
    
    batch_size, in_channels, in_h, in_w = data.shape
    num_filters, _, fh, fw = filters.shape
    sh, sw = stride
    batch_size, in_channels, in_h, in_w = data.shape
    num_filters, _, fh, fw = filters.shape
    sh, sw = stride
    
    # Calculate output dimensions remains unchanged
    if padding == 'same':
        i=0
        # Same padding calculation code
        # ...
    elif padding == 'valid':
        out_h = (in_h - fh) // sh + 1
        out_w = (in_w - fw) // sw + 1
    else:
        raise ValueError("Padding must be 'valid' or 'same'")

    # Create sliding windows for each sample in the batch
    windows = sliding_window_view(data, (1, 1, fh, fw), axis=(0, 1, 2, 3))
    
    # Apply stride
    windows = windows[:, :, ::sh, ::sw, 0, :, :, :]
    
    # FIXED: Get actual window shape rather than trying to unpack
    window_shape = windows.shape
    
    # FIXED: Reshape using actual dimensions
    # Flatten batch, channels, and spatial dimensions
    windows_reshaped = windows.reshape(-1, in_channels * fh * fw)
    
    # Reshape filters to match inner dimension
    filters_reshaped = filters.reshape(num_filters, in_channels * fh * fw)
    
    # Matrix multiplication with matching dimensions
    output = np.matmul(windows_reshaped, filters_reshaped.T)
    
    # Reshape output to proper dimensions
    output = output.reshape(batch_size, out_h, out_w, num_filters)
    output = output.transpose(0, 3, 1, 2)  # NHWC -> NCHW
    
    return output


DECONVOLUTION:

In [23]:
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def batch_deconvolution(data, filters, stride, padding='valid', output_padding=(0, 0)):
    """
    Vectorized transposed convolution (deconvolution) with batch support

    Parameters:
    data : np.ndarray [batch_size, in_channels, in_height, in_width]
    filters : np.ndarray [out_channels, in_channels, fh, fw]
    stride : tuple (sh, sw)
    padding : 'valid' or 'same'
    output_padding : tuple (oph, opw)

    Returns:
    output : np.ndarray [batch_size, out_channels, out_height, out_width]
    """
    if data.ndim != 4 or filters.ndim != 4:
        raise ValueError("Inputs must be 4D arrays")

    batch_size, in_chan, in_h, in_w = data.shape
    out_chan, _, fh, fw = filters.shape
    sh, sw = stride
    oph, opw = output_padding

    # Calculate expected output shape
    if padding == 'same':
        out_h = in_h * sh + oph
        out_w = in_w * sw + opw
        pad_h = (in_h - 1) * sh + fh - out_h + oph
        pad_w = (in_w - 1) * sw + fw - out_w + opw
        pad_h = max(pad_h, 0)
        pad_w = max(pad_w, 0)
    elif padding == 'valid':
        out_h = (in_h - 1) * sh + fh + oph
        out_w = (in_w - 1) * sw + fw + opw
        pad_h = pad_w = 0
    else:
        raise ValueError("Padding must be 'valid' or 'same'")

    # Step 1: Upsample
    up_h = (in_h - 1) * sh + 1
    up_w = (in_w - 1) * sw + 1
    upsampled = np.zeros((batch_size, in_chan, up_h, up_w), dtype=data.dtype)
    upsampled[:, :, ::sh, ::sw] = data

    # Step 2: Pad
    pad_top = pad_h // 2
    pad_bottom = pad_h - pad_top
    pad_left = pad_w // 2
    pad_right = pad_w - pad_left
    padded = np.pad(
        upsampled,
        [(0, 0), (0, 0), (pad_top, pad_bottom), (pad_left, pad_right)],
        mode='constant'
    )

    # Step 3: Adjust padding if needed to match output shape
    needed_h = out_h + fh - 1
    needed_w = out_w + fw - 1
    cur_h, cur_w = padded.shape[2], padded.shape[3]
    extra_pad_h = max(0, needed_h - cur_h)
    extra_pad_w = max(0, needed_w - cur_w)

    if extra_pad_h > 0 or extra_pad_w > 0:
        padded = np.pad(
            padded,
            [(0, 0), (0, 0), (0, extra_pad_h), (0, extra_pad_w)],
            mode='constant'
        )

    # Step 4: Sliding window view
    windows = sliding_window_view(padded, (fh, fw), axis=(2, 3))  # shape: [B, C, H_out, W_out, fh, fw]

    # Step 5: Apply filters with einsum
    output = np.einsum('bcijxy,ocxy->boij', windows, filters)

    # Final sanity check
    assert output.shape[2] == out_h and output.shape[3] == out_w, \
        f"Deconv output shape mismatch: got {output.shape[2:]}, expected ({out_h}, {out_w})"
    
    return output



RELU:

In [24]:
def relu(data):
    return np.maximum(0.0,data)

BATCH NORMALIZATION:

In [25]:
import numpy as np

def batch_norm_forward(x, gamma, beta, running_mean, running_var, momentum=0.99, epsilon=1e-3, training=True):
    """
    Batch normalization forward pass for 4D input [batch, channels, height, width].

    Inputs:
    - x: Input data, shape (N, C, H, W)
    - gamma: Scale parameter, shape (C,)
    - beta: Shift parameter, shape (C,)
    - running_mean: Running mean (for inference), shape (C,)
    - running_var: Running variance (for inference), shape (C,)
    - momentum: Momentum for running mean/variance.
    - epsilon: Small float for numerical stability.
    - training: Boolean, True for training mode, False for inference.

    Returns:
    - out: Batch-normalized output, same shape as x
    - cache: Values needed for backward pass (used only in training)
    - updated_running_mean, updated_running_var: Updated running statistics
    """
    N, C, H, W = x.shape
    if training:
        # Compute mean and variance across batch, height, and width (per channel)
        mean = np.mean(x, axis=(0, 2, 3), keepdims=True)  # shape (1, C, 1, 1)
        var = np.var(x, axis=(0, 2, 3), keepdims=True)    # shape (1, C, 1, 1)

        # Normalize
        x_hat = (x - mean) / np.sqrt(var + epsilon)
        out = gamma.reshape(1, C, 1, 1) * x_hat + beta.reshape(1, C, 1, 1)

        # Update running stats
        updated_running_mean = momentum * running_mean + (1 - momentum) * mean.squeeze()
        updated_running_var = momentum * running_var + (1 - momentum) * var.squeeze()

        cache = (x, x_hat, mean, var, gamma, beta, epsilon)
    else:
        # Use running statistics for inference
        mean = running_mean.reshape(1, C, 1, 1)
        var = running_var.reshape(1, C, 1, 1)
        x_hat = (x - mean) / np.sqrt(var + epsilon)
        out = gamma.reshape(1, C, 1, 1) * x_hat + beta.reshape(1, C, 1, 1)

        updated_running_mean = running_mean
        updated_running_var = running_var
        cache = None  # Not needed in inference

    return out, cache, updated_running_mean, updated_running_var


DERIVATIVE FOR BATCH NORMALIZATION:

In [26]:
def batch_norm_backward(dout, cache):
    """
    Backward pass for batch normalization (channels first).

    Inputs:
    - dout: Upstream gradient, shape (N, C, H, W)
    - cache: Tuple from forward pass

    Returns:
    - dx: Gradient w.r.t. input x, shape (N, C, H, W)
    - dgamma: Gradient w.r.t. gamma, shape (C,)
    - dbeta: Gradient w.r.t. beta, shape (C,)
    """
    x, x_hat, mean, var, gamma, beta, epsilon = cache
    N, C, H, W = dout.shape
    size = N * H * W

    # Gradients of beta and gamma
    dbeta = np.sum(dout, axis=(0, 2, 3))
    dgamma = np.sum(dout * x_hat, axis=(0, 2, 3))

    # Gradient w.r.t. x_hat
    dx_hat = dout * gamma.reshape(1, C, 1, 1)

    # Intermediate gradients
    dvar = np.sum(dx_hat * (x - mean) * -0.5 * np.power(var + epsilon, -1.5), axis=(0, 2, 3), keepdims=True)
    dmean = np.sum(dx_hat * -1 / np.sqrt(var + epsilon), axis=(0, 2, 3), keepdims=True) + \
            dvar * np.sum(-2 * (x - mean), axis=(0, 2, 3), keepdims=True) / size

    # Gradient w.r.t. input x
    dx = dx_hat / np.sqrt(var + epsilon) + dvar * 2 * (x - mean) / size + dmean / size

    return dx, dgamma, dbeta

DERIVATIVE FOR RELU:

In [27]:
def relu_backward(data):
    return (data>0).astype(float)

DERIVATIVE FOR CONVOLUTION:

In [28]:
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def conv_backward(dout, x, w, stride, padding='valid'):
    """
    Vectorized backward pass for batch convolution.
    """
    batch_size, in_chan, h_in, w_in = x.shape
    num_filters, _, fh, fw = w.shape
    sh, sw = stride
    dil_err_ip_shape=(batch_size ,num_filters ,int((h_in-1)*1+fh), int((w_in-1)*1+fw))
    dil_err_fl_shape=(batch_size, num_filters, int(h_in-1*(fh-1)), int(w_in-1*(fw-1)))
    dil_err_fl=np.zeros(dil_err_fl_shape)
    dil_err_ip=np.zeros(dil_err_ip_shape)
    dil_err_ip[:, :, (fh-1):-(fh-1):sh, (fw-1):-(fw-1):sw]=dout
    dil_err_fl[:, :, 0::sh, 0::sw]=dout
    w_dx=np.transpose(w,(1,0,3,2))
    dx=batch_convolution(dil_err_ip, w_dx, (1,1), 'valid')
    x_dw=np.transpose(x, (1,0,2,3))
    filt_dw=np.transpose(dil_err_fl, (1,0,2,3))
    dw_temp=batch_convolution(x_dw, filt_dw, (1,1), 'valid')
    dw=np.transpose(dw_temp, (1,0,2,3))
    #print(np.mean(np.abs(dw)))
    
    return dx, dw

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def conv_backward(dout, x, W, stride, padding='valid'):
    """
    Vectorized backward pass for batch convolution.
    """
    batch_size, in_chan, h_in, w_in = x.shape
    num_filters, _, fh, fw = W.shape
    sh, sw = stride

    # === Padding for x (input) ===
    if padding == 'same':
        out_h = int(np.ceil(h_in / sh))
        out_w = int(np.ceil(w_in / sw))
        pad_h = max((out_h - 1) * sh + fh - h_in, 0)
        pad_w = max((out_w - 1) * sw + fw - w_in, 0)
        pad_top = pad_h // 2
        pad_bottom = pad_h - pad_top
        pad_left = pad_w // 2
        pad_right = pad_w - pad_left
        x_padded = np.pad(x, [(0,0), (0,0), (pad_top, pad_bottom), (pad_left, pad_right)])
    else:
        x_padded = x
        out_h = (h_in - fh) // sh + 1
        out_w = (w_in - fw) // sw + 1

    # === dW: Vectorized filter gradient ===
    # Extract input patches as in the forward pass
    x_windows = sliding_window_view(x_padded, (fh, fw), axis=(2, 3))
    x_windows = x_windows[:, :, ::sh, ::sw, :, :]  # Apply stride
    # x_windows shape: (B, in_chan, out_h, out_w, fh, fw)
    # dout shape: (B, num_filters, out_h, out_w)
    # We want: (num_filters, in_chan, fh, fw)
    dW = np.einsum('b o h w, b c h w f g -> o c f g', dout, x_windows)

    # === dx: Vectorized input gradient ===
    # For dx, we need to convolve dout with flipped filters
    W_flip = np.flip(W, axis=(2, 3)).swapaxes(0,1)  # (in_chan, num_filters, fh, fw)
    # Upsample dout to account for stride
    H_out, W_out = dout.shape[2], dout.shape[3]
    H_upsampled = (H_out - 1) * sh + 1
    W_upsampled = (W_out - 1) * sw + 1
    dout_upsampled = np.zeros((batch_size, num_filters, H_upsampled, W_upsampled), dtype=dout.dtype)
    dout_upsampled[:, :, ::sh, ::sw] = dout

    # Pad dout_upsampled for full convolution
    pad_h = fh - 1
    pad_w = fw - 1
    dout_padded = np.pad(dout_upsampled, [(0,0), (0,0), (pad_h, pad_h), (pad_w, pad_w)])

    # Extract sliding windows from padded dout
    dout_windows = sliding_window_view(dout_padded, (fh, fw), axis=(2,3))
    # dout_windows shape: (B, num_filters, h_in, w_in, fh, fw)
    # W_flip shape: (in_chan, num_filters, fh, fw)
    # We want: (B, in_chan, h_in, w_in)
    dx = np.einsum('b o h w f g, c o f g -> b c h w', dout_windows, W_flip)

    # Remove padding from dx if 'same'
    if padding == 'same':
        dx = dx[:, :, :h_in, :w_in]

    return dx, dW


DERIVATIVE FOR DECONVOLUTION:

In [29]:
from scipy.signal import correlate2d

In [30]:
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
from scipy.signal import correlate2d

def deconv_backward(dout, x, W, stride, padding='valid', output_padding=(0,0)):
    B, in_C, H_in, W_in = x.shape
    out_C, _, fh, fw = W.shape
    sh, sw = stride
    oph, opw = output_padding

    # Gradient w.r.t. input (dx)
    W_flipped = np.flip(W, axis=(2,3)).transpose(1, 0, 2, 3)  # [in_C, out_C, fh, fw]

    if padding == 'same':
        pad_h = fh - 1
        pad_w = fw - 1
    elif padding == 'valid':
        pad_h = fh - 1 + oph
        pad_w = fw - 1 + opw
    else:
        raise ValueError("Padding must be 'valid' or 'same'")

    pad_top = pad_h // 2
    pad_bottom = pad_h - pad_top
    pad_left = pad_w // 2
    pad_right = pad_w - pad_left

    dout_padded = np.pad(dout, [(0,0), (0,0), (pad_top, pad_bottom), (pad_left, pad_right)], mode='constant')

    dx = np.zeros((B, in_C, H_in, W_in))
    for b in range(B):
        for i in range(in_C):
            for o in range(out_C):
                conv_result = correlate2d(dout[b, o], W_flipped[i, o], mode='valid')
                dx[b, i] += conv_result[::sh, ::sw]

    # Gradient w.r.t. weights (dW)
    # Calculate required padded size
    H_up = (H_in - 1) * sh + 1
    W_up = (W_in - 1) * sw + 1
    x_upsampled = np.zeros((B, in_C, H_up, W_up))
    x_upsampled[:, :, ::sh, ::sw] = x    
    required_h = dout.shape[2] + fh - 1
    required_w = dout.shape[3] + fw - 1

    cur_h, cur_w = x_upsampled.shape[2], x_upsampled.shape[3]
    pad_h = required_h - cur_h
    pad_w = required_w - cur_w

    pad_top = pad_h // 2
    pad_bottom = pad_h - pad_top
    pad_left = pad_w // 2
    pad_right = pad_w - pad_left

    if pad_h > 0 or pad_w > 0:
        x_upsampled = np.pad(
            x_upsampled,
            [(0, 0), (0, 0), (pad_top, pad_bottom), (pad_left, pad_right)],
            mode='constant'
        )

    x_windows = sliding_window_view(x_upsampled, (fh, fw), axis=(2, 3))  # [B, in_C, H_out, W_out, fh, fw]

    H_out, W_out = dout.shape[2], dout.shape[3]
    if x_windows.shape[2] != H_out or x_windows.shape[3] != W_out:
        x_windows = x_windows[:, :, :H_out, :W_out, :, :]

    dW = np.einsum('bohw,bchwxy->ocxy', dout, x_windows)
    #print(np.mean(np.abs(dW)))

    return dx, dW


ERROR METRICS:

In [31]:
def hybrid_loss(y_true, y_pred):
    mse = tf.keras.losses.MeanSquaredError()(y_true, y_pred)
    vgg = tf.keras.applications.VGG16(include_top=False, weights='imagenet')
    perceptual_model = tf.keras.Model(vgg.input, vgg.get_layer('block3_conv3').output)
    pl = tf.reduce_mean(tf.square(perceptual_model(y_true) - perceptual_model(y_pred)))
    return 0.7*pl + 0.3*mse

In [32]:
def square_error(IP,OP):
    error=(IP-OP)**2
    return error

MODEL:

In [33]:
dataset_path="D:\REQS\PROJECT\dataset\\train"

#Hypreparameters
epochs=10
back_spin=5
learning_rate=0.001
e=0.00001
batch_size=1
image_size=[3,32,32]

In [34]:
#MODEL:2 -- THE BASE
#bn is srtuctured as gamma, beta, rm, rv
s_all=[2,2]
#3x32x32
conv_f1=np.random.randn(8,3,4,4)* np.sqrt(2.0/(3*4*4))
bn_f1=np.zeros((4,8),dtype=np.float32)
bn_f1[0,:]=1
bn_f1[3,:]=1
#8x15x15
conv_f2=np.random.randn(16,8,3,3)* np.sqrt(2.0/(8*3*3))
bn_f2=np.zeros((4,16),dtype=np.float32)
bn_f2[0,:]=1
bn_f2[3,:]=1
#16x7x7
deconv_f3=np.random.randn(10,16,5,5)* np.sqrt(2.0/(12*5*5))
bn_f3=np.zeros((4,10),dtype=np.float32)
bn_f3[0,:]=1
bn_f3[3,:]=1
#10x17x17
deconv_f4=np.random.randn(6,10,4,4)* np.sqrt(2.0/(3*4*4))
bn_f4=np.zeros((4,6),dtype=np.float32)
bn_f4[0,:]=1
bn_f4[3,:]=1
#6x36x36
deconv_f5=np.random.randn(3,6,5,5)* np.sqrt(2.0/(3*5*5))
bn_f5=np.zeros((4,3),dtype=np.float32)
bn_f5[0,:]=1
bn_f5[3,:]=1
#3x75x75
conv_f6=np.random.randn(6,3,5,5)* np.sqrt(2.0/(3*5*5))
bn_f6=np.zeros((4,6),dtype=np.float32)
bn_f6[0,:]=1
bn_f6[3,:]=1
#6x36x36
conv_f7=np.random.randn(10,6,4,4)* np.sqrt(2.0/(3*4*4))
bn_f7=np.zeros((4,10),dtype=np.float32)
bn_f7[0,:]=1
bn_f7[3,:]=1
#10x17x17
conv_f8=np.random.randn(16,10,5,5)* np.sqrt(2.0/(3*5*5))
bn_f8=np.zeros((4,16),dtype=np.float32)
bn_f8[0,:]=1
bn_f8[3,:]=1
#16x7x7
deconv_f9=np.random.randn(8,16,3,3)* np.sqrt(2.0/(3*4*4))
bn_f9=np.zeros((4,8),dtype=np.float32)
bn_f9[0,:]=1
bn_f9[3,:]=1
#8x15x15
deconv_f10=np.random.randn(3,8,4,4)* np.sqrt(2.0/(3*4*4))
bn_f10=np.zeros((4,3),dtype=np.float32)
bn_f10[0,:]=1
bn_f10[3,:]=1
#3x32x32

In [35]:
#flow 1,2,3,4,5,6,7,8,9,10
def forward(IP):
    a1=batch_convolution(IP,conv_f1,(2,2),'valid')
    bn_a1, cache_a1, bn_f1[2], bn_f1[3]=batch_norm_forward(a1, bn_f1[0], bn_f1[1], bn_f1[2], bn_f1[3], 0.99, 0.001, True)
    act_a1=relu(bn_a1)
    #print(act_a1.shape)
    a2=batch_convolution(act_a1,conv_f2,(2,2),'valid')
    bn_a2, cache_a2, bn_f2[2], bn_f2[3]=batch_norm_forward(a2, bn_f2[0], bn_f2[1], bn_f2[2], bn_f2[3], 0.99, 0.001, True)
    act_a2=relu(bn_a2)
    #print(act_a2.shape)
    a3=batch_deconvolution(act_a2,deconv_f3,(2,2),'valid',(0,0))
    bn_a3, cache_a3, bn_f3[2], bn_f3[3]=batch_norm_forward(a3, bn_f3[0], bn_f3[1], bn_f3[2], bn_f3[3], 0.99, 0.001, True)
    act_a3=relu(bn_a3)
    #print(act_a3.shape)
    a4=batch_deconvolution(act_a3,deconv_f4,(2,2),'valid',(0,0))
    bn_a4, cache_a4, bn_f4[2], bn_f4[3]=batch_norm_forward(a4, bn_f4[0], bn_f4[1], bn_f4[2], bn_f4[3], 0.99, 0.001, True)
    act_a4=relu(bn_a4)
    #print(act_a4.shape)
    a5=batch_deconvolution(act_a4,deconv_f5,(2,2),'valid',(0,0))
    bn_a5, cache_a5, bn_f5[2], bn_f5[3]=batch_norm_forward(a5, bn_f5[0], bn_f5[1], bn_f5[2], bn_f5[3], 0.99, 0.001, True)
    act_a5=relu(bn_a5)
    #print(act_a5.shape)
    a6=batch_convolution(act_a5,conv_f6,(2,2),'valid')
    bn_a6, cache_a6, bn_f6[2], bn_f6[3]=batch_norm_forward(a6, bn_f6[0], bn_f6[1], bn_f6[2], bn_f6[3], 0.99, 0.001, True)
    act_a6=relu(bn_a6)
    #print(act_a6.shape)
    a7=batch_convolution(act_a6,conv_f7,(2,2),'valid')
    bn_a7, cache_a7, bn_f7[2], bn_f7[3]=batch_norm_forward(a7, bn_f7[0], bn_f7[1], bn_f7[2], bn_f7[3], 0.99, 0.001, True)
    act_a7=relu(bn_a7)
    #print(act_a7.shape)
    a8=batch_convolution(act_a7,conv_f8,(2,2),'valid')
    bn_a8, cache_a8, bn_f8[2], bn_f8[3]=batch_norm_forward(a8, bn_f8[0], bn_f8[1], bn_f8[2], bn_f8[3], 0.99, 0.001, True)
    act_a8=relu(bn_a8)
    a9=batch_deconvolution(act_a8,deconv_f9,(2,2),'valid',(0,0))
    bn_a9, cache_a9, bn_f9[2], bn_f9[3]=batch_norm_forward(a9, bn_f9[0], bn_f9[1], bn_f9[2], bn_f9[3], 0.99, 0.001, True)
    act_a9=relu(bn_a9)
    a10=batch_deconvolution(act_a9,deconv_f10,(2,2),'valid',(0,0))
    bn_a10, cache_a10, bn_f10[2], bn_f10[3]=batch_norm_forward(a10, bn_f10[0], bn_f10[1], bn_f10[2], bn_f10[3], 0.99, 0.001, True)
    act_a10=relu(bn_a10)
    
    return act_a10, bn_a10, cache_a10, a10, act_a9, bn_a9, cache_a9, a9, act_a8, bn_a8, cache_a8, a8, act_a7, bn_a7, cache_a7, a7, act_a6, bn_a6, cache_a6, a6, act_a5, bn_a5, cache_a5, a5, act_a4, bn_a4, cache_a4, a4, act_a3, bn_a3, cache_a3, a3, act_a2, bn_a2, cache_a2, a2, act_a1, bn_a1, cache_a1, a1

In [36]:
def m1_backward(IP, layers, losse):

    d_bn_f10=np.zeros((4,3),dtype=np.float32)
    d_bn_f9=np.zeros((4,8),dtype=np.float32)
    d_bn_f8=np.zeros((4,16),dtype=np.float32)
    d_bn_f7=np.zeros((4,10),dtype=np.float32)
    d_bn_f6=np.zeros((4,6),dtype=np.float32)
    d_bn_f5=np.zeros((4,3),dtype=np.float32)
    d_bn_f4=np.zeros((4,6),dtype=np.float32)
    d_bn_f3=np.zeros((4,10),dtype=np.float32)
    d_bn_f2=np.zeros((4,16),dtype=np.float32)
    d_bn_f1=np.zeros((4,8),dtype=np.float32)

    act_a10, bn_a10, cache_a10, a10, act_a9, bn_a9, cache_a9, a9, act_a8, bn_a8, cache_a8, a8, act_a7, bn_a7, cache_a7, a7, act_a6, bn_a6, cache_a6, a6, act_a5, bn_a5, cache_a5, a5, act_a4, bn_a4, cache_a4, a4, act_a3, bn_a3, cache_a3, a3, act_a2, bn_a2, cache_a2, a2, act_a1, bn_a1, cache_a1, a1=layers
    d_act_a10=losse
    d_bn_a10=relu_backward(d_act_a10)
    d_a10, d_bn_f10[0], d_bn_f10[1]=batch_norm_backward(d_bn_a10, cache_a10)
    d_act_a9, d_deconv_f10=deconv_backward(d_a10, act_a9, deconv_f10, (2,2))
    #print(d_act_a9.shape)
    d_bn_a9=relu_backward(d_act_a9)
    d_a9, d_bn_f9[0], d_bn_f9[1]=batch_norm_backward(d_bn_a9, cache_a9)
    d_act_a8, d_deconv_f9=deconv_backward(d_a9, act_a8, deconv_f9, (2,2))
    #print(d_act_a8.shape)
    d_bn_a8=relu_backward(d_act_a8)
    d_a8, d_bn_f8[0], d_bn_f8[1]=batch_norm_backward(d_bn_a8, cache_a8)
    d_act_a7, d_conv_f8=conv_backward(d_a8, act_a7, conv_f8, (2,2))
    #print(d_act_a7.shape)
    d_bn_a7=relu_backward(d_act_a7)
    d_a7, d_bn_f7[0], d_bn_f7[1]=batch_norm_backward(d_bn_a7, cache_a7)
    d_act_a6, d_conv_f7=conv_backward(d_a7, act_a6, conv_f7, (2,2))
    #print(d_act_a6.shape)
    d_bn_a6=relu_backward(d_act_a6)
    d_a6, d_bn_f6[0], d_bn_f6[1]=batch_norm_backward(d_bn_a6, cache_a6)
    d_act_a5, d_conv_f6=conv_backward(d_a6, act_a5, conv_f6, (2,2))
    #print(d_act_a5.shape)
    d_bn_a5=relu_backward(d_act_a5)
    d_a5, d_bn_f5[0], d_bn_f5[0]=batch_norm_backward(d_bn_a5, cache_a5)
    d_act_a4, d_deconv_f5=deconv_backward(d_a5, act_a4, deconv_f5, (2,2))
    #print(d_act_a4.shape)
    d_bn_a4=relu_backward(d_act_a4)
    d_a4, d_bn_f4[0], d_bn_f4[1]=batch_norm_backward(d_bn_a4, cache_a4)
    d_act_a3, d_deconv_f4=deconv_backward(d_a4, act_a3, deconv_f4, (2,2))
    #print(d_act_a3.shape)
    d_bn_a3=relu_backward(d_act_a3)
    d_a3, d_bn_f3[0], d_bn_f3[1]=batch_norm_backward(d_bn_a3, cache_a3)
    d_act_a2, d_deconv_f3=deconv_backward(d_a3, act_a2, deconv_f3, (2,2))
    #print(d_act_a2.shape)
    d_bn_a2=relu_backward(d_act_a2)
    d_a2, d_bn_f2[0], d_bn_f2[1]=batch_norm_backward(d_bn_a2, cache_a2)
    d_act_a1, d_conv_f2=conv_backward(d_a2, act_a1, conv_f2, (2,2))
    #print(d_act_a1.shape)
    d_bn_a1=relu_backward(d_act_a1)
    d_a1, d_bn_f1[0], d_bn_f1[1]=batch_norm_backward(d_bn_a1, cache_a1)
    d_IP, d_conv_f1=conv_backward(d_a1, IP, conv_f1, (2,2))
    return d_conv_f1, d_bn_f1, d_conv_f2, d_bn_f2, d_deconv_f3, d_bn_f3, d_deconv_f4, d_bn_f4, d_deconv_f5, d_bn_f5, d_conv_f6, d_bn_f6, d_conv_f7, d_bn_f7, d_conv_f8, d_bn_f8, d_deconv_f9, d_bn_f9, d_deconv_f10, d_bn_f10

In [37]:
#Update Parameters
def update_params(diffs, lr, conv_f1, conv_f2, deconv_f3, deconv_f4, deconv_f5, conv_f6, conv_f7, conv_f8, deconv_f9, deconv_f10, bn_f1, bn_f2, bn_f3, bn_f4, bn_f5, bn_f6, bn_f7, bn_f8, bn_f9, bn_f10):
    d_conv_f1, d_bn_f1, d_conv_f2, d_bn_f2, d_deconv_f3, d_bn_f3, d_deconv_f4, d_bn_f4, d_deconv_f5, d_bn_f5, d_conv_f6, d_bn_f6, d_conv_f7, d_bn_f7, d_conv_f8, d_bn_f8, d_deconv_f9, d_bn_f9, d_deconv_f10, d_bn_f10=diffs
    conv_f1-=lr*d_conv_f1
    bn_f1-=lr*d_bn_f1
    conv_f2-=lr*d_conv_f2
    bn_f2-=lr*d_bn_f2
    deconv_f3-=lr*d_deconv_f3
    bn_f3-=lr*d_bn_f3
    deconv_f4-=lr*d_deconv_f4
    bn_f4-=lr*d_bn_f4
    deconv_f5-=lr*d_deconv_f5
    bn_f5-=lr*d_bn_f5
    conv_f6-=lr*d_conv_f6
    bn_f6-=lr*d_bn_f6
    conv_f7-=lr*d_conv_f7
    bn_f7-=lr*d_bn_f7
    conv_f8-=lr*d_conv_f8
    bn_f8-=lr*d_bn_f8
    deconv_f9-=lr*d_deconv_f9
    bn_f9-=lr*d_bn_f9
    deconv_f10-=lr*d_deconv_f10
    bn_f10-=lr*d_bn_f10
    return

    print("d_conv_f1 - ",np.mean(np.abs(d_conv_f1)))
    print("d_conv_f2 - ",np.mean(np.abs(d_conv_f2)))
    print("d_deconv_f3 - ",np.mean(np.abs(d_deconv_f3)))
    print("d_deconv_f4 - ",np.mean(np.abs(d_deconv_f4)))
    print("d_deconv_f5 - ",np.mean(np.abs(d_deconv_f5)))
    print("d_conv_f6 - ",np.mean(np.abs(d_conv_f6)))
    print("d_conv_f7 - ",np.mean(np.abs(d_conv_f7)))
    print("d_conv_f8 - ",np.mean(np.abs(d_conv_f8)))
    print("d_deconv_f9 - ",np.mean(np.abs(d_deconv_f9)))
    print("d_deconv_f10 - ",np.mean(np.abs(d_deconv_f10)))
    print("d_bn_f1 - ",np.mean(np.abs(d_bn_f1)))
    print("d_bn_f2 - ",np.mean(np.abs(d_bn_f2)))
    print("d_bn_f3 - ",np.mean(np.abs(d_bn_f3)))
    print("d_bn_f4 - ",np.mean(np.abs(d_bn_f4)))
    print("d_bn_f5 - ",np.mean(np.abs(d_bn_f5)))
    print("d_bn_f6 - ",np.mean(np.abs(d_bn_f6)))
    print("d_bn_f7 - ",np.mean(np.abs(d_bn_f7)))
    print("d_bn_f8 - ",np.mean(np.abs(d_bn_f8)))
    print("d_bn_f9 - ",np.mean(np.abs(d_bn_f9)))
    print("d_bn_f10 - ",np.mean(np.abs(d_bn_f10)))
    print("\n")
    print("-_-_-_-_-_-_-_-_-_")
    print("\n")

In [38]:
def load_image_paths():
    """Loads all image file paths from the dataset directory."""
    supported_formats = ('.png', '.jpg', '.jpeg', '.bmp')
    image_paths = [os.path.join(dataset_path, f) for f in os.listdir(dataset_path) 
                    if f.endswith(supported_formats)]
    return image_paths
    
def load_image(image_path):
    """Loads an image, resizes it, and normalizes pixel values to the range [0, 1]."""
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not load image: {image_path}")
    #Eimage = cv2.resize(image, image_size)
    image = image.astype(np.float64) / 255.0  # Normalize to [0, 1]
    return image


In [39]:
def get_batch(image_paths):
    """Generates a batch of images for training."""
    batch_paths = np.random.choice(image_paths, batch_size, replace=False)
    images = np.array([load_image(path) for path in batch_paths])
    return images
    
image_paths = load_image_paths()
for epoch in range(epochs):
    total_loss = 0
    num_batches = len(image_paths) // batch_size
        
    for batch_index in range(num_batches):
        batch = get_batch(image_paths)
        batch = np.transpose(batch, (0,3,1,2))  # Proper channel-first format
        # Forward pass through the model
        results = forward(batch)
        act_a10, bn_a10, cache_a10, a10, act_a9, bn_a9, cache_a9, a9, act_a8, bn_a8, cache_a8, a8, act_a7, bn_a7, cache_a7, a7, act_a6, bn_a6, cache_a6, a6, act_a5, bn_a5, cache_a5, a5, act_a4, bn_a4, cache_a4, a4, act_a3, bn_a3, cache_a3, a3, act_a2, bn_a2, cache_a2, a2, act_a1, bn_a1, cache_a1, a1=results
            
        # Compute error (assume the error function is already implemented in the model)
        losse = square_error(act_a10, batch)
        loss =np.mean(losse)
        # Backward pass to compute gradients
        box=m1_backward(batch, results, losse)
            
        # Update model parameters (assume the model has a method to update its parameters)
        update_params(box, learning_rate, conv_f1, conv_f2, deconv_f3, deconv_f4, deconv_f5, conv_f6, conv_f7, conv_f8, deconv_f9, deconv_f10, bn_f1, bn_f2, bn_f3, bn_f4, bn_f5, bn_f6, bn_f7, bn_f8, bn_f9, bn_f10)
        
        total_loss += loss

        if (batch_index + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_index+1}/{num_batches}], Loss: {loss:.4f}")
            #print("\n")
            #print("-_-_-_-_-_-_-_-_-_")
            #print("\n")
        
    avg_loss = total_loss / num_batches
    print(f"Epoch [{epoch+1}/{epochs}] completed. Average Loss: {avg_loss:.4f}")

Epoch [1/10], Batch [10/50000], Loss: 0.1703
Epoch [1/10], Batch [20/50000], Loss: 0.1800
Epoch [1/10], Batch [30/50000], Loss: 0.1375
Epoch [1/10], Batch [40/50000], Loss: 0.2302
Epoch [1/10], Batch [50/50000], Loss: 0.2666
Epoch [1/10], Batch [60/50000], Loss: 0.4704
Epoch [1/10], Batch [70/50000], Loss: 0.3747
Epoch [1/10], Batch [80/50000], Loss: 0.2040
Epoch [1/10], Batch [90/50000], Loss: 0.1873
Epoch [1/10], Batch [100/50000], Loss: 0.2652
Epoch [1/10], Batch [110/50000], Loss: 0.5348
Epoch [1/10], Batch [120/50000], Loss: 0.4488
Epoch [1/10], Batch [130/50000], Loss: 0.2905
Epoch [1/10], Batch [140/50000], Loss: 0.2922
Epoch [1/10], Batch [150/50000], Loss: 0.1778
Epoch [1/10], Batch [160/50000], Loss: 0.1479
Epoch [1/10], Batch [170/50000], Loss: 0.1979
Epoch [1/10], Batch [180/50000], Loss: 0.1799
Epoch [1/10], Batch [190/50000], Loss: 0.3112
Epoch [1/10], Batch [200/50000], Loss: 0.2753
Epoch [1/10], Batch [210/50000], Loss: 0.1513
Epoch [1/10], Batch [220/50000], Loss: 0.15

In [40]:
params = [
    ('conv_f1', conv_f1), ('bn_f1', bn_f1),
    ('conv_f2', conv_f2), ('bn_f2', bn_f2),
    ('deconv_f3', deconv_f3), ('bn_f3', bn_f3),
    ('deconv_f4', deconv_f4), ('bn_f4', bn_f4),
    ('deconv_f5', deconv_f5), ('bn_f5', bn_f5),
    ('conv_f6', conv_f6), ('bn_f6', bn_f6),
    ('conv_f7', conv_f7), ('bn_f7', bn_f7),
    ('conv_f8', conv_f8), ('bn_f8', bn_f8),
    ('deconv_f9', deconv_f9), ('bn_f9', bn_f9),
    ('deconv_f10', deconv_f10), ('bn_f10', bn_f10)
]

print(",\n".join([f"{name}: {value}" for name, value in params]))


conv_f1: [[[[ 5.31656622e+01  5.42138253e+01  5.42412280e+01  5.31438875e+01]
   [ 4.75292060e+01  4.76700218e+01  4.74144401e+01  4.68595870e+01]
   [ 4.01848521e+01  4.02126620e+01  3.94864755e+01  3.83709763e+01]
   [ 3.16312398e+01  3.10148893e+01  3.00254838e+01  2.89290665e+01]]

  [[ 4.40021149e+01  4.54649160e+01  4.37126724e+01  4.27189932e+01]
   [ 3.90582981e+01  3.86657660e+01  3.77615748e+01  3.67539905e+01]
   [ 3.12211891e+01  3.13467742e+01  3.06674442e+01  2.88949817e+01]
   [ 2.34916901e+01  2.29778869e+01  2.14931892e+01  2.02296627e+01]]

  [[ 2.56921706e+01  2.79319476e+01  2.84895341e+01  2.87942816e+01]
   [ 2.38404293e+01  2.53093286e+01  2.60868436e+01  2.64117844e+01]
   [ 1.98915323e+01  2.11750238e+01  2.20217564e+01  2.25883154e+01]
   [ 1.51990331e+01  1.60173121e+01  1.57208440e+01  1.66109817e+01]]]


 [[[ 4.38263703e+00  4.01928148e+00  3.55328853e+00  2.34923848e+00]
   [ 4.10643324e+00  3.66676926e+00  3.82112222e+00  2.37528215e+00]
   [ 5.64688504e+