In [16]:
# Single-filter convolution layer (CNN) with simple backpropagation to update the kernel weights
import numpy as np

def _pad_and_output_shape(image, kernel_shape, stride, padding):
    """Zero-pad ``image`` and return ``(padded, out_h, out_w)`` for the window grid."""
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    if padding < 0:
        raise ValueError("padding must be non-negative")
    padded = np.pad(image, padding, mode="constant")
    out_h = (padded.shape[0] - kernel_shape[0]) // stride + 1
    out_w = (padded.shape[1] - kernel_shape[1]) // stride + 1
    if out_h < 1 or out_w < 1:
        raise ValueError("kernel does not fit inside the padded image")
    return padded, out_h, out_w


def conv_forward(image, kernel, stride=1, padding=0):
    """Slide ``kernel`` over ``image`` and return the resulting feature map.

    Each output element is the sum of the element-wise product between the
    kernel and the image window whose top-left corner is at
    ``(i * stride, j * stride)`` in the zero-padded image.

    Args:
        image: 2-D array holding the input.
        kernel: 2-D array holding the filter weights.
        stride: step between consecutive windows (>= 1).
        padding: number of zero rows/columns added on every side (>= 0).

    Returns:
        2-D float array of shape
        ``((H + 2*padding - kH) // stride + 1, (W + 2*padding - kW) // stride + 1)``.

    Raises:
        ValueError: if ``stride`` < 1, ``padding`` < 0, or the kernel is larger
            than the padded image.
    """
    kernel_h, kernel_w = kernel.shape
    padded, out_h, out_w = _pad_and_output_shape(image, kernel.shape, stride, padding)
    feature_map = np.zeros((out_h, out_w))
    for i in range(out_h):
        for j in range(out_w):
            top, left = i * stride, j * stride
            region = padded[top:top + kernel_h, left:left + kernel_w]
            feature_map[i, j] = np.sum(region * kernel)
    return feature_map


def conv_kernel_gradient(image, upstream_grad, kernel_shape, stride=1, padding=0):
    """Return dLoss/dKernel given dLoss/dOutput (``upstream_grad``).

    Chain rule: output[i, j] = sum(region_ij * kernel), so
    d output[i, j] / d kernel = region_ij, and therefore
    dLoss/dKernel = sum over (i, j) of upstream_grad[i, j] * region_ij.

    Args:
        image: 2-D array that was fed to the forward pass.
        upstream_grad: 2-D array, gradient of the loss w.r.t. the feature map.
        kernel_shape: ``(kH, kW)`` of the filter.
        stride: stride used in the forward pass.
        padding: zero padding used in the forward pass.

    Returns:
        Float array of shape ``kernel_shape``.

    Raises:
        ValueError: on invalid stride/padding, or if ``upstream_grad`` does not
            have the feature-map shape implied by the other arguments.
    """
    kernel_h, kernel_w = kernel_shape
    padded, out_h, out_w = _pad_and_output_shape(image, kernel_shape, stride, padding)
    if upstream_grad.shape != (out_h, out_w):
        raise ValueError("upstream_grad shape does not match the feature map shape")
    # Explicit float dtype so the in-place accumulation works for any kernel dtype.
    gradient = np.zeros((kernel_h, kernel_w), dtype=float)
    for i in range(out_h):
        for j in range(out_w):
            top, left = i * stride, j * stride
            region = padded[top:top + kernel_h, left:left + kernel_w]
            gradient += upstream_grad[i, j] * region
    return gradient


input_image = np.array([
    [1, 2, 0, 3, 1, 2],
    [4, 5, 1, 0, 2, 3],
    [1, 0, 2, 4, 1, 0],
    [3, 1, 2, 1, 5, 2],
    [2, 4, 0, 2, 3, 1],
    [1, 3, 2, 0, 1, 4]
])
filter_kernel = np.array([
    [0.1, 0.2, -0.1],
    [0.0, 0.1, 0.2],
    [0.05, -0.2, 0.1]
])

# Hyper parameters
stride = 1
padding = 0
learning_rate = 0.01

# Feature map size: ((input - kernel + 2 * padding) // stride) + 1 along each axis
feature_map_h = (((input_image.shape[0]-filter_kernel.shape[0])+(2*padding))//stride)+1
feature_map_w = (((input_image.shape[1]-filter_kernel.shape[1])+(2*padding))//stride)+1
target_output = np.ones((feature_map_h, feature_map_w)) # Dummy target output of same shape as the feature map

# Forward pass (honours both stride and padding)
predicted_output = conv_forward(input_image, filter_kernel, stride, padding)

# Compute the loss between predicted and target output using MSE:
# MSE = (1/n) * sum((target - predicted)**2)
# n = number of data points in the feature map (16 for a 4x4 output)
loss = np.mean((target_output - predicted_output)**2)
print("\nPredicted Output:\n", predicted_output)
print("\nTarget Output:\n", target_output)
print("\nLoss:", loss)

# Backward propagation to update the kernel weights (chain rule):
# dLoss/dFilter = dLoss/dPred * dPred/dFilter
# dLoss/dPred   = (2/n) * (predicted - target)      <- derivative of MSE
# dPred/dFilter = the input region under the kernel at that output position
# updated_filter = old_filter - learning_rate * dLoss/dFilter
n = target_output.size
dL_dO = (2 / n) * (predicted_output - target_output)
filter_gradient = conv_kernel_gradient(
    input_image, dL_dO, filter_kernel.shape, stride, padding
)

filter_kernel_updated = filter_kernel - learning_rate * filter_gradient

print("\nGradient of Filter:\n", filter_gradient)
print("\nUpdated Filter Weights:\n", filter_kernel_updated)








Predicted Output:
 [[ 1.45000000e+00 -5.55111512e-17  3.00000000e-01  1.10000000e+00]
 [ 1.85000000e+00  1.45000000e+00  9.00000000e-01 -5.50000000e-01]
 [-3.00000000e-01  8.00000000e-01  1.90000000e+00  1.10000000e+00]
 [ 3.50000000e-01  5.50000000e-01  9.00000000e-01  1.60000000e+00]]

Target Output:
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]

Loss: 0.5365625

Gradient of Filter:
 [[ 0.4      0.64375 -1.38125]
 [-1.69375 -0.1125   0.50625]
 [-0.5875  -2.01875 -0.23125]]

Updated Filter Weights:
 [[ 0.096      0.1935625 -0.0861875]
 [ 0.0169375  0.101125   0.1949375]
 [ 0.055875  -0.1798125  0.1023125]]
