In [17]:
import random
import math
import torch

# Define the sigmoid function
def sigmoid(value):
    return 1 / (1 + math.exp(-value))

# Manual Backpropagation
def manual_backpropagation(x, A, B, C):
    K = len(x)
    
    # Forward propagation
    y = [0] * K
    for i in range(K):
        y[i] = sum(A[i][j] * x[j] for j in range(K))
        y[i] = sigmoid(y[i])

    v = [0] * K
    for i in range(K):
        v[i] = sum(B[i][j] * x[j] for j in range(K))

    z = [u + v for u, v in zip(y, v)]

    w = [0] * K
    for i in range(K):
        w[i] = sum(C[i][j] * z[j] for j in range(K))

    for val in w:
        L = sum(val ** 2 for val in w)

    print(f"Loss Manual2: {L}")
    
    # Backpropagation
    # Compute gradients ∂L/∂A, ∂L/∂B, ∂L/∂C
    dL_dw = [2 * val for val in w]
    dL_dC = [[dL_dw[i] * z[j] for j in range(K)] for i in range(K)]

    dL_dz = [0] * K
    for i in range(K):
        dL_dz[i] = sum(dL_dw[j] * C[i][j] for j in range(K))

    dL_dv = [0] * K
    for i in range(K):
        dL_dv[i] = dL_dz[i]

    dL_dB = [[dL_dv[i] * x[j] for j in range(K)] for i in range(K)]

    dL_dy = [0] * K
    for i in range(K):
        dL_dy[i] = sum(dL_dz[j] * B[i][j] for j in range(K))
        dL_dy[i] *= y[i] * (1 - y[i])

    dL_dA = [[dL_dy[i] * x[j] for j in range(K)] for i in range(K)]
    return dL_dA, dL_dB, dL_dC

def manual_backpropagation_rounded(x, A, B, C):
    K = len(x)
    
    # Forward propagation
    y = [0] * K
    for i in range(K):
        y[i] = sum(A[i][j] * x[j] for j in range(K))
        y[i] = sigmoid(y[i])

    v = [0] * K
    for i in range(K):
        v[i] = sum(B[i][j] * x[j] for j in range(K))

    z = [u + v for u, v in zip(y, v)]

    w = [0] * K
    for i in range(K):
        w[i] = sum(C[i][j] * z[j] for j in range(K))

    for val in w:
        L = sum(val ** 2 for val in w)

    print(f"Loss Manual2: {L}")
    
    # Backpropagation ---- !!! Something wrong with this back propogation algorithm !!!
    # Compute gradients ∂L/∂A, ∂L/∂B, ∂L/∂C
    dL_dw = [round(2 * val, 4) for val in w]
    dL_dC = [[round(dL_dw[i] * z[j], 4) for j in range(K)] for i in range(K)]

    dL_dz = [0] * K
    for i in range(K):
        dL_dz[i] = sum(round(dL_dw[j] * C[i][j], 4) for j in range(K))

    dL_dv = [0] * K
    for i in range(K):
        dL_dv[i] = dL_dz[i]

    dL_dB = [[round(dL_dv[i] * x[j], 4) for j in range(K)] for i in range(K)]

    dL_dy = [0] * K
    for i in range(K):
        dL_dy[i] = sum(round(dL_dz[j] * B[i][j], 4) for j in range(K))
        dL_dy[i] *= round(y[i] * (1 - y[i]), 4)

    dL_dA = [[round(dL_dy[i] * x[j], 4) for j in range(K)] for i in range(K)]
    return dL_dA, dL_dB, dL_dC


def manual_backpropagation_2(x, A, B, C):
    K = len(x)

    x_tensor = torch.tensor(x, requires_grad=True, dtype=torch.float)
    # Convert A, B, and C to PyTorch tensors
    A_tensor = torch.tensor(A, requires_grad=True, dtype=torch.float)
    B_tensor = torch.tensor(B, requires_grad=True, dtype=torch.float)
    C_tensor = torch.tensor(C, requires_grad=True, dtype=torch.float)
    
    # Forward propagation
    y = torch.sigmoid(torch.matmul(A_tensor, x_tensor))
    v = torch.matmul(B_tensor, x_tensor)
    z = y + v
    w = torch.matmul(C_tensor, z)
    L = torch.norm(w)**2

    print(f"Loss Manual2: {L}")

    # Compute gradients manually
    dL_dA = torch.zeros_like(A_tensor)
    dL_dB = torch.zeros_like(B_tensor)
    dL_dC = torch.zeros_like(C_tensor)

    for i in range(K):
        for j in range(K):
            for k in range(K):
                dL_dA[i, j] += 2 * w[k] * C_tensor[k, i] * z[j] * y[i] * (1 - y[i]) * x_tensor[j]
                dL_dB[i, j] += 2 * w[i] * C_tensor[i, k] * z[k] * x_tensor[j]
                dL_dC[i, j] += 2 * w[i] * z[i] * y[j] * (1 - y[j])

    return dL_dA, dL_dB, dL_dC

# Automatic Differentiation with PyTorch
def autograd_torch_backpropagation(x, A, B, C):
    # Set a random seed for reproducibility
    random.seed(0)
    torch.manual_seed(0)
    
    K = len(x)

    x_tensor = torch.tensor(x, requires_grad=True, dtype=torch.float)
    # Convert A, B, and C to PyTorch tensors
    A_tensor = torch.tensor(A, requires_grad=True, dtype=torch.float)
    B_tensor = torch.tensor(B, requires_grad=True, dtype=torch.float)
    C_tensor = torch.tensor(C, requires_grad=True, dtype=torch.float)

    # Forward propagation
    y = torch.sigmoid(torch.matmul(A_tensor, x_tensor))
    v = torch.matmul(B_tensor, x_tensor)
    z = y + v
    w = torch.matmul(C_tensor, z)
    L = torch.norm(w)**2

    print(f"Loss Automatic: {L}")

    # Backpropagation
    L.backward()  # Automatically computes gradients

    # Access the gradients
    dL_dA = A_tensor.grad
    dL_dB = B_tensor.grad
    dL_dC = C_tensor.grad

    return dL_dA, dL_dB, dL_dC

# Define the input data
K = 3
A = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
B = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
C = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]

# Input vector x
x = [random.uniform(-1, 1) for _ in range(K)]

# Manual Backpropagation
dL_dA_manual, dL_dB_manual, dL_dC_manual = manual_backpropagation(x, A, B, C)

dL_dA_manual_round, dL_dB_manual_round, dL_dC_manual_round = manual_backpropagation_rounded(x, A, B, C)

dL_dA_manual_2, dL_dB_manual_2, dL_dC_manual_2 = manual_backpropagation_2(x, A, B, C)

# Automatic Differentiation with PyTorch
dL_dA_torch, dL_dB_torch, dL_dC_torch = autograd_torch_backpropagation(x, A, B, C)

# Print the gradients
print("--- Manual Backpropagation Gradient ∂L/∂A: ----")
for row in dL_dA_manual:
    print(row)

print("--- Manual Backpropagation Gradient 2 ∂L/∂A: ---")
for row in dL_dA_manual_2:
    print(row)

print("--- Manual Backpropagation Gradient Rounded ∂L/∂A: --- ")
for row in dL_dA_manual_round:
    print(row)

print("--- Automatic Differentiation with PyTorch Gradient ∂L/∂A: ---")
print(dL_dA_torch)

# print("Manual Backpropagation Gradient ∂L/∂B:")
# for row in dL_dB_manual:
#     print(row)

# print("Automatic Differentiation with PyTorch Gradient ∂L/∂B:")
# print(dL_dB_torch)

# print("Manual Backpropagation Gradient ∂L/∂C:")
# for row in dL_dC_manual:
#     print(row)

# print("Automatic Differentiation with PyTorch Gradient ∂L/∂C:")
# print(dL_dC_torch)

Loss Manual2: 6.39822721545684
Loss Manual2: 6.39822721545684
Loss Manual2: 6.398228168487549
Loss Automatic: 6.398228168487549
--- Manual Backpropagation Gradient ∂L/∂A: ----
[0.18968844463637405, 0.7065160304517885, 0.7981986829563948]
[-0.07376755578300863, -0.27475559087349993, -0.31040987227125494]
[-0.16144176584057704, -0.6013080858428822, -0.6793381911857007]
--- Manual Backpropagation Gradient 2 ∂L/∂A: ---
tensor([0.2211, 0.6091, 1.3381], grad_fn=<UnbindBackward0>)
tensor([0.4722, 1.3009, 2.8577], grad_fn=<UnbindBackward0>)
tensor([0.0839, 0.2312, 0.5079], grad_fn=<UnbindBackward0>)
--- Manual Backpropagation Gradient Rounded ∂L/∂A: --- 
[0.1897, 0.7067, 0.7984]
[-0.0738, -0.2747, -0.3104]
[-0.1615, -0.6014, -0.6794]
--- Automatic Differentiation with PyTorch Gradient ∂L/∂A: ---
tensor([[0.1668, 0.6211, 0.7017],
        [0.3561, 1.3264, 1.4986],
        [0.0633, 0.2358, 0.2663]])
