In [775]:
import random
import math
import numpy as np

K = 3
# Define the input data
A = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
B = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
C = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
# Input vector x
x = [random.uniform(-1, 1) for _ in range(K)]

In [776]:
# Define the sigmoid function
def sigmoid(value):
    return 1 / (1 + math.exp(-value))

def sigmoid_derivative(value):
    return sigmoid(value) * (1 - sigmoid(value))

def matrix_vector_multiplication(matrix, vector):
    if len(matrix) != len(matrix[0]) or len(matrix) != len(vector):
        raise ValueError("Matrix and vector dimensions don't match for multiplication")

    result = [0] * len(vector)
    for i in range(len(matrix)):
        for j in range(len(vector)):
            result[i] += matrix[i][j] * vector[j]

    return result

def outer_product_manual(arr1, arr2):
    # Ensure arr1 and arr2 have the same length
    if len(arr1) != len(arr2):
        raise ValueError("Input arrays must have the same length.")

    outer_result = []
    for i in range(len(arr1)):
        row = [arr1[i] * val for val in arr2]
        outer_result.append(row)

    return outer_result

def matrix_vector_dot_product_manual(matrix, vector):
    # Ensure the matrix has the correct shape (k x k) and the vector has length k
    k = len(vector)
    if len(matrix) != k or len(matrix[0]) != k:
        raise ValueError("Matrix dimensions must be k x k, and the vector must have length k.")

    result = [sum(matrix[i][j] * vector[i] for i in range(k)) for j in range(k)]
    return result

def vector_elementwise_multiply_manual(vector1, vector2):
    # Ensure both vectors have the same length (k)
    k = len(vector1)
    if len(vector2) != k:
        raise ValueError("Both vectors must have the same length (k).")

    result = [vector1[i] * vector2[i] for i in range(k)]
    return result

# Manual Backpropagation
def manual_backpropagation(x, A, B, C):
    K = len(x)
    
    # Initialze all other vectors to zero, of length K
    y, u, v, w = [0] * K, [0] * K, [0] * K, [0] * K

    # Forward propagation
    y = matrix_vector_multiplication(A, x)
    for i in range(K):
        u[i] = sigmoid(y[i])
    v = matrix_vector_multiplication(B, x)
    z = [u + v for u, v in zip(u, v)]
    w = matrix_vector_multiplication(C, z)

    L = sum(val ** 2 for val in w)

    #Backpropagation
    #Compute gradients ∂L/∂A, ∂L/∂B, ∂L/∂C
    dL_dw = [round(2 * val, 4) for val in w]
    dL_dz = matrix_vector_dot_product_manual(C, dL_dw)

    #Compute gradient ∂L/∂A
    y_prime = []
    for i in range(K):
        y_prime.append(sigmoid_derivative(y[i]))

    dL_dy = vector_elementwise_multiply_manual(dL_dz, y_prime)
    dL_dA = outer_product_manual(dL_dy, x)

    #Compute gradient ∂L/∂b
    dL_dv = dL_dz # dz_dv = 1
    dL_dB = outer_product_manual(dL_dv, x)

    #Compute gradient ∂L/∂C
    dL_dC = outer_product_manual(dL_dw, z)

    return dL_dA, dL_dB, dL_dC, L


# Manual Backpropagation
dL_dA, dL_dB, dL_dC, loss = manual_backpropagation(x, A, B, C)

print(f"Manual Backpropagation Loss: {loss}")
print("Manual Backpropagation Gradient ∂L/∂A:")
print(dL_dA)
print("Manual Backpropagation Gradient ∂L/∂B:")
print(dL_dB)
print("Manual Backpropagation Gradient ∂L/∂C:")
print(dL_dC)

[0.2475494922680394, 0.24990341037668337, 0.24613131555999815]
[2.6941037971865462, -0.25914906232662077, -1.3628943180419348]
[0.6669240271109266, -0.0647622344713422, -0.33545097146890795]
Manual Backpropagation Loss: 1.663755752483803
Manual Backpropagation Gradient ∂L/∂A:
[[-0.300422123654174, -0.2945285599976424, -0.4893708903715612], [0.029172750150796622, 0.028600450554619992, 0.047520783563584565], [0.15110700639637215, 0.14814264827815948, 0.24614488893866265]]
Manual Backpropagation Gradient ∂L/∂B:
[[-1.2135840833350835, -1.1897764657046253, -1.9768608123085325], [0.11673610258789216, 0.1144460194100996, 0.19015660287290892], [0.6139284066823165, 0.6018845994509281, 1.0000551468984498]]
Manual Backpropagation Gradient ∂L/∂C:
[[2.314151963169928, -0.026865090069508626, 0.591445414794839], [2.735048533881765, -0.031751296533941445, 0.6990171520066827], [-0.7215167400321055, 0.008376118991368682, -0.18440351989899695]]


# Verify the gradients using libraries - Torch, mxnet

In [777]:
import torch

A_torch = torch.tensor(A, requires_grad=True, dtype=torch.float32)
B_torch = torch.tensor(B, requires_grad=True, dtype=torch.float32)
C_torch = torch.tensor(C, requires_grad=True, dtype=torch.float32)
x_torch = torch.tensor(x, dtype=torch.float32)

# Define the sigmoid function
def sigmoid(value):
    return 1 / (1 + torch.exp(-value))

def forwardpropagation_torch(x, A, B, C):
    y = torch.matmul(A, x.unsqueeze(1)).squeeze()
    u = sigmoid(y)
    v = torch.matmul(B, x.unsqueeze(1)).squeeze()
    z = u + v
    w = torch.matmul(C, z.unsqueeze(1)).squeeze()
    L = torch.norm(w, p=2)**2
    return L

def backwardpropagation_torch(L_torch):
    L_torch.backward()
    dA = A_torch.grad
    dB = B_torch.grad
    dC = C_torch.grad
    return dA, dB, dC

L_torch = forwardpropagation_torch(x_torch, A_torch, B_torch, C_torch)
print(f"Loss (torch): {L_torch}")
dA_torch, dB_torch, dC_torch = backwardpropagation_torch(L_torch)


print(f"Gradients (torch) dL/dA:\n{dA_torch}")
print(f"Gradients (torch) dL/dB:\n{dB_torch}")
print(f"Gradients (torch) dL/dC:\n{dC_torch}")

Loss (torch): 1.6637557744979858
Gradients (torch) dL/dA:
tensor([[-0.3004, -0.2945, -0.4894],
        [ 0.0292,  0.0286,  0.0475],
        [ 0.1511,  0.1481,  0.2462]])
Gradients (torch) dL/dB:
tensor([[-1.2136, -1.1898, -1.9769],
        [ 0.1167,  0.1144,  0.1901],
        [ 0.6139,  0.6019,  1.0001]])
Gradients (torch) dL/dC:
tensor([[ 2.3141, -0.0269,  0.5914],
        [ 2.7351, -0.0318,  0.6990],
        [-0.7215,  0.0084, -0.1844]])


In [778]:
import mxnet as mx
from mxnet import autograd

x_mx = mx.nd.array(x_torch.detach().numpy())
A_mx = mx.nd.array(A_torch.detach().numpy())
B_mx = mx.nd.array(B_torch.detach().numpy())
C_mx = mx.nd.array(C_torch.detach().numpy())

def sigmoid(value):
    return 1 / (1 + mx.nd.exp(-value))

def forwardpropagation_mx(x, A, B, C):
    y = mx.nd.dot(A, x)
    u = sigmoid(y)
    v = mx.nd.dot(B, x)
    z = u + v
    w = mx.nd.dot(C, z)
    L = mx.nd.norm(w) ** 2
    return L

def backwardpropagation_mx(L_mx):
    L_mx.backward()
    dA = A_mx.grad
    dB = B_mx.grad
    dC = C_mx.grad
    return dA, dB, dC

A_mx.attach_grad()
B_mx.attach_grad()
C_mx.attach_grad()

with autograd.record():
    L_mx = forwardpropagation_mx(x_mx, A_mx, B_mx, C_mx)
    print(f"Loss (mx): {L_mx}")

dA_mx, dB_mx, dC_mx = backwardpropagation_mx(L_mx)

print(f"Gradients (mx) dL/dA:\n{dA_mx}")
print(f"Gradients (mx) dL/dB:\n{dB_mx}")
print(f"Gradients (mx) dL/dC:\n{dC_mx}")


Loss (mx): 
[1.6637558]
<NDArray 1 @cpu(0)>
Gradients (mx) dL/dA:

[[-0.3004252  -0.29453158 -0.4893759 ]
 [ 0.02917137  0.02859909  0.04751853]
 [ 0.1511112   0.14814675  0.24615172]]
<NDArray 3x3 @cpu(0)>
Gradients (mx) dL/dB:

[[-1.2135965  -1.1897885  -1.9768808 ]
 [ 0.11673056  0.11444059  0.19014758]
 [ 0.6139454   0.6019013   1.0000829 ]]
<NDArray 3x3 @cpu(0)>
Gradients (mx) dL/dC:

[[ 2.3141418  -0.0268649   0.59144276]
 [ 2.7350845  -0.03175163  0.69902635]
 [-0.72155     0.00837648 -0.18441202]]
<NDArray 3x3 @cpu(0)>


In [779]:

def manual_backpropagation2(x, A, B, C):
    K = len(x)
    
    # Forward propagation
    y = [0] * K
    for i in range(K):
        y[i] = sum(A[i][j] * x[j] for j in range(K))
    
    # Convert y to a NumPy array
    y = np.array([sigmoid(y[i]) for i in range(K)])

    v = [0] * K
    for i in range(K):
        v[i] = sum(B[i][j] * x[j] for j in range(K))

    z = [u + v for u, v in zip(y, v)]

    w = [0] * K
    for i in range(K):
        w[i] = sum(C[i][j] * z[j] for j in range(K))

    L = sum(val ** 2 for val in w)

    # Backpropagation
    # Compute gradients ∂L/∂A, ∂L/∂B, ∂L/∂C
    dL_dw = [round(2 * val, 4) for val in w]
    dL_dC = np.outer(dL_dw, z)

    dL_dz = np.dot(dL_dw, C)
    dL_dv = dL_dz
    dL_dB = np.outer(dL_dv, x)

    dL_dy = np.multiply(dL_dz, np.multiply(y, 1 - y))
    dL_dA = np.outer(dL_dy, x)

    return dL_dA, dL_dB, dL_dC

In [780]:
def custom_elementwise_multiply(vector1, vector2):
    # Ensure both vectors have the same length (k)
    k = len(vector1)
    if len(vector2) != k:
        raise ValueError("Both vectors must have the same length (k).")

    result = [vector1[i] * vector2[i] for i in range(k)]
    return result

# Example usage:
k = 4
vector1 = [1, 2, 3, 4]
vector2 = [0.1, 0.2, 0.3, 0.4]

result = custom_elementwise_multiply(vector1, vector2)
print(result)

[0.1, 0.4, 0.8999999999999999, 1.6]
