In [3]:
import random
import math

In [34]:
def sigmoid(x):
  """
    Logistic sigmoid 1 / (1 + e^(-x)), evaluated so that math.exp never overflows.

    Parameters:
      -x : scalar input

    Returns:
      the sigmoid of x as a float
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))

  # For very negative x, math.exp(-x) raises OverflowError. The algebraically
  # equivalent form e^x / (1 + e^x) only ever exponentiates a negative number.
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

def softmax(o,i):
  """
    Softmax of the i-th component of o, computed in a numerically stable way.

    Parameters:
      -o : vector of softmax input
      -i : index i of o

    Returns:
      exp(o[i]) / sum_j exp(o[j])
  """
  o_i = o[i]
  # Subtracting the largest input from every component leaves the ratio
  # unchanged but keeps every exponent <= 0, so math.exp cannot overflow.
  shift = max(o)
  return math.exp(o_i - shift) / sum(math.exp(oj - shift) for oj in o)

def cross_entropy_loss(y,t):
  """
    Cross-entropy loss for a one-hot target: -log of the probability assigned
    to the target class.

    Parameters:
      -y: vector y output of softmax
      -t: target vector (the position of its first 1 marks the target class)

    Returns:
      -log(y[target class])
  """
  target_class = t.index(1)
  prob_of_target = y[target_class]
  return -1 * math.log(prob_of_target)

def scalar_dl_dy(y,t,i):
  """
    Derivative of the cross-entropy loss with respect to softmax output y[i].

    Parameters:
      -y: vector output of softmax
      -t: target vector
      -i: index of y

    Returns:
      -1 / y[i] when i is the target class (first 1 in t), otherwise 0.0
  """
  # Only the target-class output appears in the loss, so every other
  # component has a zero derivative.
  return -1.0 / y[i] if i == t.index(1) else 0.0

def scalar_dy_do(y,i,j):
  """
    Derivative of softmax output y[i] with respect to softmax input o[j].

    Parameters:
      -y: vector output of softmax
      -i: index of y
      -j: index of o ( o = h*V +c)

    Returns:
      y[i]*(1-y[i]) on the diagonal (i == j), -y[i]*y[j] off the diagonal
  """
  if i != j:
    return -1*y[i]*y[j]

  return y[i]*(1-y[i])

def scalar_dl_do(dl_dy,dy_do,i):
  """
    Derivative of the loss with respect to softmax input o[i], obtained by the
    chain rule over all softmax outputs.

    Parameters:
      -dl_dy: vector of derivatives of the loss wrt softmax output
      -dy_do: matrix with derivatives of softmax output wrt softmax input ( o = h*V +c),
              indexed as dy_do[k][i] = d y_k / d o_i
      -i    : index of o

    Returns:
      sum over k of dl_dy[k] * dy_do[k][i]
  """
  dl_do_i = 0.0
  # Bind each element to a plain local name: using dl_dy[k] itself as the loop
  # target would write into the caller's vector on every iteration and would
  # fail for immutable sequences such as tuples.
  for k,dl_dy_k in enumerate(dl_dy):
    dl_do_i += dl_dy_k*dy_do[k][i]

  return dl_do_i

def scalar_do_dh(V,i,j):
  """
    Derivative of softmax input o[i] with respect to sigmoid output h[j].

    Parameters:
      -V: matrix of weights from sigmoid output to softmax input
      -i: index of o (softmax input)
      -j: index of h (sigmoid output)

    Returns:
      the weight V[j][i] (row = h index, column = o index)
  """
  weights_from_h_j = V[j]
  return weights_from_h_j[i]

"""DEPREC"""
# def scalar_do_dv(h,m):
#   """
#       Parameters:
#         -h: vector of sigmoid outputs
#         -i: index of o (softmax input)
#         -m: row index of V
#         -n: column index of V
#     """
#   # if i==n:
#   #   do_dv = h[m]
#   # else:
#   #   do_dv = 0

#   return h[m]

def scalar_dl_dh(dl_do,do_dh,i):
  """
    Derivative of the loss with respect to sigmoid output h[i], obtained by the
    chain rule over all softmax inputs.

    Parameters:
      -dl_do: vector of derivatives of the loss wrt softmax input
      -do_dh: matrix with derivatives of softmax input wrt sigmoid output,
              indexed as do_dh[j][i] = d o_j / d h_i
      -i    : index of h (sigmoid output)

    Returns:
      sum over j of dl_do[j] * do_dh[j][i]
  """
  dl_dh_i = 0.0
  # Bind each element to a plain local name: using dl_do[j] itself as the loop
  # target would write into the caller's vector on every iteration and would
  # fail for immutable sequences such as tuples.
  for j,dl_do_j in enumerate(dl_do):
    dl_dh_i += dl_do_j*do_dh[j][i]

  return dl_dh_i



def scalar_dl_dv(dl_do,do_dv_ij,j):
  """
    Derivative of the loss with respect to a single weight V[i][j].

    Parameters:
      -dl_do    : vector of derivatives of the loss wrt softmax inputs
      -do_dv_ij : scalar derivative of the softmax input reached by weight V[i][j] wrt that weight
      -j        : column index of weights V

    Returns:
      dl_do[j] * do_dv_ij
  """
  upstream_grad = dl_do[j]
  return upstream_grad*do_dv_ij

def scalar_dl_dc(dl_do,do_dc_i,i):
  """
    Derivative of the loss with respect to bias c[i].

    -Parameters:
      -dl_do    : derivative of the loss wrt softmax input
      -do_dc_i  : derivative of softmax input with respect to weight c (always 1)
      -i        : index of softmax input

    -Returns:
      dl_do[i] * do_dc_i
  """
  upstream_grad = dl_do[i]
  return upstream_grad*do_dc_i

def scalar_dh_dk(k,i):
  """
    Derivative of sigmoid output h[i] with respect to its own input k[i].

    -Parameters:
      -k: vector linear input of sigmoid
      -i: index of k

    -Returns:
      sigmoid(k[i]) * (1 - sigmoid(k[i]))
  """
  h_i = sigmoid(k[i])
  return h_i*(1-h_i)

def scalar_dl_dk(dl_dh,dh_dk,i):
  """
    Derivative of the loss with respect to sigmoid input k[i].

    -Parameters:
      -dl_dh: derivative of the loss wrt sigmoid output
      -dh_dk: derivative of the sigmoid output wrt to sigmoid input
      -i    : index of sigmoid input k

    -Returns:
      dl_dh[i] * dh_dk[i]
  """
  upstream_grad = dl_dh[i]
  local_grad = dh_dk[i]
  return upstream_grad*local_grad

def scalar_dl_dw(dl_dk, dk_dw_ij,j):
  """
    Derivative of the loss with respect to a single weight W[i][j].

    -Parameters:
      -dl_dk    : vector of derivatives of the loss wrt sigmoid inputs
      -dk_dw_ij : scalar derivative of the sigmoid input reached by weight W[i][j] wrt that weight
      -j        : column index of weights W

    -Returns:
      dl_dk[j] * dk_dw_ij
  """
  upstream_grad = dl_dk[j]
  return upstream_grad*dk_dw_ij

def scalar_dl_db(dl_dk,dk_db_i,i):
  """
    Derivative of the loss with respect to bias b[i].

    -Parameters:
      -dl_dk    : derivative of the loss wrt sigmoid input
      -dk_db_i  : derivative of sigmoid input with respect to weight b (always 1)
      -i        : index of sigmoid input

    -Returns:
      dl_dk[i] * dk_db_i
  """
  upstream_grad = dl_dk[i]
  return upstream_grad*dk_db_i


In [5]:
# Initialize lists of inputs, bias, weights and target class
def initialize():
    """
      Build fresh lists for the inputs, weights, biases, intermediate values
      and target of the 2-input / 3-hidden / 2-output network.

      Returns:
        tuple (x, W, b, k, h, c, V, o, y, t), where
          -x: inputs
          -W: input weights (one row per input, one column per hidden node)
          -b: input bias
          -k: linear outputs (x * W + b), zero-initialized
          -h: sigmoid activation outputs, zero-initialized
          -c: constant bias of the output layer
          -V: sigmoid-output weights (one row per hidden node, one column per output)
          -o: h * V + c, zero-initialized
          -y: softmax activated output, zero-initialized
          -t: target class (one-hot)
    """
    n_hidden, n_outputs = 3, 2

    x = [1.0, -1.0]
    t = [1, 0]

    # Every inner list is built separately so no two rows share storage.
    W = [[1.0] * n_hidden, [-1.0] * n_hidden]
    V = [[1.0] * n_outputs, [-1.0] * n_outputs, [-1.0] * n_outputs]

    b = [0.0] * n_hidden
    k = [0.0] * n_hidden
    h = [0.0] * n_hidden

    c = [0.0] * n_outputs
    o = [0.0] * n_outputs
    y = [0.0] * n_outputs

    return x,W,b,k,h,c,V,o,y,t

## FORWARD PASS

In [6]:
x,W,b,k,h,c,V,o,y,t = initialize()  # fresh state, so k starts from zeros on every run

# Hidden layer, one node at a time:
#   k[j] = sum_i(x[i] * W[i][j]) + b[j]   and   h[j] = sigmoid(k[j])
for j in range(len(k)):
    for i, x_i in enumerate(x):
        k[j] += W[i][j] * x_i

    k[j] += b[j]

    """sigmoid activation"""
    h[j] = sigmoid(k[j])

print(f"-Linear non-activated outputs: \nk = {k}\n")
print(f"-Sigmoid activated outputs: \nh = {h}")


-Linear non-activated outputs: 
k = [2.0, 2.0, 2.0]

-Sigmoid activated outputs: 
h = [0.8807970779778823, 0.8807970779778823, 0.8807970779778823]


In [7]:
""" SoftMax Activation"""

# Softmax input: o[j] = sum_i(h[i] * V[i][j]) + c[j].
# Each o[j] is rebuilt from zero in a local accumulator and then stored, so
# re-running this cell gives the same values instead of adding onto the
# result of the previous run.
for j in range(len(o)):
    o_j = 0.0
    for i, h_i in enumerate(h):
        o_j += h_i * V[i][j]

    o[j] = o_j + c[j]

# Softmax output: y[j] = exp(o[j]) / sum_m exp(o[m])
for j in range(len(y)):
    y[j] = softmax(o,j)

print(f"-Soft-Max activation : \nY={y}")

-Soft-Max activation : 
Y=[0.5, 0.5]


In [8]:
""" Loss Function"""

# Cross-entropy of the softmax output y against the one-hot target t.
l = cross_entropy_loss(y, t)

print(f"Loss = -log(y_c) = {l}")

Loss = -log(y_c) = 0.6931471805599453


## BACKWARD PASS

In [9]:
""" DL / DY """
# dl_dy[i] = d loss / d y_i, one entry per softmax output
dl_dy = [0.0] * len(y)

for i in range(len(dl_dy)):
    dl_dy[i] = scalar_dl_dy(y,t,i)

print(f"dl_dy = {dl_dy}")


dl_dy = [-2.0, 0.0]


In [10]:
""" DY / DO """

# dy_do[i][j] = d y_i / d o_j: derivative of each softmax output w.r.t. each softmax input
dy_do = [[0.0] * len(o) for _row in range(len(y))]

for i in range(len(y)):
    for j in range(len(o)):
        dy_do[i][j] = scalar_dy_do(y,i,j)

print(f"dy_do = {dy_do}")


dy_do = [[0.25, -0.25], [-0.25, 0.25]]


In [11]:
""" DL / DO """

# dl_do[i] = sum_k(dl_dy[k] * dy_do[k][i]), one entry per softmax input
dl_do = [0.0] * len(o)

for i in range(len(dl_do)):
    dl_do[i] = scalar_dl_do(dl_dy,dy_do,i)

print(f"dl_do = {dl_do}")


dl_do = [-0.5, 0.5]


In [12]:
""" DO / DH """

# do_dh[i][j] = d o_i / d h_j = V[j][i]: one row per softmax input, one column per row of V
do_dh = [[0.0] * len(V) for _row in range(len(o))]

for i in range(len(o)):
    for j in range(len(V)):
        do_dh[i][j] = scalar_do_dh(V,i,j)

print(f"do_dh = {do_dh}")


do_dh = [[1.0, -1.0, -1.0], [1.0, -1.0, -1.0]]


In [13]:
""" DL / DH """

# dl_dh[i] = sum_j(dl_do[j] * do_dh[j][i]), one entry per sigmoid output h[i]
dl_dh = [0.0, 0.0, 0.0]

# Iterate by plain index: a subscript such as k[i] must not be used as the loop
# target, because every iteration would then assign into the forward-pass vector k.
for i in range(len(dl_dh)):
    dl_dh[i] = scalar_dl_dh(dl_do,do_dh,i)

print(f"dl_dh = {dl_dh}")


dl_dh = [0.0, 0.0, 0.0]


In [26]:
""" DL / DV"""

# dl_dv[i][j] = d loss / d V[i][j] = dl_do[j] * h[i], since o_j = sum_i(h[i] * V[i][j]) + c[j]
dl_dv = [[0.0, 0.0],[0.0, 0.0],[0.0, 0.0]]

for i,h_i in enumerate(h): # V edges come out from h-nodes
    for j in range(len(dl_dv[i])):
        # scalar_dl_dv takes exactly (dl_do, do_dv_ij, j); passing the row index i
        # as an extra argument raises TypeError. dO_j/dV_ij is just h[i].
        dl_dv[i][j] = scalar_dl_dv(dl_do, h_i, j)


print(f"dl_dv = {dl_dv}")

dl_dv = [[-0.44039853898894116, 0.44039853898894116], [-0.44039853898894116, 0.44039853898894116], [-0.44039853898894116, 0.44039853898894116]]


In [19]:
""" DL / DC """
# dl_dc[i] = dl_do[i] * d o_i / d c_i, one entry per output bias
dl_dc = [0.0] * len(c)

for i in range(len(c)):
    do_dc_i  = 1  # O_i = sum_j(h_j*V_ji) + c_i  ==> do_i/dc_i = 1 for any i
    dl_dc[i] = scalar_dl_dc(dl_do,do_dc_i,i)

print(f"dl_dc = {dl_dc}")

dl_dc = [-0.5, 0.5]


In [None]:
""" DH / DK """

# Placeholder only: dh_dk is re-created with three entries and filled in by the
# DL / DK cell that follows, so nothing reads this empty list.
dh_dk = []

In [24]:
""" DL / DK """
"""   -> using DH/DK"""    

# dh_dk[i] = d h_i / d k_i  (only the same-index derivative is needed)
# dl_dk[i] = dl_dh[i] * dh_dk[i]
dh_dk = [0.0] * len(k)
dl_dk = [0.0] * len(k)

for i in range(len(k)):
    dh_dk[i] = scalar_dh_dk(k,i)
    dl_dk[i] = scalar_dl_dk(dl_dh,dh_dk,i)

print(f"dh_dk = {dh_dk}")
print(f"dl_dk = {dl_dk}")


dh_dk = [0.10499358540350662, 0.10499358540350662, 0.10499358540350662]
dl_dk = [0.0, 0.0, 0.0]


In [30]:
""" DL / DW """

# dl_dw[i][j] = d loss / d W[i][j] = dl_dk[j] * x[i], since k_j = sum_i(x[i] * W[i][j]) + b[j]
dl_dw = [[0.0] * len(dl_dk) for _row in range(len(x))]

for i, x_i in enumerate(x): # W edges come out from x-nodes
    for j in range(len(dl_dk)):
        dl_dw[i][j] = scalar_dl_dw(dl_dk, x_i,j) # dK_j/dW_ij is just x[i]

print(f"dl_dw = {dl_dw}")

dl_dw = [[0.0, 0.0, 0.0], [-0.0, -0.0, -0.0]]


In [35]:
""" DL / DB """

# dl_db[i] = dl_dk[i] * d k_i / d b_i, one entry per hidden-layer bias
dl_db = [0.0] * len(b)

for i in range(len(b)):
    dk_db_i  = 1  # K_i = sum_j(x_j*W_ji) + b_i  ==> dk_i/db_i = 1 for any i
    dl_db[i] = scalar_dl_db(dl_dk,dk_db_i,i)

print(f"dl_db = {dl_db}")




dl_db = [0.0, 0.0, 0.0]
