In [2]:
from math import log,exp,fsum
from data import load_mnist,load_synth
import numpy as np

In [173]:
def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def softmax(o):
  """
    Parameters:
      -o : vector of softmax input
      -i : index i of numerator 
  """
  return np.exp(o) / fsum(np.exp(o))  

def cross_entropy_loss(y, true_y):
    """
    Parameters:
        -y: vector output of softmax
        -true_y: true target value (or index)
    """
    # ytrain goes from 0 to 9
    return -log(y[true_y])  





In [437]:
class DNN:
    def __init__(self):
        self.x = None #first layer input nodes
        self.W = None #first layer weights matrix
        self.b = None #input bias
        self.k = None #first layer linear output
        self.h = None #sigmoid activation output
        self.V = None #second layer weight matrix
        self.c = None #second layer bias
        self.o = None #softmax input
        self.y = None #softmax output
        self.t = None #target vector        

        # (self.xtrain, self.ytrain), (self.xval, self.yval), self.num_cls = load_mnist()
        (self.xtrain, self.ytrain), (self.xval, self.yval), self.num_cls = load_synth()


    def default_init(self):
        self.set_nodes()
        self.set_weights_W()
        self.set_weights_V()
    
    def set_nodes(self,x=784,k=300,h=300,o=10,y=10):
        """
        Inititalize list of nodes with specific sizes
        Also works for reset values for a new fordward pass
        Parameters:
            x: size of first layer input nodes
            k: size of first layer linear output
            h: size of sigmoid nodes layer
            o: size of softmax input layer
            y: size of softmax output layer
        """
        self.x = np.zeros((x,1))
        self.k = np.zeros((k,1))
        self.h = np.zeros((h,1))
        self.o = np.zeros((o,1))
        self.y = np.zeros((y,1))

        #bias
        self.b = np.zeros((k,1))
        self.c = np.zeros((o,1))


    def set_weights_W(self,mu=0.0,sigma=1.0):
        """
        Initialize weights matrix W
        Parameters:
            -mu     : mean of the normal distribution from where the random weights are generated
            -sigma  : standar deviation of the normal distribution from where the random weights are generated
        """
        x_rows = np.shape(self.x)[0]
        k_rows = np.shape(self.k)[0]
        # self.W = np.random.normal(loc=mu,scale=sigma,size=(k_rows,x_rows))
        self.W = np.array([[1., -1.], [1.,-1.], [1., -1.]])
        
        

    def set_weights_V(self,mu=0.0,sigma=1.0):
        """
        Initialize weights matrix V
        Parameters:
            -mu     : mean of the normal distribution from where the random weights are generated
            -sigma  : standar deviation of the normal distribution from where the random weights are generated
        """
        h_cols = np.shape(self.h)[0]
        o_cols = np.shape(self.o)[0]
        # self.V = np.random.normal(loc=mu,scale=sigma,size=(o_cols,h_cols))
        self.V = np.array([[1., -1., -1.], [1.,-1., -1.]])

    def set_derivative_lists(self):
        # dl_dy = [0.  for _ in range(len(self.y))]                               #derivatives of the loss wrt softmax output
        # dy_do = [[0. for _ in range(len(self.y))] for __ in range(len(self.o))] #derivatives of the softmax output wrt softmax input
        # dl_do = [0.  for _ in range(len(self.o))]                               #derivatives of the loss wrt softmax input
        # do_dh = [[0. for _ in range(len(self.k))] for __ in range(len(self.o))] #derivatives of the softmax input wrt to sigmoid output
        # dl_dh = [0.  for _ in range(len(self.h))]                               #derivatives of the loss wrt sigmoid output
        # dl_dv = [[0. for _ in range(len(self.o))] for __ in range(len(self.h))] #derivatives of the loss wrt to weights V
        # dl_dc = [0.  for _ in range(len(self.o))]                               #derivatives of the loss wrt to bias C
        # dl_dk = [0.  for _ in range(len(self.k))]                               #derivatives of the loss wrt to sigmoid input
        # dh_dk = [0.  for _ in range(len(self.k))]                               #derivatives of the sigmoid output wrt to sigmoid input (only interested in same i-index e.g dHi/dK))
        # dl_dw = [[0. for _ in range(len(self.k))] for __ in range(len(self.x)) ]#derivatives of the loss wrt to weights W
        # dl_db = [0.  for _ in range(len(self.k))]                               #derivatives of the loss wrt to bias B

        y_rows = np.shape(self.y)[0]
        o_rows = np.shape(self.o)[0]
        h_rows = np.shape(self.h)[0]
        k_rows = np.shape(self.k)[0]
        x_rows = np.shape(self.x)[0]

        self.dl_dy = np.zeros((y_rows,1))                                #derivatives of the loss wrt softmax output
        self.dy_do = np.zeros((y_rows,o_rows))                           #derivatives of the softmax output wrt softmax input
        self.dl_do = np.zeros((1,o_rows))                                #derivatives of the loss wrt softmax input
        self.do_dh = np.zeros((o_rows,h_rows))                           #derivatives of the softmax input wrt to sigmoid output
        self.dl_dh = np.zeros((1,h_rows))                                #derivatives of the loss wrt sigmoid output
        self.do_dv = np.zeros((o_rows,))
        self.dl_dv = np.zeros((o_rows,h_rows))                           #derivatives of the loss wrt to weights V
        self.dl_dc = np.zeros((1,o_rows))                                #derivatives of the loss wrt to bias C
        self.dl_dk = np.zeros((1,k_rows))                                #derivatives of the loss wrt to sigmoid input
        self.dh_dk = np.zeros((1,k_rows))                                #derivatives of the sigmoid output wrt to sigmoid input (only interested in same i-index e.g dHi/dK))
        self.dl_dw = np.zeros((k_rows,x_rows))                           #derivatives of the loss wrt to weights W
        self.dl_db = np.zeros((1,k_rows))                                #derivatives of the loss wrt to bias B        

    def onehot_encode_true_y(self):
        """
        Creates a matrix of size (n,n) fulls of zeros except from the last column and i-th row, where i is the index of the target
        """
        y_rows = np.shape(self.y)[0]

        encoded = np.zeros((y_rows,y_rows),dtype=int)
        encoded[self.t,y_rows-1] = 1

        return  encoded

    def gradient_dy_do(self):
        y_ = self.y.reshape(-1,1)

        return np.diagflat(y_) - np.dot(y_, y_.T)       

    def forward_pass(self,x,true_y,verbose=False):
        self.set_nodes(x=2,k=3,h=3,o=2,y=2)
        self.x = x.reshape(2,1)
        self.t = true_y

        self.k = self.W.dot(self.x) + self.b
        
        self.h = sigmoid(self.k)
        self.o = self.V.dot(self.h) + self.c
        self.y = softmax(self.o)

        loss = cross_entropy_loss(self.y,self.t)

        if verbose: self.report_f(target=self.t,loss=loss)
        
        return loss        
    
    def backward_pass(self,alpha=0.01,verbose=False):
        self.set_derivative_lists()
        
        true_y_encoded = self.onehot_encode_true_y() # TODO: OJO CON ESTO AL PASAR A RED GRANDE
        # print(f"encoded: {true_y_encoded.tolist()} \t shape: {true_y_encoded.shape}")

        """ ############# DL / DY #############"""
        # dl_dy = np.where(dl_dy.__index__ , dl_dy, 10*a)
        self.dl_dy = true_y_encoded.dot((1/self.y))

        """ ############# DY / DO #############"""

        self.dy_do = self.gradient_dy_do()
        # print(dy_do)

        """ ############# DL / DO #############"""
        print(f"\n {self.dl_dy.shape}*{self.dy_do.shape} = {(self.dl_dy * self.dy_do).shape}")
        self.dl_do = (self.dl_dy * self.dy_do).sum(axis=0) # TODO: OJO AQUI 
        
        """ ############# DL / DV #############"""
        """|->        and DO / DV      """
        self.do_dv = self.h
        self.dl_dv = self.dl_do * self.do_dv # dl_do.shape=(2,) and h.shape=(3,1) --> (dl_do * h).shape = (3,2)
        
        """ ############# DL / DC #############"""
        self.dl_dc = self.dl_do
        
        """ ############# DO / DH #############"""
        
        self.do_dh = self.V

        """ ############# DL / DH #############"""
        # self.dl_dh = (self.do_dh * self.dl_do).sum(axis=0)
        # dl_do =(self.dl_dy * self.dy_do).sum(axis=0)
        # print(dl_do)

        if verbose: self.report_derivs()


    def train_epoch(self,alpha=0.02,rounds=None,verbose=False):
        self.losses = []
        counter = 0

        for x,true_y in zip(self.xtrain,self.ytrain):
            # target_i = [0,1] if self.ytrain[i] == 0 else [1,0]
            
            x=np.array([1,-1])

            loss = self.forward_pass(x,true_y,verbose=verbose)
            self.backward_pass(alpha=alpha,verbose=verbose)
            
            self.losses.append(loss)

            # self.report_f()
            if rounds != None:
                if counter < rounds:
                    counter += 1
                else:
                    break        

    def report_f(self,target=[0,0],loss=0):
        print(f"##### FORWARD #######")
        print(f"-t = {target}\tloss = {loss}")
        print(f"-y = {self.y.tolist()}")
        print(f"-o = {self.o.tolist()}")
        print(f"-V = {self.V.tolist()}\tshape={self.V.shape}")
        print(f"-c = {self.c.tolist()}")
        print(f"-h = {self.h.tolist()}")
        print(f"-k = {self.k.tolist()}")
        print(f"-W = {self.W.tolist()}")
        # print(f"-W = {self.W}")
        print(f"-b = {self.b.tolist()}")
        print(f"-x = {self.x.tolist()}")                

    def report_derivs(self):
        print(f"\t|##### DERIVS #######")
        print(f"\t|dl_dy = {self.dl_dy.tolist()}")
        print(f"\t|dy_do = {self.dy_do.tolist()}")
        print(f"\t|dl_do = {self.dl_do.tolist()}")
        print(f"\t|dl_dv = {self.dl_dv.tolist()}")
        print(f"\t|dl_dc = {self.dl_dc.tolist()}")
        print(f"\t|do_dh = {self.do_dh.tolist()}")
        print(f"\t|dl_dh = {self.dl_dh.tolist()}")
        print(f"\t|dh_dk = {self.dh_dk.tolist()}")
        print(f"\t|dl_dk = {self.dl_dk.tolist()}")
        print(f"\t|dl_dw = {self.dl_dw.tolist()}")
        print(f"\t|dl_db = {self.dl_db.tolist()}")        


In [438]:
dnn = DNN()

dnn.set_nodes(x=2,k=3,h=3,o=2,y=2)
dnn.set_weights_W()
dnn.set_weights_V()
# dnn.default_init()*
dnn.train_epoch(rounds=0,verbose=True)


##### FORWARD #######
-t = 1	loss = 0.6931471805599453
-y = [[0.5], [0.5]]
-o = [[-0.8807970779778823], [-0.8807970779778823]]
-V = [[1.0, -1.0, -1.0], [1.0, -1.0, -1.0]]	shape=(2, 3)
-c = [[0.0], [0.0]]
-h = [[0.8807970779778823], [0.8807970779778823], [0.8807970779778823]]
-k = [[2.0], [2.0], [2.0]]
-W = [[1.0, -1.0], [1.0, -1.0], [1.0, -1.0]]
-b = [[0.0], [0.0], [0.0]]
-x = [[1], [-1]]

 (2, 1)*(2, 2) = (2, 2)
	|##### DERIVS #######
	|dl_dy = [[0.0], [2.0]]
	|dy_do = [[0.25, -0.25], [-0.25, 0.25]]
	|dl_do = [-0.5, 0.5]
	|dl_dv = [[-0.44039853898894116, 0.44039853898894116], [-0.44039853898894116, 0.44039853898894116], [-0.44039853898894116, 0.44039853898894116]]
	|dl_dc = [-0.5, 0.5]
	|do_dh = [[1.0, -1.0, -1.0], [1.0, -1.0, -1.0]]
	|dl_dh = [[0.0, 0.0, 0.0]]
	|dh_dk = [[0.0, 0.0, 0.0]]
	|dl_dk = [[0.0, 0.0, 0.0]]
	|dl_dw = [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]
	|dl_db = [[0.0, 0.0, 0.0]]


In [378]:
print(f"dl_do: {dnn.dl_do.tolist()} \t shape: {dnn.dl_do.shape}")
dl_do = dnn.dl_do.reshape(2,1)
print(f"dl_do: {dl_do.tolist()} \t shape: {dl_do.shape}")

print(f"h    : {dnn.h.tolist()} \t shape: {dnn.h.shape}")
print(f"\ndl_do*h = {(dl_do * dnn.h).tolist()}")
# print(f"\ndl_do*h = {(dnn.dl_do * dnn.h).tolist()}")



dl_do: [[-0.5], [0.5]] 	 shape: (2, 1)
dl_do: [[-0.5], [0.5]] 	 shape: (2, 1)
h    : [[0.8807970779778823], [0.8807970779778823], [0.8807970779778823]] 	 shape: (3, 1)


ValueError: operands could not be broadcast together with shapes (2,1) (3,1) 

In [347]:
# dnn.h.shape
np.tile(dnn.h,(1,2))


array([[0.79639203, 0.79639203],
       [0.79639203, 0.79639203],
       [0.79639203, 0.79639203]])

In [443]:
x = np.array([[2],[4]])  #(2,1) 
y = np.array([[2,2],[4,4]])  #(2,2) 

print(f"x = {x.tolist()}")
print(f"y = {y.tolist()}")

z = x*y

print(f"z = {z.tolist()}")




x = [[2], [4]]
y = [[2, 2], [4, 4]]
z = [[4, 4], [16, 16]]


In [8]:
# type(q5_dnn.xtrain)
# print(np.unique(q5_dnn.xtrain))
print(f"number of training instances:     {len(dnn.xtrain)}")
print(f"number of input nodes:            {len(dnn.xtrain[0])}")
print(f"(given) number of output nodes:   10")
print(f"(given) hidden layer size:        300")

number of training instances:     60000
number of input nodes:            2
(given) number of output nodes:   10
(given) hidden layer size:        300
