In [2]:
from math import log,exp,fsum
from data import load_mnist,load_synth
import numpy as np

In [11]:
class DNN:
    def __init__(self):
        self.x = None #first layer input nodes
        self.W = None #first layer weights matrix
        self.b = None #input bias
        self.k = None #first layer linear output
        self.h = None #sigmoid activation output
        self.V = None #second layer weight matrix
        self.c = None #second layer bias
        self.o = None #softmax input
        self.y = None #softmax output
        self.t = None #target vector        

        # (self.xtrain, self.ytrain), (self.xval, self.yval), self.num_cls = load_mnist()
        (self.xtrain, self.ytrain), (self.xval, self.yval), self.num_cls = load_synth()


    def default_init(self):
        self.set_nodes()
        self.set_weights_W()
        self.set_weights_V()
    
    def set_nodes(self,x=784,k=300,h=300,o=10,y=10):
        """
        Inititalize list of nodes with specific sizes
        Also works for reset values for a new fordward pass
        Parameters:
            x: size of first layer input nodes
            k: size of first layer linear output
            h: size of sigmoid nodes layer
            o: size of softmax input layer
            y: size of softmax output layer
        """
        self.x = [0. for _ in range(x)]
        self.k = [0. for _ in range(k)]
        self.h = [0. for _ in range(h)]
        self.o = [0. for _ in range(o)]
        self.y = [0. for _ in range(y)]

        #bias
        self.b = [0. for _ in range(k)]
        self.c = [0. for _ in range(o)]    


    def set_weights_W(self,mu=0.0,sigma=1.0):
        """
        Initialize weights matrix W
        Parameters:
            -mu     : mean of the normal distribution from where the random weights are generated
            -sigma  : standar deviation of the normal distribution from where the random weights are generated
        """
        self.W = [[np.random.normal(loc=mu,scale=sigma) for _ in range(len(self.k))] for __ in range(len(self.x))]
        

    def set_weights_V(self,mu=0.0,sigma=1.0):
        """
        Initialize weights matrix V
        Parameters:
            -mu     : mean of the normal distribution from where the random weights are generated
            -sigma  : standar deviation of the normal distribution from where the random weights are generated
        """
        self.V = [[np.random.normal(loc=mu,scale=sigma) for _ in range(len(self.o))] for __ in range(len(self.h))]

    def set_derivative_lists(self):
        dl_dy = [0.  for _ in range(len(self.y))]                               #derivatives of the loss wrt softmax output
        dy_do = [[0. for _ in range(len(self.y))] for __ in range(len(self.o))] #derivatives of the softmax output wrt softmax input
        dl_do = [0.  for _ in range(len(self.o))]                               #derivatives of the loss wrt softmax input
        do_dh = [[0. for _ in range(len(self.k))] for __ in range(len(self.o))] #derivatives of the softmax input wrt to sigmoid output
        dl_dh = [0.  for _ in range(len(self.h))]                               #derivatives of the loss wrt sigmoid output
        dl_dv = [[0. for _ in range(len(self.o))] for __ in range(len(self.h))] #derivatives of the loss wrt to weights V
        dl_dc = [0.  for _ in range(len(self.o))]                               #derivatives of the loss wrt to bias C
        dl_dk = [0.  for _ in range(len(self.k))]                               #derivatives of the loss wrt to sigmoid input
        dh_dk = [0.  for _ in range(len(self.k))]                               #derivatives of the sigmoid output wrt to sigmoid input (only interested in same i-index e.g dHi/dK))
        dl_dw = [[0. for _ in range(len(self.k))] for __ in range(len(self.x)) ]#derivatives of the loss wrt to weights W
        dl_db = [0.  for _ in range(len(self.k))]                               #derivatives of the loss wrt to bias B

        return dl_dy,dy_do,dl_do,do_dh,dl_dh,dl_dv,dl_dc,dl_dk,dh_dk,dl_dw,dl_db

    def cross_entropy(self, y, true_index):
        # ytrain goes from 0 to 9
        return -log(y[true_index])

    def train(self,x,true_y,alpha,verbose=False):
        # TODO START WITH FORDWARD PASS MATRIX MODE
        """ ######################################## FORWARD #####################################"""
        
        self.set_nodes(x=2,k=3,h=3,o=2,y=2)

        self.x = x
        self.t = true_y

        self.k = self.W.dot(self.x) + self.b

        if verbose: self.report_f(self.t)

        return None

    def train_epoch(self,alpha=0.02,rounds=None,verbose=False):
        self.losses = []
        counter = 0

        for x,true_y in zip(self.xtrain,self.ytrain):
            # target_i = [0,1] if self.ytrain[i] == 0 else [1,0]
            
            loss = self.train(x,true_y,alpha=alpha,verbose=verbose)

            self.losses.append(loss)

            # self.report_f()
            if rounds != None:
                if counter < rounds:
                    counter += 1
                else:
                    break        

    def report_f(self,target=[0,0],loss=0):
        print(f"##### FORWARD #######")
        print(f"-t = {target}\tloss = {loss}")
        print(f"-y = {self.y}")
        print(f"-o = {self.o}")
        print(f"-V = {self.V}")
        print(f"-c = {self.c}")
        print(f"-h = {self.h}")
        print(f"-k = {self.k}")
        print(f"-W = {self.W}")
        print(f"-b = {self.b}")
        print(f"-x = {self.x}")                



In [12]:
dnn = DNN()

dnn.set_nodes(x=2,k=3,h=3,o=2,y=2)
dnn.set_weights_W()
dnn.set_weights_V()
# dnn.default_init()*

dnn.train_epoch(rounds=2,verbose=True)


# dl_dy,dy_do,dl_do,do_dh,dl_dh,dl_dv,dl_dc,dl_dk,dh_dk,dl_dw,dl_db = dnn.set_derivative_lists()

# print(do_dh)




AttributeError: 'list' object has no attribute 'dot'

In [5]:
true_y = dnn.ytrain[0]
x = dnn.xtrain[0]
print(true_y)
# print(dnn.xtrain[[0,0,0,1] -1])

fake_o = [0.3, 0.1, 0.05, 0.05, 0.1111, 0.06,0.04, 0.06, 0.1, 0.2]


print(fake_o[-4 -1])
# print(fake_o[-1 -1])


1
0.06


In [6]:
print(np.unique(dnn.ytrain))

[0 1]


In [7]:
np.random.normal(loc=0,scale=1.0)

-0.8349477286608628

In [8]:
# type(q5_dnn.xtrain)
# print(np.unique(q5_dnn.xtrain))
print(f"number of training instances:     {len(dnn.xtrain)}")
print(f"number of input nodes:            {len(dnn.xtrain[0])}")
print(f"(given) number of output nodes:   10")
print(f"(given) hidden layer size:        300")

number of training instances:     60000
number of input nodes:            2
(given) number of output nodes:   10
(given) hidden layer size:        300
