In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
from IPython.display import HTML,Latex
import os 

In [27]:
'''
Using the 07--3.1 n-h-1 NN as the starting point and editing

Target:-
    1. DNN with flexible input size, n_hidden_units in each layer and output units and activation functions
    2. Vectorized implementation
    3. Include the regularization and drop out as well

'''
class FAwD: 
    def __init__(self,
                 n_input_neurons=1,
                 n_hidden_neurons=12,
                 intermidiate_activation_function='sigmoid',
                 learning_rate=1e-2,
                 n_iters=1_000,
                regularization_strength=0,
                dropout_keep_prob=1):
        self.n_input_neurons = n_input_neurons
        self.n_hidden_neurons = n_hidden_neurons
        
        assert intermidiate_activation_function in  ['sigmoid','relu']
        self.activation_function = {'sigmoid':sigmoid,'relu':relu}[intermidiate_activation_function]
        
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.regularization_strength = regularization_strength
        self.dropout_keep_prob = dropout_keep_prob
        
    def fit(self,X,y):
        assert X.shape[0] == self.n_input_neurons
        assert y.shape[0] == 1
        assert y.shape[1] == X.shape[1], "Number of examples : should match in inputs and targets"

        self.cost_history = []

        # define hyper parameters.
        m = y.shape[1] # number of examples given
        n = self.n_input_neurons # input feature count
        h = self.n_hidden_neurons # hidden neuron count

        #initialize weights and biases.
        w = np.random.randn(h,1)
        b = np.random.randn(1,1)
        V = np.random.randn(h,n)
        c = np.random.randn(h,1)

        #fine tune parameters.
        start_time = time.time()
        for i in range(self.n_iters):
                
                # progress message : every 100 iteration except for the first one
                if i and not i%100:
                    time_now = time.time()
                    time_remaining = get_nice_time_dura_str ((self.n_iters - i) * (time_now - start_time) / i)
                    print(f"iteration : {i} ---> ETA : {time_remaining} ",end="\r")
                    

                #forward pass
                H = self.activation_function(V@X + c)
                # drop out
                should_drop = np.random.rand(H.shape[0],H.shape[1]) < self.dropout_keep_prob
                H *= should_drop
                H /= self.dropout_keep_prob
                
                y_pred = w.T@H + b

                #calculate the cost for the current parameters.
                cost = 1/(2*m) * np.sum((y_pred - y)**2)
                self.cost_history.append(cost)

                #backpropagation
                diff = (y_pred - y)
                
                if self.activation_function == sigmoid:
                    high_level_derivative = H * (1 - H)
                elif self.activation_function == relu:
                    high_level_derivative = np.where(H>=0,1.,0.)
 


                dEdb  = diff.sum() / m
                dEdw = H@diff.T / m
                dEdc = w * (high_level_derivative@diff.T) / m
                dEdV = w *  ( (diff * high_level_derivative)@X.T ) / m

                # update rule : gradient decent.
                # consider regularization too 
                b *= (1 - self.regularization_strength * self.learning_rate / m)
                b -= self.learning_rate * dEdb
                w *= (1 - self.regularization_strength * self.learning_rate / m)
                w -= self.learning_rate * dEdw
                V *= (1 - self.regularization_strength * self.learning_rate / m)
                V -= self.learning_rate * dEdV
                c *= (1 - self.regularization_strength * self.learning_rate / m)
                c -= self.learning_rate * dEdc



        #save tuned parameters
        self.b = b
        self.w = w
        self.V = V
        self.c = c
        
        time_now = time.time()
        total_time = get_nice_time_dura_str(time_now - start_time)
        print(f"Training ended : n_iters: {self.n_iters} with learning_rate : {self.learning_rate}. Time taken : {total_time}")
        
    def show_cost_history(self,log=False):
        #plot the cost history
        
        history_vals = self.cost_history
        title = "Cost vs. iteration"
        if log:
            history_vals = np.log(self.cost_history)
            title = "LOG(Cost) vs. iteration"
            
        
        fig,ax = plt.subplots(1)
        ax.plot(history_vals)
        ax.set_title(title)
        ax.grid()
        plt.show()
    
    def get_final_cost(self):
        return self.cost_history[-1]
        
    def predict(self,X):
        
        H = self.activation_function(self.V@X + self.c)
        y_pred = (self.w).T@H + self.b
        
        return y_pred
    
    

    
    
# define activation functions globally
def sigmoid(t):
        return 1/ ( 1 + np.exp(-t) )
      
def relu(t): # from chat gpt : this is safe for any dimension array t
    return np.maximum(0,t)
    
    
    
    
# helper functions
def get_nice_time_dura_str(time_in_secs):
    time_in_secs = round(time_in_secs,2)
    if time_in_secs >= 60:
        n_mins = int(time_in_secs//60)
        n_secs = round(time_in_secs%60,2)
        return f"{n_mins} min {n_secs} secs"
    return f"{time_in_secs} secs"
    

In [29]:
class DNN:
    def __init__(self,layers):
        # layers :  list of dicts containing later information.
        assert type(layers) in  (list,tuple)
        assert len(layers) >= 2, "At least an input and an output layer should be there"
        
        # validate each layer
        for i,layer in enumerate(layers):
            if i==0:
                assert layer["type"] == "input", "first layer should be input"
            elif i==len(layers)-1:
                assert layer["type"] == "output", "last layer should be output"
            else:
                assert layer["type"] == "hidden", "middle layers should be hidden"
                assert layer["activation_function"] in ("relu","sigmoid","linear")
                
            assert type(layer["units"]) == int
        #done: validation
        
        #save these info
        self.layers = layers
        
    
    def build(self,show=0):
        # the parameters will be initialized with required dimentions
        
        self.W = [None] # this array will keep weight matrix  per each layer (l=1,2,..,L) ... first elem as a placeholde to indeces to work fine
        self.B = [None] # this array will keep bias vector  per each layer (l=1,2,..,L) ... first elem as a placeholde to indeces to work fine
        self.activation_def = [None] # the pointer to respecting activation function's definition per each layer (l=1,2,..,L) ... first elem as a placeholde to indeces to work fine
        self.activation_derivative_def = [None] # the pointer to respecting activation function's derivative's  
                                                                #definition per each layer (l=1,2,..,L) ... first elem as a placeholde to indeces to work fine
        
        self.A = [] # keep all intermidiate activation matrix per each layer (l=1,2,..,L)
        self.Z = [] # keep all intermidiate pre-activation matrix per each layer (l=1,2,..,L)
        
        #initializing
        for i in range(len(self.layers)):
            
            # anything for l=0,1,2,..,L ? 
            # NO
             
            
            if i: #only start from layer 1,2,..,L
                weight_matrix = np.random.randn(self.layers[i]["units"],self.layers[i-1]["units"])
                bias_vector = np.zeros((self.layers[i]["units"],1))
                self.W.append(weight_matrix)
                self.B.append(bias_vector)
                   
               
                self.activation_def.append(ACTIVATION_FUNC[self.layers[i]["activation_function"]])
                self.activation_derivative_def.append(ACTIVATION_FUNC_DERI[self.layers[i]["activation_function"]])
           
            
        if show: 
            print(self.params) 
            
    def batch_fit(self,X,Y,cost_function='least_square',n_iters=1_000,learning_rate=1e-3):
        assert cost_function in ('least_square','binary_cross_entropy')
        
        ##### FWD PASS #####
        # r3ki3g : assumes X is pre-scaled / normalized
        self.A[0] = X
        self.Z[0] = None # not useful but need to store something to indices to work out properly
        
        # these are some place holders to keep the arrays in required size
        self.dZ = [None for i in range(len(self.layers))]
        self.dA = [None for i in range(len(self.layers))]
        self.dW = [None for i in range(len(self.layers))]
        self.dB = [None for i in range(len(self.layers))]
        
        cost_history = []
        
        for _ in range(n_iters):
                
                #current cost calc and storing
                cost = 
            
            
                # loop though each layer and calculate the pre-activations(Z) and activations(A)
                for i in range(1,len(self.layers)):
                    self.Z[i] = self.W[i]@self.A[i-1]+self.B[i]
                    self.A[i] = self.activation_def[i](self.Z[i])

                # now all the activations in the NN are calculated and stored

                ##### BACK PROP #####
                # The order l = L,L-1,L-2,...,3,2,1 (and no 0)
                for i in range(len(self.layers) -1 , 0 , -1): # i=0 is excluded  # note :- L is at  len(self.layers) -1 index

                    # dZ[L] depends on the choice of cost function
                    if i == len(self.layers) -1:
                                if cost_function == 'binary_cross_entropy':
                                    self.dZ[i] = self.A[i] - Y
                                else:
                                    raise Exception("not implemented yet")



                    else: # not the last layer
                        self.dZ[i] =  (self.W[i+1].T @ self.dZ[i+1]) * self.activation_derivative_def[i](self.Z[i])


                    # calculate gradients
                    self.dW[i] = self.dZ[i]@self.A[i-1] / m
                    self.dB[i] = np.sum(self.dZ[i],axis=1,keepdims=1) / m

                    # gradient decent
                    self.W[i] -= learning_rate * self.dW[i]
                    self.B[i] -= learning_rate * self.dB[i]

                
                
            
        
                      
        
        
        
        
        
        
        
            
        
        
        
        
     
    
    
    
    
###### HELPER FUNCTIONS FOR DNN CLASS ###########    
    
# define activation functions globally
def sigmoid(t):
        return 1/ ( 1 + np.exp(-t) )
      
def relu(t): # from chat gpt : this is safe for any dimension array t
    return np.maximum(0,t)

def relu_deri(t):
    return np.where(t>=0,1.,0.)

def sigmoid_deri(t):
    return (1-t) * t

def linear(t):
    return t

def linear_deri(t):
    return 1
                
ACTIVATION_FUNC = {"relu":relu,"sigmoid":sigmoid,"linear":linear}
ACTIVATION_FUNC_DERI = {"relu":relu_deri,"sigmoid":sigmoid_deri,"linear":linear_deri}

In [32]:
my_dnn = DNN(layers=[
    
    {
        "type":"input",
        "units":1
    },
    
    {
        "type":"hidden",
        "units":16,
        "activation_function":"relu",
        "regularization_strength":1e-3,
        "dropout_keep_prob":1.0
    },

    {
        "type":"hidden",
        "units":16,
        "activation_function":"relu",
        "regularization_strength":1e-3,
        "dropout_keep_prob":1.0
    },
    
    {
        "type":"output",
        "units":1,
        "activation_function":"sigmoid",
        "regularization_strength":1e-3,
        "dropout_keep_prob":1.0
    }
    
])

my_dnn.build()



In [28]:
#testing

IndexError: list assignment index out of range