In [3]:
import numpy as np

In [206]:
class DeepNN():
    def __init__(self, X, Y, dims_of_layers, activations, alpha = 0.01):
        #dims_of_layers - list of number of units in each layer (first element - num of features in input)
        #activations - activation function applied to each layer
        
        #dims_of_layers[0] - n of features in input
        #activations[0] - activation for first hidden layer
        #we support only 3 activation funcs: linear, sigmoid, relu
        
        #X.shape should be (n_features, m_examples)
        #Y.shape should be (1, m_examples)
        self.X = X
        self.Y = Y
        
        self.m_examples = X.shape[1]
        
        self.dims_of_layers = dims_of_layers
        self.n_layers = len(activations)
        
        self.activations = activations
        self.params = dict()
        
        self.learning_history = []
        self.alpha = alpha
        
        #setting cache dicts for backpropogation
        
        self.cache = dict()
        
    
    def initialize_params(self):
        
        for i in range(1, len(self.activations) + 1):
            
            #setting parameters layer by layer
            self.params["W" + str(i)] = np.random.randn(self.dims_of_layers[i], self.dims_of_layers[i-1])
            self.params["b" + str(i)] = np.zeros((self.dims_of_layers[i], 1))
            
     
    
    def activation(self, Z, function="linear"):
        if function == "linear":
            return Z
        
        if function == "sigmoid":
            return 1 / (1 + np.exp(-Z))
        
        if function == "relu":
            return Z * (Z > 0)
    
    def forward_propogation(self, X):
        
        #X.shape = (n_features, m_examples)
        
        A_prev = X
        
        Z_current = np.dot(self.params["W1"], A_prev) + self.params["b1"]
        A_current = self.activation(Z_current, function=self.activations[0])
        
        self.cache["Z1"] = Z_current
        self.cache["A1"] = A_current
        
        for i in range(1, len(self.activations)):
            A_prev = A_current
            
            #A_prev - cache["A" + str(i)]
            Z_current = np.dot(self.params["W" + str(i+1)], A_prev)
            A_current = self.activation(Z_current, function=self.activations[i])
            
            #keeping values in cache for backprop
            self.cache["Z" + str(i+1)] = Z_current
            self.cache["A" + str(i+1)] = A_current            

            
        predictions = A_current
        
        return predictions
        
    def compute_cost(self, predictions, cost_function="cross_entropy"):
        #leave cost func as a parameter 
        #so that we can use it futher not only for classification
        
        
        
        
        if cost_function == "cross_entropy":
            
            #lets cut off a  tiny constant to avoid log0 problem
            epsilon = 10 ** -15
            
            predictions = np.clip(predictions, epsilon, 1-epsilon)
            
            #BinaryCrossEntropy
        
            cost = (self.Y * np.log(predictions) + 
                    (1 - self.Y) * np.log(1 - (predictions)) * (-1 / self.m_examples))
            
            cost = np.sum(cost, axis=1, keepdims=True) #sum up the columns
            
        #we will have more cost functions here later...
        
        return cost
    
    def deriv_of_cost(self, predictions, cost_function="cross_entropy"):
        
        if cost_function == "cross_entropy":
            
            #avoiding division by zero
            epsilon = 10 ** -15
            predictions = np.clip(predictions, epsilon, 1-epsilon)
            
            dAL = (predictions - self.Y) / (predictions * (1 - predictions))
            
        #we will have more cost functions here later...
        
        return dAL
            
            

In [207]:
dims = [3, 4, 2, 1]
activation = ["relu", "relu", "sigmoid"]

X = np.ones((3, 6))
y = np.array([[1,1,1,0,0,0]])




In [208]:
deepnn = DeepNN(X, y, dims, activation)
deepnn.initialize_params()

In [209]:
x = np.array([10, 5, 9]).reshape(-1, 1)

predictions = np.array([[0,0,0,1,1,1]])

In [210]:
np.round(deepnn.compute_cost(predictions), 3)

array([[-86.347]])

In [211]:
deepnn.deriv_of_cost(predictions)

array([[-1.00000000e+15, -1.00000000e+15, -1.00000000e+15,
         1.00079992e+15,  1.00079992e+15,  1.00079992e+15]])

In [212]:
1 / 10**-15

999999999999999.9