In [None]:
import numpy as np

In [1]:
class FCC:
    def __init__(self, input_dims, hidden_dims, num_classes):
        self.input_dims = input_dims
        self.hidden_dims = hidden_dims
        self.num_classes = num_classes
        self.cache = {}
    
    def softmax(self, x):
        e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e_x/np.sum(e_x, axis=1, keepdims=True)
    
    def initialize_parameters(self)    
        self.W1 = np.randonm.randn(self.input_dims, self.hidden_dims)/np.sqrt(self.input_dims)
        self.B1= np.zeros(1, self.hidden_dims)
        self.W2 = np.randonm.randn(self.hidden_dims, self.hidden_dims)/np.sqrt(self.hidden_dims)
        self.B2= np.zeros(1, self.hidden_dims)
        self.W3 = np.randonm.randn(self.hidden_dims, self.num_classes)/np.sqrt(self.hidden_dims)
        self.B3= np.zeros(1, self.num_classes)
        
    def forward_propagation(self, inputs):
        
        self.Z1 = np.dot(inputs, self.W1) + self.B1
        self.A1 = np.maximum(0, self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.B2
        self.A2 = np.maximum(0, self.Z2)
        self.Z3 = np.dot(self.A2, self.W3) + self.B3        
        self.A3 = self.softmax(self.Z3)
    
    def compute_loss(self, Y, Y_pred):
        m = Y.shape[1]
        loss = -np.sum(np.multiply(np.log(Y_pred), Y) + np.multiply(np.log(1 - Y_pred), Y))
        loss = (1/m)*loss
        return loss
    
    def backward_propagation(self, X, Y):
        m = Y.shape[1]
        # A3 is of size [batch, num_classes], Y is of size [batch, num_classes] one hot encoding
        self.dZ3 = self.A3 - Y
        #dA2 = dZ3*W3 => [batch_size, num_classes]*[hidden_dims, num_classes].T => C
        self.dA2 =  np.dot(self.dZ3, self.dW3.T)
        #dZ2 = dA2  
        dZ2 = np.multiply(self.dA2, np.int64(self.A2 > 0))
        #dA1 = dZ2*W2 => [batch_size, hidden_dims]*[hidden_dims, hidden_dims].T => C
        self.dA1 =  np.dot(self.dZ2, self.dW2.T)        
        #dZ1 = dA1  
        dZ1 = np.multiply(self.dA1, np.int64(self.A1 > 0))
        
        #Divide by m because this is over a batch
        #dW3 = A2*dZ3 => [batch_size, hidden_dim]T*[batch_size, num_classes] => [hidden_dims, num_classes]
        self.dW3 = 1/m * np.dot(self.A2.T, self.dZ3)
        #dB3 = dZ3 => sum over [batch_size, num_classes] because dB3 is of size [1, num_classes]
        self.dB3 = 1/m * np.sum(self.dZ3, axis=0, keepdims=True)
        #dW2 = A1*dZ2 => [batch_size, hidden_dim]T*[batch_size, hidden_dims] => [hidden_dims, hidden_dims]
        self.dW2 = 1/m * np.dot(self.A1.T, self.dZ2)
        #dB2 = dZ2 => sum over [batch_size, hidden_dims] because dB2 is of size [1, hidden_dims]
        self.dB2 = 1/m * np.sum(self.dZ2, axis=0, keepdims=True)
        #dW1 = inputs*dZ1 => [batch_size, input_dims]T*[batch_size, hidden_dims] => [input_dims, hidden_dims]
        self.dW1 = 1/m * np.dot(self.inputs.T, self.dZ1)
        #dB1 = dZ1 => sum over [batch_size, hidden_dims] because dB3 is of size [1, hidden_dims]
        self.dB1 = 1/m * np.sum(self.dZ1, axis=0, keepdims=True)        
    
    def update_parameters(self, learning_rate):
        self.W1 = self.W1 - learning_rate * self.dW1
        self.B1 = self.B1 - learning_rate * self.dB1
        self.W2 = self.W2 - learning_rate * self.dW2
        self.B2 = self.B2 - learning_rate * self.dB2
        self.W3 = self.W3 - learning_rate * self.dW3
        self.B3 = self.B3 - learning_rate * self.dB3        
        

SyntaxError: invalid syntax (<ipython-input-1-55cd0fa3b856>, line 12)