In [None]:
def initialize_parameters():
    """Create the trainable variables of the network.

    Every tensor, biases included, is drawn from a single Glorot-normal
    initializer seeded with 1. The shapes are fixed:

        W1: (10, 512)    b1: (10, 1)
        W2: (4, 10)      b2: (4, 1)

    Returns:
        dict mapping "W1", "b1", "W2" and "b2" to ``tf.Variable`` objects.
    """
    glorot = tf.keras.initializers.GlorotNormal(seed=1)

    # Listed in creation order; the initializer is invoked once per entry,
    # in exactly this sequence.
    layout = (
        ("W1", (10, 512)),
        ("b1", (10, 1)),
        ("W2", (4, 10)),
        ("b2", (4, 1)),
    )
    return {name: tf.Variable(glorot(shape=dims)) for name, dims in layout}
def forward_propagation(X, parameters):
    """Run the forward pass LINEAR -> RELU -> LINEAR and return the logits.

    Args:
        X: input tensor that is left-multiplied by ``W1``; presumably laid
            out as (features, examples) -- confirm against the caller.
        parameters: dict holding the tensors "W1", "b1", "W2" and "b2".

    Returns:
        The output of the second linear layer. No softmax is applied here.
    """
    # Pull out all four tensors first so a missing key fails before any math.
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    hidden_linear = tf.math.add(tf.linalg.matmul(W1, X), b1)
    hidden_activation = tf.keras.activations.relu(hidden_linear)
    output_linear = tf.math.add(tf.linalg.matmul(W2, hidden_activation), b2)
    return output_linear

def compute_total_loss(logits, labels):
    """Return the categorical cross-entropy summed over every example.

    Both arguments are transposed before being handed to Keras, and the
    loss is evaluated with ``from_logits=True``, so ``logits`` must be raw
    (un-softmaxed) scores.

    Args:
        logits: network outputs, in the same layout as ``labels``.
        labels: target distributions; presumably one-hot columns -- confirm
            against the data preparation code.

    Returns:
        Scalar tensor: the sum (not the mean) of the per-example losses.
    """
    y_true = tf.transpose(labels)
    y_pred = tf.transpose(logits)
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        y_true, y_pred, from_logits=True
    )
    return tf.reduce_sum(per_example_loss)

def model(x_train_norm, y_train_label, x_test_norm, y_test_label,
          learning_rate=0.1, num_epochs=1500, print_cost=True):
    """Train the network with full-batch Adam and record loss/accuracy.

    Each epoch performs one forward pass over the entire training set, one
    gradient computation and one optimizer step (there is no mini-batching).

    Args:
        x_train_norm: training inputs with one example per column; the
            number of examples is read from ``shape[1]``.
        y_train_label: training labels in the same (classes, examples)
            layout as the logits, since ``compute_total_loss`` transposes
            both. Presumably one-hot encoded -- confirm against the data
            preparation code.
        x_test_norm: test inputs, same layout as ``x_train_norm``.
        y_test_label: test labels, same layout as ``y_train_label``.
        learning_rate: learning rate passed to the Adam optimizer.
        num_epochs: number of full passes over the training set.
        print_cost: when truthy, every 10th epoch the cost and accuracies
            are printed and appended to the returned histories. When falsy
            the histories stay empty and the test set is never evaluated.

    Returns:
        Tuple ``(parameters, costs, train_acc, test_acc)``: the trained
        parameter dict and three lists with one entry per logged epoch.
    """
    costs = []
    train_acc = []
    test_acc = []

    parameters = initialize_parameters()
    # Built once: the variables are the same objects for the whole run.
    trainable_variables = [parameters[name] for name in ("W1", "b1", "W2", "b2")]

    optimizer = tf.keras.optimizers.Adam(learning_rate)
    test_accuracy = tf.keras.metrics.CategoricalAccuracy()
    train_accuracy = tf.keras.metrics.CategoricalAccuracy()
    m = x_train_norm.shape[1]

    # CategoricalAccuracy takes the argmax over the LAST axis, so it needs
    # (examples, classes). Labels and logits here are (classes, examples);
    # without the transpose the metric would compare argmax over examples.
    y_train_rows = tf.transpose(y_train_label)
    y_test_rows = tf.transpose(y_test_label)

    # Do the training loop
    for epoch in range(num_epochs):
        # Reset so the reported accuracy covers this epoch only.
        # NOTE(review): newer Keras releases name this reset_state() --
        # confirm against the installed TensorFlow version.
        train_accuracy.reset_states()

        with tf.GradientTape() as tape:
            # 1. predict
            Z2 = forward_propagation(x_train_norm, parameters)

            # 2. loss (summed over all training examples)
            total_loss = compute_total_loss(Z2, y_train_label)

        # Accuracy is measured on the pre-update predictions of this epoch.
        train_accuracy.update_state(y_train_rows, tf.transpose(Z2))

        grads = tape.gradient(total_loss, trainable_variables)
        optimizer.apply_gradients(zip(grads, trainable_variables))

        # Average cost per example for this epoch.
        epoch_total_loss = total_loss / m

        # Print the cost every 10 epochs
        if print_cost and epoch % 10 == 0:
            print("Cost after epoch %i: %f" % (epoch, epoch_total_loss))
            print("Train accuracy:", train_accuracy.result())

            # The test set is evaluated only on logged epochs to avoid
            # computational overhead; start from a clean metric each time.
            test_accuracy.reset_states()
            test_logits = forward_propagation(x_test_norm, parameters)
            test_accuracy.update_state(y_test_rows, tf.transpose(test_logits))
            print("Test_accuracy:", test_accuracy.result())

            costs.append(epoch_total_loss)
            train_acc.append(train_accuracy.result())
            test_acc.append(test_accuracy.result())

    return parameters, costs, train_acc, test_acc
    
    
# Train for 1000 epochs (the remaining hyperparameters keep model()'s
# defaults) and keep the trained parameters plus the logged histories.
parameters, costs, train_acc, test_acc = model(
    x_train_norm,
    y_train_label,
    x_test_norm,
    y_test_label,
    num_epochs=1000,
)