In [5]:
import numpy as np
import tensorflow as tf

In [6]:
class deepFuzzy:
    """Fully connected feed-forward network trained with batch gradient descent.

    ``layers`` is a sequence of ``[units, activation]`` pairs.  Entry 0
    describes the input (only its unit count is used); entry ``i >= 1`` gives
    the width and the activation (``'relu'`` or ``'sigmoid'``) of layer ``i``.

    Data is laid out column-wise: inputs are ``(features, samples)`` and
    targets are ``(outputs, samples)``.

    State lives in per-instance dictionaries keyed by name + layer index:
    ``W["W1"]``, ``b["b1"]``, ``A["A0"]`` (the input), ``Z["Z1"]``,
    ``grads["dW1"]``, ``grads["db1"]``, ``grads["dA1"]`` and so on.
    """

    # Predictions are clamped to [_EPS, 1 - _EPS] before taking logs or
    # dividing, so a saturated sigmoid cannot produce inf/nan.
    _EPS = 1e-12

    def __init__(self,layers, x_train,x_test,y_train,y_test):
        """Store the architecture and the data splits.

        Args:
            layers: sequence of ``[units, activation]`` pairs (see class doc).
            x_train: training inputs, shape ``(layers[0][0], m)``.
            x_test: held-out inputs (stored, not used by the methods here).
            y_train: training targets, shape ``(layers[-1][0], m)``.
            y_test: held-out targets (stored, not used by the methods here).
        """
        self.layers = layers
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test
        self.L = len(layers)
        # Containers are created per instance; as class attributes they would
        # be shared (and silently overwritten) across every network object.
        self.W = dict()
        self.b = dict()
        self.A = dict()
        self.Z = dict()
        self.grads = dict()

    def initialize(self, initializer = 'random'):
        """Create weights and biases for layers ``1 .. L-1``.

        Args:
            initializer: ``'random'`` scales standard-normal weights by 0.01;
                ``'he'`` scales them by ``sqrt(2 / fan_in)``.  Biases are drawn
                from a standard normal in both cases.

        Raises:
            ValueError: if ``initializer`` is not a supported name.
        """
        if initializer not in ('random', 'he'):
            raise ValueError("unknown initializer: %r" % (initializer,))
        for i in range(1,self.L):
            n_out = self.layers[i][0]
            n_in = self.layers[i-1][0]
            scale = 0.01 if initializer == 'random' else np.sqrt(2.0 / n_in)
            self.W["W" + str(i)] = np.random.randn(n_out, n_in) * scale
            self.b["b" + str(i)] = np.random.randn(n_out, 1)

    ###ACTIVATIONS###
    @staticmethod
    def _sigmoid(Z):
        """Element-wise logistic function."""
        return 1.0 / (1.0 + np.exp(-Z))

    @staticmethod
    def _relu(Z):
        """Element-wise rectified linear unit."""
        return np.maximum(0, Z)

    @staticmethod
    def _sigmoid_backward(dA, Z):
        """Return dZ for a sigmoid layer given dA and its pre-activation Z."""
        s = deepFuzzy._sigmoid(Z)
        return dA * s * (1.0 - s)

    @staticmethod
    def _relu_backward(dA, Z):
        """Return dZ for a relu layer: gradient passes only where Z > 0."""
        return dA * (Z > 0)

    ###FORWARD PROP###
    @staticmethod
    def forward(A,W,b):
        """Linear step: return ``Z = W . A + b``."""
        Z = np.dot(W,A) + b
        return Z

    @staticmethod
    def forwardAct(A,W,b,activation = 'sigmoid'):
        """Linear step followed by an activation.

        Returns:
            ``(A_next, Z)``: the activated output and the pre-activation.

        Raises:
            ValueError: if ``activation`` is neither ``'sigmoid'`` nor ``'relu'``.
        """
        Z = deepFuzzy.forward(A,W,b)
        if activation == 'sigmoid':
            A_next = deepFuzzy._sigmoid(Z)
        elif activation == 'relu':
            A_next = deepFuzzy._relu(Z)
        else:
            raise ValueError("unknown activation: %r" % (activation,))
        return A_next, Z

    def forwardProp(self):
        """Run ``x_train`` through every layer, caching ``A`` and ``Z``.

        Returns:
            The activation of the last layer (also stored as ``A["A<L-1>"]``).
        """
        A = self.x_train
        self.A["A" + str(0)] = A
        for i in range (1,self.L):
            # Layer i uses its own activation, layers[i][1]; parameters are
            # stored under the prefixed keys written by initialize().
            A, Z = deepFuzzy.forwardAct(
                A, self.W["W" + str(i)], self.b["b" + str(i)],
                activation = self.layers[i][1])
            self.A["A" + str(i)] = A
            self.Z["Z" + str(i)] = Z
        return A

    def compute_cost(self):
        """Binary cross-entropy of the cached output against ``y_train``.

        The loss is summed over output units and averaged over the ``m``
        samples, so it is a scalar for any number of outputs.  Requires a
        prior call to ``forwardProp``.
        """
        m = self.y_train.shape[1]
        Y = self.y_train
        AL = np.clip(self.A["A" + str(self.L-1)], self._EPS, 1.0 - self._EPS)
        # Element-wise product + sum; a matrix product here would mix the
        # different output units together and return an (outputs x outputs)
        # matrix instead of a scalar.
        cost = -(1./m) * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
        return float(cost)

    ###BACKWARD PROP###
    @staticmethod
    def backward(dZ, cache):
        """Gradients of the linear step.

        Args:
            dZ: gradient of the cost w.r.t. this layer's ``Z``.
            cache: ``(A_prev, W, b)`` used in the forward linear step.

        Returns:
            ``(dA_prev, dW, db)``.
        """
        A_prev, W, b = cache
        m = A_prev.shape[1]
        dW = 1.0/m * dZ.dot(A_prev.T)
        db = 1.0/m * np.sum(dZ,axis=1,keepdims=True)
        dA_prev = W.T.dot(dZ)
        return dA_prev, dW, db

    @staticmethod
    def backwardAct(dA, cache, activation):
        """Gradients of an activation + linear step.

        Args:
            dA: gradient of the cost w.r.t. this layer's activation.
            cache: ``(linear_cache, Z)`` where ``linear_cache`` is
                ``(A_prev, W, b)`` and ``Z`` is the layer's pre-activation.
            activation: ``'relu'`` or ``'sigmoid'``.

        Raises:
            ValueError: if ``activation`` is not supported.
        """
        linear_cache, activation_cache = cache
        if activation == "relu":
            dZ = deepFuzzy._relu_backward(dA, activation_cache)
        elif activation == "sigmoid":
            dZ = deepFuzzy._sigmoid_backward(dA, activation_cache)
        else:
            raise ValueError("unknown activation: %r" % (activation,))
        return deepFuzzy.backward(dZ, linear_cache)

    def backwardProp(self):
        """Back-propagate the cross-entropy loss through every layer.

        Fills ``grads`` with ``dW<l>`` / ``db<l>`` for ``l = 1 .. L-1`` and
        ``dA<l>`` for ``l = 0 .. L-1``.  Requires a prior ``forwardProp``.
        """
        L = self.L
        AL = self.A["A" + str(L-1)]
        Y = self.y_train.reshape(AL.shape)
        AL_safe = np.clip(AL, self._EPS, 1.0 - self._EPS)
        # Derivative of the cross-entropy w.r.t. the output activation seeds
        # the backward pass.
        self.grads["dA" + str(L-1)] = - (np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))
        for l in reversed(range(1, L)):
            # Layer l maps A[l-1] to A[l]; cache order matches backwardAct.
            current_cache = (
                (self.A["A"+str(l-1)], self.W["W"+str(l)], self.b["b"+str(l)]),
                self.Z["Z" + str(l)],
            )
            dA_prev_temp, dW_temp, db_temp = deepFuzzy.backwardAct(
                self.grads["dA"+str(l)], current_cache,
                activation = self.layers[l][1])
            self.grads["dA" + str(l-1)] = dA_prev_temp
            self.grads["dW" + str(l)] = dW_temp
            self.grads["db" + str(l)] = db_temp

    def update_parameters(self,learning_rate):
        """Take one gradient-descent step on every ``W`` and ``b``."""
        # Only layers 1 .. L-1 own parameters.
        for l in range(1, self.L):
            self.W["W" + str(l)] = self.W["W" + str(l)] - learning_rate * self.grads["dW" + str(l)]
            self.b["b" + str(l)] = self.b["b" + str(l)] - learning_rate * self.grads["db" + str(l)]


In [7]:
# Architecture as [unit count, activation name] pairs, one per layer,
# starting with the 784-wide input and ending with the 10-unit sigmoid output.
layers = [
    [units, act]
    for units, act in zip(
        (784, 50, 12, 45, 10),
        ('relu', 'relu', 'relu', 'relu', 'sigmoid'),
    )
]

In [8]:
def load_mnist():
    """Load MNIST via Keras and return column-major arrays.

    Pixel values are divided by 255.0, each image is flattened to one
    column, and the integer labels are one-hot encoded over 10 classes.

    Returns:
        ``(x_train_flat, x_test_flat, y_train_onehot, y_test_onehot)`` where
        the ``x`` arrays are ``(pixels, samples)`` and the ``y`` arrays are
        ``(10, samples)``.
    """
    mnist = tf.keras.datasets.mnist
    (x_train, y_train),(x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    nClass = 10
    # One row per image after reshape; transpose so samples run along axis 1.
    x_train_flat = x_train.reshape(x_train.shape[0],-1).T
    x_test_flat = x_test.reshape(x_test.shape[0],-1).T
    # Indexing the identity matrix by label yields (samples, classes);
    # transpose both splits so they share the (classes, samples) layout.
    y_train_onehot = np.eye(nClass)[y_train].T
    y_test_onehot = np.eye(nClass)[y_test].T
    return x_train_flat, x_test_flat, y_train_onehot, y_test_onehot

In [9]:
# Bind the four arrays returned by load_mnist() (flattened inputs and one-hot
# labels for the train and test splits) to module-level names.
x_train, x_test, y_train, y_test = load_mnist()