In [2]:
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt

# Desired architecture:
#     3-layer neural network (hidden layer sizes: 300, 150)
#     softmax activation on the output layer
#     cross-entropy loss (each softmax output depends on every input, so backprop through it needs the Jacobian, not an element-wise derivative)

In [19]:
(train_x, train_y), (test_x, test_y) = mnist.load_data()
# Labels are deliberately left 1-D here (no reshape to a column vector);
# format_Y turns them into a one-hot matrix further down.

# Report the shape of every split, one per line.
print(
    "train_x shape: {}".format(train_x.shape),
    "train_y shape: {}".format(train_y.shape),
    "test_x shape: {}".format(test_x.shape),
    "test_y shape: {}".format(test_y.shape),
    sep="\n",
)

train_x shape: (60000, 28, 28)
train_y shape: (60000,)
test_x shape: (10000, 28, 28)
test_y shape: (10000,)


In [71]:

def plot_image(idx,dataset="train"):
    """Print the label of one sample and draw its pixels with matplotlib.

    Args:
        idx: Index of the sample inside the chosen split.
        dataset: "train" selects the module-level train_x/train_y arrays;
            any other value selects test_x/test_y.
    """
    if dataset == "train":
        images, labels = train_x, train_y
    else:
        images, labels = test_x, test_y

    pixels = images[idx]
    label = labels[idx]
    print("Showing pixel plot of a {} from {} dataset".format(label,dataset))
    plt.imshow(pixels)


def one_hot_encode(digit: int, num_classes: int = 10) -> np.ndarray:
    """Return the one-hot encoding of a class label.

    Args:
        digit: Class index, expected in the range [0, num_classes).
        num_classes: Length of the encoded vector. Defaults to 10 (the digits 0-9).

    Returns:
        A float vector of shape (num_classes,) that is 1 at position ``digit``
        and 0 everywhere else.

    Raises:
        IndexError: If ``digit`` lies outside [0, num_classes).
    """
    # Reject negative labels explicitly: plain indexing would wrap around and
    # silently encode e.g. -1 as the last class.
    if not 0 <= digit < num_classes:
        raise IndexError(
            "digit {} is out of range for {} classes".format(digit, num_classes)
        )
    encoded = np.zeros(num_classes)
    encoded[digit] = 1
    return encoded


def format_X(X: np.ndarray) -> np.ndarray:
    """Flatten every sample into a column and stack the columns side by side.

    Args:
        X: Array of shape (m, d1, d2, ...), where m is the number of samples
            and the remaining axes describe one sample (e.g. (m, 28, 28) images).

    Returns:
        Array of shape (n, m) with n = d1 * d2 * ...; column j holds sample j
        flattened in row-major order.
    """
    X = np.asarray(X)
    # An explicit feature count (rather than -1) keeps the reshape valid for an
    # empty batch, where numpy cannot infer the missing dimension.
    n_features = int(np.prod(X.shape[1:]))
    return X.reshape(X.shape[0], n_features).T

def format_Y(Y: np.array) -> np.array:
    """One-hot encode every label in Y and return the encodings as columns.

    Each label is passed through one_hot_encode; the per-label vectors are
    stacked as rows and then transposed so that column j belongs to label j.
    """
    encoded_rows = np.array(list(map(one_hot_encode, Y)))
    return np.transpose(encoded_rows)

class Net:
    """Fully connected classifier: sigmoid hidden layers followed by a softmax output layer.

    The input size is taken from ``train_X`` and the size of the softmax layer
    from ``train_Y``, so only the hidden layer sizes are passed explicitly.

    Data layout (m = number of samples):
        * X has shape (n, m), where n is the number of inputs.
        * Y has shape (o, m), where o is the number of neurons in the softmax layer.

    Attributes:
        weights: One matrix per layer; ``weights[l]`` has shape
            (units produced by layer l, units fed into layer l).
        biases: One column vector per layer, aligned with ``weights``.
        cache: Intermediate values of the latest forward/backward pass:
            'A'[i]  activation of layer i ('A'[0] is ``train_X``),
            'Z'[i]  pre-activation of layer i ('Z'[0] is a ``None`` placeholder
                    so that indices line up with 'A'),
            'dW'[l] / 'dB'[l]  gradient of the mean loss w.r.t. ``weights[l]`` / ``biases[l]``,
            'dZ'[l] per-sample loss gradient w.r.t. the pre-activation produced by ``weights[l]``,
            'dA'[i] per-sample loss gradient w.r.t. 'A'[i] ('dA'[0] is ``None``:
                    the gradient w.r.t. the raw input is never needed).
    """

    def __init__(self,layer_dims,train_X,train_Y):
        """Store the training data and initialise all parameters.

        Args:
            layer_dims: Sizes of the hidden layers, in order (e.g. ``[300, 150]``).
            train_X: Inputs of shape (n, m).
            train_Y: One-hot targets of shape (o, m).

        Raises:
            AssertionError: If ``train_X`` and ``train_Y`` disagree on the number of samples.
        """
        self.train_X = train_X
        self.train_Y = train_Y
        self.output_size = train_Y.shape[0]
        self.input_size = train_X.shape[0]
        assert train_X.shape[1] == train_Y.shape[1], "train_X and train_Y need the same number of samples"
        self.sample_size = train_X.shape[1]

        # Unit counts of every layer, input and softmax layers included.
        sizes = [self.input_size, *layer_dims, self.output_size]
        self.weights = [
            np.random.randn(n_out, n_in)
            for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]
        self.biases = [np.zeros((n_out, 1)) for n_out in sizes[1:]]

        self.cache = {
            'A': [self.train_X],
            'Z': [None], # just so that Z[i] corresponds to the Z of the i-th layer (as opposed to i-1)
            'dA': [],
            'dZ': [],
            'dW': [],
            'dB': [],
        }

    @staticmethod
    def sigmoid(z):
        """Element-wise logistic function 1 / (1 + exp(-z)).

        Evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that
        large-magnitude inputs neither overflow nor emit warnings.
        """
        z = np.asarray(z, dtype=float)
        return np.exp(-np.logaddexp(0.0, -z))

    @staticmethod
    def softmax(z):
        """Column-wise softmax, computed in a numerically stable way.

        Args:
            z: Scores of shape (o, m); each column is one sample.

        Returns:
            Array of the same shape whose columns each sum to 1.
        """
        z = np.asarray(z, dtype=float)
        # Subtracting the per-column maximum leaves the result unchanged but
        # keeps exp() from overflowing.
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_shifted = np.exp(shifted)
        # Normalise per sample (column), not over the whole matrix.
        return exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)

    @staticmethod
    def compute_cross_entropy_loss(Y,Yhat):
        """Mean cross-entropy loss over all samples.

        Args:
            Y: Targets of shape (o, m), m samples and o outputs.
            Yhat: Predicted probabilities of the same shape.

        Returns:
            The scalar loss J = -(1/m) * sum(Y * log(Yhat)).

        Raises:
            AssertionError: If the shapes of ``Y`` and ``Yhat`` differ.
        """
        Y = np.asarray(Y)
        Yhat = np.asarray(Yhat)
        assert Y.shape == Yhat.shape
        m = Y.shape[1]
        # Only entries with a non-zero target contribute; skipping the others
        # avoids 0 * log(0) = nan when a probability underflows to exactly 0.
        active = Y != 0
        return -np.sum(Y[active] * np.log(Yhat[active])) / m

    def forward_propagate(self):
        """Run ``train_X`` through the network and return the predictions.

        The 'A' and 'Z' cache entries are rebuilt on every call, so repeated
        calls do not accumulate stale activations.

        Returns:
            Yhat, the softmax output of shape (o, m).
        """
        activations = [self.train_X]
        pre_activations = [None]  # placeholder so that Z[i] belongs to layer i
        output_layer = len(self.weights) - 1

        for layer, (W, b) in enumerate(zip(self.weights, self.biases)):
            Z = np.dot(W, activations[-1]) + b
            A = self.softmax(Z) if layer == output_layer else self.sigmoid(Z)
            pre_activations.append(Z)
            activations.append(A)

        self.cache['A'] = activations
        self.cache['Z'] = pre_activations
        return activations[-1] #Yhat

    def back_propagate(self,Y):
        """Back-propagate the cross-entropy loss through every layer.

        The softmax layer is handled first (combined with the cross-entropy
        loss its gradient w.r.t. Z is simply Yhat - Y); the sigmoid layers
        follow in reverse order. Results are written to the 'dW', 'dB', 'dZ'
        and 'dA' entries of ``self.cache``.

        Args:
            Y: One-hot targets of shape (o, m), matching the latest forward pass.

        Raises:
            RuntimeError: If ``forward_propagate`` has not been run yet.
        """
        activations = self.cache['A']
        num_layers = len(self.weights)
        if len(activations) != num_layers + 1:
            raise RuntimeError("forward_propagate() must be called before back_propagate()")

        m = Y.shape[1]
        dW = [None] * num_layers
        dB = [None] * num_layers
        dZ = [None] * num_layers
        dA = [None] * num_layers

        dZ[-1] = activations[-1] - Y
        for layer in range(num_layers - 1, -1, -1):
            A_prev = activations[layer]  # input that weights[layer] was applied to
            dW[layer] = np.dot(dZ[layer], A_prev.T) / m
            # keepdims keeps the (units, 1) shape of the bias vectors.
            dB[layer] = np.sum(dZ[layer], axis=1, keepdims=True) / m
            if layer > 0:
                dA[layer] = np.dot(self.weights[layer].T, dZ[layer])
                # A_prev is a sigmoid output, whose derivative is A * (1 - A).
                dZ[layer - 1] = dA[layer] * A_prev * (1 - A_prev)

        self.cache['dW'] = dW
        self.cache['dB'] = dB
        self.cache['dZ'] = dZ
        self.cache['dA'] = dA
            
        
    
        


In [28]:
# Convert the raw MNIST arrays into the (features x samples) layout Net expects.
formatted_X = format_X(train_x)
formatted_Y = format_Y(train_y)
print(formatted_X.shape, formatted_Y.shape, sep="\n")

(784, 60000)
(10, 60000)


In [73]:
# Scratch check with large scores. Only the last expression of a cell is
# displayed, so the softmax result is computed but not shown.
Net.softmax([[1000, 2000, 3000] for _ in range(3)])
np.asarray([[1000, 2000, 3000] for _ in range(3)]).sum(axis=0)

array([3000, 6000, 9000])

In [76]:
# Scratch check of axis semantics: axis=1 sums along each row.
r = [[value] * 3 for value in (1, 2, 3)]

np.asarray(r).sum(axis=1)

array([3, 6, 9])

0 3
1 2
2 1
