## Ortega Forward Propagating Neural Network

This is an implementation of a forward-propagating neural network for an MNIST model, built entirely on Python lists. Although not at all practical for real-world use, it was a nice exercise in combining knowledge of data structures and algorithms and applying that knowledge to the field of Machine Learning. I had a blast :). The weights and hidden-layer outputs were exported from a fully trained TensorFlow model; they are either fed in as the weights of this model or used for testing, by comparing them with the outputs of the hidden layers of my Forward Propagating Neural Network. Adding back-propagation at a future point, to obtain a fully trainable model, is not out of the picture, as it would only require backward functionality for the two layer classes.

In [1]:
import os
import numpy as np
import tensorflow as tf
import math
from iteration_utilities import deepflatten

## Tensor Implementation

In [2]:
class Tensor():
    """N-dimensional tensor stored as nested Python lists.

    Attributes:
        shape: list of positive ints, one per axis.
        data: the flat sequence of numbers passed to the constructor.
        tensor: nested-list view of ``data`` in row-major order (last axis
            varies fastest); positions not covered by ``data`` hold ``0.0``.
            It is ``[]`` when ``shape`` is empty.
    """

    def __init__(self, data, shape):
        self.shape = shape
        self.data = data
        self.tensor = self.shape_tensor() if shape else []

    @property
    def transpose(self):
        """Return a new Tensor with the last two axes swapped."""
        tensor = Tensor([], self.shape[:-2] + [self.shape[-1], self.shape[-2]])
        if len(self.shape) == 2:
            for x in range(self.shape[0]):
                for y in range(self.shape[1]):
                    tensor.tensor[y][x] = self.tensor[x][y]
        else:
            # Walk every leading (batch) index and transpose each 2-D slice.
            for i in range(self.num_entries(is_batch=True)):
                t = tensor.get_entry(tensor.entry_loc(i, is_batch=True))
                s = self.get_entry(self.entry_loc(i, is_batch=True))
                for x in range(self.shape[-2]):
                    for y in range(self.shape[-1]):
                        t[y][x] = s[x][y]
        return tensor

    def num_entries(self, is_batch=False):
        """Return the product of the axis sizes.

        With ``is_batch=True`` only the leading axes (all but the last two)
        are multiplied, i.e. the number of 2-D slices.
        """
        num_entries = 1
        dims = self.shape[:-2] if is_batch else self.shape
        for dim in dims:
            num_entries *= dim
        return num_entries

    def entry_loc(self, entry_num, is_batch=False):
        """Convert a flat row-major index into a list of per-axis indices.

        With ``is_batch=True`` only the leading axes (all but the last two)
        are considered, so the result addresses a 2-D slice.
        """
        entry_loc = []
        dims = self.shape[:-2][::-1] if is_batch else self.shape[::-1]
        for elem in dims:
            entry_loc = [entry_num % elem] + entry_loc
            entry_num = entry_num // elem
        return entry_loc

    def shape_compatible(self, tensor2, op):
        """Return True when the shapes allow ``op`` without broadcasting.

        ``'add'`` requires identical shapes; ``'matmul'`` requires identical
        leading (batch) axes.  Any other ``op`` yields False.
        """
        if len(self.shape) == len(tensor2.shape) and op == 'add':
            zipped = zip(self.shape, tensor2.shape)
        elif len(self.shape[:-2]) == len(tensor2.shape[:-2]) and op == 'matmul':
            zipped = zip(self.shape[:-2], tensor2.shape[:-2])
        else:
            return False
        return all(x == y for x, y in zipped)

    def shape_broadcastable(self, tensor2):
        """Return True when ``tensor2`` is a ``[1, n]`` or ``[n]`` row whose
        length equals the size of this tensor's last axis."""
        if len(tensor2.shape) == 2 and self.shape[-1] == tensor2.shape[1] \
           and tensor2.shape[0] == 1:
            return True
        elif len(tensor2.shape) == 1 and self.shape[-1] == tensor2.shape[0]:
            return True
        else:
            return False

    def set_entry(self, entry_loc, entry):
        """Store ``entry`` at the position given by the index list ``entry_loc``."""
        node = self.tensor
        for idx in entry_loc[:-1]:
            node = node[idx]
        node[entry_loc[-1]] = entry

    def get_entry(self, entry_loc):
        """Return the element (or sub-list) addressed by the index list ``entry_loc``."""
        node = self.tensor
        for idx in entry_loc:
            node = node[idx]
        return node

    def shape_tensor(self):
        """Build ``self.tensor`` from ``self.shape`` and ``self.data`` and return it.

        A zero-filled nested list of the requested shape is created first and
        then filled from ``data`` in row-major order.  If ``data`` is shorter
        than the tensor the remaining entries stay ``0.0``; surplus data is
        ignored.
        """
        assert all(isinstance(dim, int) and dim > 0 for dim in self.shape)
        assert all(isinstance(pt, (int, float)) for pt in self.data)

        def build_zeros(shape):
            return [build_zeros(shape[1:]) for _ in range(shape[0])] if shape else 0.0

        self.tensor = build_zeros(self.shape)

        if self.tensor:
            num_entries = min(len(self.data), self.num_entries(is_batch=False))
            for i in range(num_entries):
                self.set_entry(self.entry_loc(i), self.data[i])
        return self.tensor

    @staticmethod
    def _sigmoid(x):
        """Logistic function; saturates to 0.0 instead of overflowing."""
        try:
            return 1 / (1 + (math.e ** (-1 * x)))
        except OverflowError:
            # e ** -x only overflows for very negative x, where sigmoid -> 0.
            return 0.0

    @staticmethod
    def _tanh(x):
        """Hyperbolic tangent; saturates to +/-1.0 instead of overflowing."""
        try:
            pos, neg = math.e ** x, math.e ** (-1 * x)
        except OverflowError:
            return 1.0 if x > 0 else -1.0
        return (pos - neg) / (pos + neg)

    @staticmethod
    def _relu(x):
        """Rectified linear unit."""
        return x if x > 0 else 0

    def apply(self, op):
        """Return a new Tensor with activation ``op`` applied element-wise.

        Args:
            op: one of ``'sigmoid'``, ``'tanh'`` or ``'relu'``.

        Raises:
            ValueError: if ``op`` is not one of the supported names (it was
                previously treated as relu without any warning).
        """
        activations = {
            'sigmoid': self._sigmoid,
            'tanh': self._tanh,
            'relu': self._relu,
        }
        if op not in activations:
            raise ValueError(f'''invalid activation fn: {op} not in {set(activations)}''')
        fn = activations[op]

        tensor = Tensor([], self.shape)
        for i in range(self.num_entries()):
            loc = self.entry_loc(i)
            tensor.set_entry(loc, fn(self.get_entry(loc)))
        return tensor

    def add(self, tensor2):
        """Return the element-wise sum of this tensor and ``tensor2``.

        ``tensor2`` must either have the same shape or be a ``[1, n]`` / ``[n]``
        row that is added to every row along the last axis.

        Raises:
            ValueError: if the shapes are neither equal nor broadcastable.
        """
        if self.shape_compatible(tensor2, 'add'):
            tensor = Tensor([], self.shape)
            for i in range(self.num_entries()):
                loc = self.entry_loc(i)
                tensor.set_entry(loc, self.get_entry(loc) + tensor2.get_entry(loc))
        elif self.shape_broadcastable(tensor2):
            tensor = Tensor([], self.shape)
            n = tensor2.shape[-1]
            for i in range(self.num_entries()):
                loc = self.entry_loc(i)
                # i % n is the position along the last axis for flat index i.
                total = self.get_entry(loc) + \
                        tensor2.get_entry(tensor2.entry_loc(i % n))
                tensor.set_entry(loc, total)
        else:
            raise ValueError(f'''incompatible shapes for add: t1.shape {self.shape}, t2.shape {tensor2.shape}''')

        return tensor

    def dot_product(self, tensor1, tensor2):
        """Return the dot product of two flat sequences of numbers."""
        # Explicit accumulation (rather than sum()) keeps the exact
        # left-to-right floating-point summation order.
        total = 0
        for x, y in zip(tensor1, tensor2):
            total += x * y
        return total

    def matmul(self, tensor2):
        """Return the (batched) matrix product ``self @ tensor2``.

        Both operands need at least two axes, identical leading (batch) axes
        and matching inner dimensions
        (``self.shape[-1] == tensor2.shape[-2]``).

        Raises:
            ValueError: if any of those conditions is violated.
        """
        shapes_ok = (
            len(self.shape) >= 2
            and len(tensor2.shape) >= 2
            and self.shape_compatible(tensor2, 'matmul')
            # Without this check zip() in dot_product would silently truncate.
            and self.shape[-1] == tensor2.shape[-2]
        )
        if not shapes_ok:
            raise ValueError(f'''incompatible shapes for matmul: t1.shape {self.shape}, t2.shape {tensor2.shape}''')

        t = Tensor([], self.shape[:-2] + [self.shape[-2], tensor2.shape[-1]])
        # Rows of the transposed operand are the columns of tensor2, so every
        # output cell is a row-by-row dot product.
        columns = tensor2.transpose

        def fill(out, lhs_rows, rhs_rows):
            for x, lhs_row in enumerate(lhs_rows):
                out_row = out[x]
                for y, rhs_row in enumerate(rhs_rows):
                    out_row[y] = self.dot_product(lhs_row, rhs_row)

        if len(t.shape) > 2:
            for i in range(t.num_entries(is_batch=True)):
                # Batch axes are identical for all three tensors.
                loc = t.entry_loc(i, is_batch=True)
                fill(t.get_entry(loc), self.get_entry(loc), columns.get_entry(loc))
        else:
            fill(t.tensor, self.tensor, columns.tensor)

        return t

    def softmax(self):
        """Return a new 2-D Tensor with softmax applied to every row.

        The row maximum is subtracted before exponentiation so the
        exponentials cannot overflow.

        Raises:
            ValueError: if the tensor is not 2-D.
        """
        if len(self.shape) != 2:
            raise ValueError(f'''incompatible shape for softmax: shape {self.shape}''')

        num_points, num_classes = self.shape[0], self.shape[1]
        t = Tensor(self.data, [num_points, num_classes])
        for pt in range(num_points):
            row = self.tensor[pt]

            max_so_far = float('-inf')
            for c in range(num_classes):
                if max_so_far < row[c]:
                    max_so_far = row[c]

            normalization = 0
            for c in range(num_classes):
                t.tensor[pt][c] = math.e ** (row[c] - max_so_far)
                normalization += t.tensor[pt][c]

            for c in range(num_classes):
                t.tensor[pt][c] /= normalization
        return t

    def argmax(self):
        """Return a 1-D Tensor holding the index of the largest value per row.

        Ties resolve to the lowest index.

        Raises:
            ValueError: if the tensor is not 2-D.
        """
        if len(self.shape) != 2:
            raise ValueError(f'''incompatible shape for argmax: shape {self.shape}''')

        num_points, num_classes = self.shape[0], self.shape[1]
        t = Tensor([], [num_points])
        for pt in range(num_points):
            # Start below every real value so rows of negatives work too.
            max_c, max_prob = 0, float('-inf')
            for c in range(num_classes):
                if max_prob < self.tensor[pt][c]:
                    max_c = c
                    max_prob = self.tensor[pt][c]
            t.tensor[pt] = max_c
        return t


## Layer Classes (Dense Layer + Activation Layer) 

In [3]:
class Dense():
    """Fully connected layer: ``data @ weights + bias``.

    ``data``, ``weights`` and ``bias`` are stored as given; ``data`` must
    provide ``matmul`` and the product must provide ``add``.
    """

    def __init__(self, data, weights, bias):
        self.data, self.weights, self.bias = data, weights, bias

    def forward(self):
        """Return the layer output: the matmul result with the bias added."""
        product = self.data.matmul(self.weights)
        return product.add(self.bias)


class Activation():
    """Element-wise activation layer.

    Args:
        data: object exposing ``apply(op)``; stored as given.
        activation: one of ``'sigmoid'``, ``'tanh'`` or ``'relu'``.

    Raises:
        ValueError: if ``activation`` is not a supported name.
    """

    def __init__(self, data,  activation):
        valid_ops = {'sigmoid', 'tanh', 'relu'}
        self.data = data
        if activation not in valid_ops:
            # The message must interpolate ``activation``; the name ``op`` does
            # not exist in this scope and used to trigger a NameError here.
            raise ValueError(f'''invalid activation fn: {activation} not in {valid_ops}''')
        self.activation = activation

    def forward(self):
        """Return ``data`` with the configured activation applied."""
        return self.data.apply(self.activation)

## Model Class

In [4]:
class MNIST_Model():
    """Forward-only network: four sigmoid dense layers, softmax, argmax.

    Args:
        weights: mapping with keys ``'w1'..'w4'`` and ``'b1'..'b4'`` holding
            the weights and biases handed to each ``Dense`` layer.

    Attributes:
        tensors: ``None`` until ``run`` is called; afterwards a dict of NumPy
            arrays keyed ``'l0'..'l4'``, ``'softmax'`` and ``'predict'``.
    """

    def __init__(self, weights):
        self.weights = weights
        self.tensors = None
        # Input object of the most recent run(); lets predict() detect reuse.
        self._last_input = None

    def predict(self, data):
        """Return the predicted class per row of ``data``.

        The cached result of the previous ``run`` is reused only when it was
        computed for this very ``data`` object; comparing row counts alone
        would hand back stale predictions for different data of equal size.
        """
        cache_valid = (
            self.tensors
            and self._last_input is data
            and self.tensors['predict'].shape[0] == data.shape[0]
        )
        if not cache_valid:
            self.run(data)
        return self.tensors['predict']

    def run(self, data):
        """Run the forward pass on ``data`` and return every intermediate output.

        Prints a progress line after each stage.  The returned dict (also
        stored in ``self.tensors``) maps ``'l0'`` to the input, ``'l1'..'l4'``
        to the sigmoid outputs of the dense layers, ``'softmax'`` to the
        softmax of layer 4 and ``'predict'`` to its row-wise argmax.
        """
        num_layers = 4
        self.tensors = {}
        self.tensors['l0'] = np.array(data.tensor)

        # Feed the *argument* through the network (layer 1 used to read the
        # global ``x_test`` regardless of what was passed in).
        activations = data
        for idx in range(1, num_layers + 1):
            pre_activation = Dense(
                activations, self.weights[f'w{idx}'], self.weights[f'b{idx}']
            ).forward()
            activations = Activation(pre_activation, 'sigmoid').forward()
            self.tensors[f'l{idx}'] = np.array(activations.tensor)
            print(f'l{idx} done')

        softmax = activations.softmax()
        self.tensors['softmax'] = np.array(softmax.tensor)
        print('softmax done')
        preds = softmax.argmax()
        self.tensors['predict'] = np.array(preds.tensor)
        print('argmax done')
        self._last_input = data
        return self.tensors

    def accuracy(self, y_pred, y):
        """Return the fraction of positions where ``y_pred[i] == y[i]``.

        Prints the number of correct predictions.  Returns ``0.0`` for empty
        ``y_pred`` instead of dividing by zero.
        """
        total = len(y_pred)
        correct_pred = 0
        for i in range(total):
            if y_pred[i] == y[i]:
                correct_pred += 1
        print('correct preds: ', correct_pred)
        if total == 0:
            return 0.0
        return correct_pred / total

### Loading Weight Data

In [5]:
# Load the exported weight/bias arrays, one file per parameter.
model_weights = {}
names = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4']
for name in names:
    # Close each handle as soon as the array is read.
    # NOTE(review): assumes the files hold single .npy arrays, which np.load
    # reads completely before returning - confirm against the export step.
    with open(f'weights/{name}.txt', 'rb') as file:
        model_weights[name] = np.load(file)

In [6]:
# Show the shape of every loaded parameter array.
for name in model_weights:
    print(name, model_weights[name].shape)

w1 (784, 784)
b1 (784,)
w2 (784, 200)
b2 (200,)
w3 (200, 20)
b3 (20,)
w4 (20, 10)
b4 (10,)


### Loading Model Data




In [7]:
# Fetch MNIST through Keras and switch the Keras backend to float64.
mnist = tf.keras.datasets.mnist
tf.keras.backend.set_floatx('float64')

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide pixel values by 255.0, then flatten each image to a 784-long row.
x_train = (x_train / 255.0).reshape((60000, 784))
x_test = (x_test / 255.0).reshape((10000, 784))

In [8]:
# Sanity check: display the shapes of the train/test inputs and labels.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((60000, 784), (10000, 784), (60000,), (10000,))

### Pre-process weights/data for Network

In [9]:
# Replace every NumPy parameter array with an equivalent list-backed Tensor.
# Only existing keys are reassigned, so iterating the dict meanwhile is safe.
for name in model_weights:
    model_weights[name] = Tensor(
        data=list(deepflatten(model_weights[name].tolist())),
        shape=list(model_weights[name].shape),
    )
    print(model_weights[name])

<__main__.Tensor object at 0x00000139F5650220>
<__main__.Tensor object at 0x00000139BE0DA670>
<__main__.Tensor object at 0x00000139F56398B0>
<__main__.Tensor object at 0x00000139BE0DAA30>
<__main__.Tensor object at 0x00000139BFA7FF40>
<__main__.Tensor object at 0x00000139BE0C0E50>
<__main__.Tensor object at 0x00000139BFA7F3A0>
<__main__.Tensor object at 0x00000139BFA7FB20>


In [10]:
# Convert the NumPy datasets into list-backed Tensors, printing each shape.
# The 2-D inputs are flattened first; the 1-D label arrays are already flat.
x_train = Tensor(
    data=list(deepflatten(x_train.tolist())),
    shape=list(x_train.shape),
)
print(x_train.shape)

y_train = Tensor(data=y_train.tolist(), shape=list(y_train.shape))
print(y_train.shape)

x_test = Tensor(
    data=list(deepflatten(x_test.tolist())),
    shape=list(x_test.shape),
)
print(x_test.shape)

y_test = Tensor(data=y_test.tolist(), shape=list(y_test.shape))
print(y_test.shape)

[60000, 784]
[60000]
[10000, 784]
[10000]


### Testing Activation Class

In [11]:
# Three sample inputs (-1, 0, 1) used to exercise the activation functions.
data = list(range(-1, 2))

In [12]:
# One independent 3-element Tensor per activation under test.
s_t, t_t, r_t = (Tensor(data, [3]) for _ in range(3))

# Wrap each tensor in the activation layer it is meant to exercise.
a_1 = Activation(data=s_t, activation='sigmoid')
a_2 = Activation(data=t_t, activation='tanh')
a_3 = Activation(data=r_t, activation='relu')

In [13]:
# Print the sigmoid, tanh and relu outputs for the inputs -1, 0, 1.
print(a_1.forward().tensor)
print(a_2.forward().tensor)
print(a_3.forward().tensor)

[0.2689414213699951, 0.5, 0.7310585786300049]
[-0.7615941559557649, 0.0, 0.7615941559557649]
[0, 0, 1]


### Testing Outputs of Hidden Layers

In [None]:
# Build the model from the converted weights and run the full forward pass on
# the test set; run() returns the dict of per-layer outputs as NumPy arrays.
m = MNIST_Model(model_weights)
my_hidden_layers = m.run(x_test)

In [None]:
# Load the reference layer outputs exported from the TensorFlow model.
model_layers = {}
names = ['l0', 'l1', 'l2', 'l3', 'l4', 'softmax', 'predict']
for name in names:
    # Close each handle as soon as the array is read.
    # NOTE(review): assumes the files hold single .npy arrays, which np.load
    # reads completely before returning - confirm against the export step.
    with open(f'hidden_layer/{name}.txt', 'rb') as file:
        model_layers[name] = np.load(file)

In [None]:
# Compare each of my layer outputs with the reference and report the summed
# absolute difference.
for name, test_layer in model_layers.items():
    my_layer = my_hidden_layers[name]
    if name == 'predict':
        # The reference 'predict' array is reduced to class indices along
        # axis 1 so it lines up with my argmax output.
        test_layer = test_layer.argmax(axis=1)
    print(my_layer.shape, test_layer.shape, 'shapes')
    print(f'''output error for {name}:''', np.abs(my_layer - test_layer).sum())

### Verifying Model Accuracy


In [None]:
# Score the cached predictions from the earlier run() against the test labels.
print(f'''this is my model accuracy {m.accuracy(m.tensors['predict'], y_test.tensor)}''')