# Neural network implementation

In [1]:
import numpy as np
import os

from fault_tolerant_ml.data.mnist import MNist
from fault_tolerant_ml.ml import nn
import fault_tolerant_ml.ml.nn.activation as F

%reload_ext autoreload
%autoreload 2

## Read in data

In [2]:
# Directory holding the gzipped MNIST IDX archives, relative to this notebook.
data_dir = "../data"

# Image/label archive paths for each split, in the layout passed to MNist.
filepaths = dict(
    train=dict(
        images=os.path.join(data_dir, "train-images-idx3-ubyte.gz"),
        labels=os.path.join(data_dir, "train-labels-idx1-ubyte.gz"),
    ),
    test=dict(
        images=os.path.join(data_dir, "t10k-images-idx3-ubyte.gz"),
        labels=os.path.join(data_dir, "t10k-labels-idx1-ubyte.gz"),
    ),
)
mnist = MNist(filepaths)

In [3]:
mnist

<MNist X_train=(60000, 784), y_train=(60000, 10), X_test=(10000, 784), y_test=(10000, 10)>

In [4]:
# Second axis of the training matrices: feature count and class count.
n_features = mnist.X_train.shape[1]
n_classes = mnist.y_train.shape[1]
print(f"n_features={n_features}, n_classes={n_classes}")

n_features=784, n_classes=10


In [5]:
# Standard-normal weight matrix with one column per class.
theta = np.random.standard_normal(size=(n_features, n_classes))

In [6]:
class NeuralNet(nn.Model):
    """Two-layer fully connected network with a sigmoid after each layer.

    The layer sizes default to the values used in this notebook
    (784 inputs, 128 hidden units, 10 outputs) but can be overridden.
    """

    def __init__(self, n_inputs=784, n_hidden=128, n_outputs=10):
        """Create the two layers and the shared activation function.

        Args:
            n_inputs: Number of input features per sample.
            n_hidden: Number of units in the hidden layer.
            n_outputs: Number of output units (classes).
        """
        # NOTE(review): nn.Model.__init__ is not called here, as in the
        # original; confirm the base class does not need initialising.
        self.fc1 = nn.Layer(n_inputs=n_inputs, n_outputs=n_hidden)
        self.fc2 = nn.Layer(n_inputs=n_hidden, n_outputs=n_outputs)

        self.act_fn = F.Sigmoid()

    def forward(self, x):
        """Run a forward pass and keep the intermediates needed by backward.

        Args:
            x: Input batch; assumed to be (n_samples, n_inputs) — TODO confirm
                against nn.Layer.

        Returns:
            Tuple ``(y_pred, z2, a1, z1)``: the network output, the second
            layer's pre-activation, the hidden activation and the first
            layer's pre-activation.
        """
        z1 = self.fc1(x)
        a1 = self.act_fn(z1)
        z2 = self.fc2(a1)
        y_pred = self.act_fn(z2)

        return y_pred, z2, a1, z1

    def backward(self, x, y, a_n):
        """Compute the weight and bias gradients for one batch.

        The gradients are summed over the batch, not averaged; the caller is
        expected to scale them (e.g. by ``1 / m``) when updating.

        Args:
            x: Input batch that was passed to ``forward``.
            y: Target values with the same shape as the network output.
            a_n: The ``(y_pred, z2, a1, z1)`` sequence returned by ``forward``.

        Returns:
            Tuple ``(dw2, db2, dw1, db1)``.
        """
        # z2 is unpacked for symmetry with forward() but is not needed below.
        y_pred, z2, a1, z1 = a_n
        # Output layer error. The sigmoid derivative is intentionally left
        # out: it cancels against the sigmoid cross-entropy loss used in this
        # notebook, leaving (y_pred - y).
        delta2 = (y_pred - y)  # * self.act_fn.grad(z2)
        # Gradient of the cost with respect to the second layer's weights
        dw2 = np.dot(a1.T, delta2)
        # Backpropagate the error through the second layer's weights
        delta1 = np.dot(delta2, self.fc2.W.T) * self.act_fn.grad(z1)
        # Gradient of the cost with respect to the first layer's weights
        dw1 = np.dot(x.T, delta1)
        # Bias gradients are the layer errors summed over the batch axis
        db2 = np.sum(delta2, axis=0, keepdims=True)
        db1 = np.sum(delta1, axis=0, keepdims=True)
        return dw2, db2, dw1, db1

In [7]:
def cross_entropy_loss(y_pred, y, eps=1e-12):
    """Mean element-wise binary cross-entropy between predictions and targets.

    Args:
        y_pred: Predicted probabilities in [0, 1].
        y: Target values, broadcastable against ``y_pred``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking
            logs, so a saturated prediction (exactly 0 or 1) yields a large
            but finite loss instead of ``inf``/``nan``.

    Returns:
        The loss averaged over every element, as a NumPy float.
    """
    # Work in float64 so that ``1 - eps`` is still distinguishable from 1.
    probs = np.clip(np.asarray(y_pred, dtype=np.float64), eps, 1.0 - eps)
    targets = np.asarray(y)
    return np.mean(-targets * np.log(probs) - (1 - targets) * np.log(1 - probs))

In [8]:
def accuracy_score(y, y_pred):
    """Return the fraction of rows whose arg-max class matches the target's.

    Args:
        y: Target scores per row; the arg-max along axis 1 is the true class.
        y_pred: Predicted scores per row, compared the same way.

    Returns:
        Number of matching rows divided by the number of target rows.
    """
    predicted_classes = y_pred.argmax(axis=1)
    true_classes = y.argmax(axis=1)
    n_correct = np.sum(predicted_classes == true_classes)
    return n_correct / true_classes.shape[0]

In [9]:
# model = NeuralNet()
# print(model.fc1.shape)
# print(model.fc2.shape)
# epochs = 400
# learning_rate = 0.99
# m = mnist.X_train.shape[0]
# for epoch in np.arange(epochs):
    
#     # Feedforward
#     y_pred, z2, a1, z1 = model.forward(mnist.X_train)
    
#     # Calculate cost
#     loss = cross_entropy_loss(y_pred, mnist.y_train)
    
#     # Backprop
#     dw2, db2, dw1, db1 = model.backward(mnist.X_train, mnist.y_train, [y_pred, z2, a1, z1])
    
#     # Update weights
#     model.fc2.W = model.fc2.W - learning_rate * 1 / m * dw2
#     model.fc1.W = model.fc1.W - learning_rate * 1 / m * dw1
#     model.fc2.b = model.fc2.b - learning_rate * 1 / m * db2
#     model.fc1.b = model.fc1.b - learning_rate * 1 / m * db1
    
#     acc = accuracy_score(mnist.y_train, y_pred)
#     if epoch % 10 == 0:
#         print(f'epoch = {epoch}, loss = {loss:.3f}, TRAIN ACC = {acc:.3f}')
#     epoch += 1
    

## Autograd

## Graph

In [10]:
# g = Graph()

# g.set_as_default()

# X = Tensor(mnist.X_train)

# X + W

# W = Variable(np.random.rand(784, 128))

# b = Variable(np.zeros(shape=(1, 128)))

# z = add(matmul(X, W), b)

# z.input_nodes[0].input_nodes[0]

# z.compute()

In [11]:
from fault_tolerant_ml.ml.ops import tensor as ft

In [12]:
# Create a new graph and call set_as_default() on it before building tensors.
# NOTE(review): presumably nodes created afterwards register on this graph —
# confirm in ft.Graph.
g = ft.Graph()
g.set_as_default()
# Wrap the MNIST training inputs and labels as ft tensors.
X = ft.Tensor(mnist.X_train)
y = ft.Tensor(mnist.y_train)

In [13]:
# Standard-normal weights mapping 784 inputs to 128 units.
W = ft.Tensor(np.random.standard_normal(size=(784, 128)))

In [14]:
# Zero bias with one entry per output unit of W (784 x 128), so that it
# broadcasts against the (n_samples, 128) result of matmul(X, W).
b = ft.Tensor(np.zeros((1, 128)))

In [15]:
# Matrix-multiply node combining the training inputs X with the weights W.
a = ft.matmul(X, W)
# z = ft.add(W, b)

In [16]:
g.operations

[<fault_tolerant_ml.ml.ops.tensor.matmul at 0x12f6ce630>]

In [17]:
def evalulate(f):
    """Compute every operation registered on a graph, in stored order.

    Args:
        f: Object exposing an ``operations`` iterable; each operation must
            provide ``input_nodes`` and a ``compute(*input_nodes)`` method.

    Returns:
        List with the result of each operation's ``compute`` call, in the
        same order as ``f.operations``.
    """
    val = []
    for op in f.operations:
        # Echo the inputs of each operation as it is evaluated.
        print(*op.input_nodes)
        val.append(op.compute(*op.input_nodes))
    # Without this return the collected results were silently discarded.
    return val

In [46]:
def traverse(f):
    """List the nodes reachable from ``f`` in post-order.

    Inputs of an ``ft.Operation`` are visited before the operation itself, so
    every node appears after the nodes it depends on. A node reachable
    through several paths is listed once per path.

    Args:
        f: Root node of the expression to walk.

    Returns:
        List of visited nodes, ending with ``f``.
    """
    ordered = []

    def visit(current):
        # Only operations carry inputs; any other node is treated as a leaf.
        children = current.input_nodes if isinstance(current, ft.Operation) else ()
        for child in children:
            visit(child)
        ordered.append(current)

    visit(f)
    return ordered

In [47]:
# Rebind g to a new graph for the small scalar example below and call
# set_as_default() on it before creating its tensors.
g = ft.Graph()
g.set_as_default()
# Two scalar tensors holding 3 and 7.
x1 = ft.Tensor(np.array(3))
x2 = ft.Tensor(np.array(7))

In [48]:
# Nested expression: square applied to the sum (x1 + x1) + x2.
f = ft.square(ft.add(ft.add(x1, x1), x2))

In [49]:
traverse(f)

[Tensor(3, dtype=int64),
 Tensor(3, dtype=int64),
 add(),
 Tensor(7, dtype=int64),
 add(),
 square()]

In [50]:
g.operations[0].input_nodes

[Tensor(3, dtype=int64), Tensor(3, dtype=int64)]

In [363]:
# evalulate(g)

In [362]:
# l = [ ("z1", "add", ("x1","x1")),
# ("z2", "add", ("z1","x2")),
# ("f", "square", ("z2",)) ]

# G = { "add" : lambda a,b: a+b,
# "square": lambda a:a*a }

In [360]:
# val = { "x1" : 3, "x2" : 7 }

# for step in l:
#     print(val)
#     var, op_name, func = step
#     lookup = list(map(val.get, func))
#     val[var] = G[op_name](*lookup)

In [358]:
# DG = { "add" : [ (lambda a,b: 1), (lambda a,b: 1) ],
# "square": [ lambda a:2*a ] }

# delta={}
# delta["f"] = 1
# for step in l[::-1]:
#     var, op_name, func = step
#     for op in DG[op_name]:
#         if var not in delta:
#             delta[var] = 0
#         lookup = list(map(val.get, func))
#         print(lookup)
#         delta[var] = delta[var] + DG[op_name](*lookup)

In [110]:
class Tensor(object):
    """Minimal array wrapper that records which tensors produced it.

    Attributes are ``data`` (the wrapped NumPy value) and ``depends_on``
    (the list of tensors this one was computed from; empty for leaves).
    """

    def __init__(self, data: np.ndarray, depends_on=None):
        """Wrap ``data`` and remember the tensors it was derived from.

        Args:
            data: NumPy array or scalar. Anything else (Python numbers,
                lists) is converted with ``np.asarray`` so that ``data.dtype``
                is always available.
            depends_on: Optional list of parent tensors.
        """
        self.depends_on = depends_on or []
        # Leave NumPy arrays and NumPy scalars untouched; coerce the rest.
        if isinstance(data, (np.ndarray, np.generic)):
            self.data = data
        else:
            self.data = np.asarray(data)

    def __add__(self, other):
        """Return ``self + other`` as a new tensor depending on both operands.

        A non-Tensor ``other`` (e.g. a Python number) is wrapped first, so
        that it also appears in ``depends_on``.
        """
        if not isinstance(other, Tensor):
            other = Tensor(other)
        return Tensor(self.data + other.data, depends_on=[self, other])

    def __radd__(self, other):
        """Support ``number + tensor`` by wrapping the left operand."""
        return Tensor(other) + self

    def __pow__(self, p):
        """Return ``self ** p`` as a new tensor depending on ``self``."""
        data = self.data ** p
        return Tensor(data, depends_on=[self])

    def __repr__(self):
        return f"Tensor({self.data}, dtype={self.data.dtype})"


In [111]:
# Two scalar leaf tensors holding 3 and 7.
y1, y2 = (Tensor(np.array(value)) for value in (3, 7))
# z1 doubles y1; z2 squares z1, so z2.depends_on is [z1].
z1 = y1 + y1
z2 = z1 ** 2

In [113]:
z2.depends_on

[Tensor(6, dtype=int64)]