# Initialize the data 

Randomly create a single sample `d` with 3072 features, together with its target label `t`.

In [3]:
import numpy as np
# One random input sample: a 1 x 3072 row vector drawn uniformly from [0, 1).
d = np.random.rand(1, 3072)
# Target label for that sample (class 1), kept 2-D as a 1 x 1 integer array.
t = np.ones((1, 1), dtype=int)
print(t.shape)

(1, 1)


In [4]:
# Sanity check: the sample is a single row of 3072 features.
print(d.shape)

(1, 3072)


# Operations
Implement different operations with forward and backward functions.

In [5]:

class Operation:
    """Base class for an operation with a forward and a backward pass.

    Subclasses override ``forward`` to compute an output and ``backward``
    to propagate gradients. The base implementations do nothing.
    """

    def forward(self):
        """Compute the output; the base version is a no-op returning None."""
        return None

    def backward(self):
        """Propagate gradients; the base version is a no-op returning None."""
        return None


class Add(Operation):
    """Element-wise addition ``y = a + b`` with NumPy broadcasting.

    ``forward`` remembers the shapes of both operands so that ``backward``
    can return, for each operand, a gradient of that operand's own shape.
    This matters when ``b`` is a bias vector added to a batch matrix ``a``:
    the bias gradient is then the sum of ``dy`` over the batch axis.
    """

    # Operand shapes recorded by the most recent forward(); None until then.
    _a_shape = None
    _b_shape = None

    def forward(self, a, b):
        """Return ``a + b`` and record the operand shapes for ``backward``."""
        self._a_shape = np.shape(a)
        self._b_shape = np.shape(b)
        return a + b

    def backward(self, dy):
        """Return ``(da, db)`` for the upstream gradient ``dy``.

        Since y = a + b, each operand receives ``dy``, summed over every axis
        along which that operand was broadcast in ``forward``. If ``forward``
        has not been called yet, ``dy`` is returned unchanged for both.
        """
        if self._a_shape is None or self._b_shape is None:
            return dy, dy
        da = self._sum_to_shape(dy, self._a_shape)
        db = self._sum_to_shape(dy, self._b_shape)
        return da, db

    @staticmethod
    def _sum_to_shape(grad, shape):
        """Sum ``grad`` over broadcast axes so that it has shape ``shape``."""
        grad = np.asarray(grad)
        lead = grad.ndim - len(shape)
        if lead < 0:
            # Gradient has fewer axes than the operand; nothing to reduce.
            return grad
        if lead > 0:
            # Leading axes were prepended by broadcasting (e.g. the batch axis).
            grad = grad.sum(axis=tuple(range(lead)))
        # Axes where the operand had length 1 but the output was stretched.
        stretched = tuple(
            axis for axis, size in enumerate(shape)
            if size == 1 and grad.shape[axis] != 1
        )
        if stretched:
            grad = grad.sum(axis=stretched, keepdims=True)
        return grad


class Mult(Operation):
    """Matrix product ``y = x W`` computed with ``np.dot``."""

    def forward(self, x, w):
        """Return the product of the input ``x`` and the weights ``w``."""
        product = np.dot(x, w)
        return product

    def backward(self, dy, x, w):
        """Return the gradients ``(dx, dw)`` for the upstream gradient ``dy``.

        ``dx = dy . w^T`` and ``dw = x^T . dy``. The operation keeps no state,
        so the ``x`` and ``w`` used in the forward pass must be passed in.
        """
        dx = np.dot(dy, w.T)
        dw = np.dot(x.T, dy)
        return dx, dw


class Sigmoid(Operation):
    """Logistic sigmoid ``y = 1 / (1 + exp(-x))``."""

    def forward(self, x):
        """Return the sigmoid of ``x`` and cache it in ``self.y``."""
        denominator = 1 + np.exp(-x)
        self.y = 1 / denominator
        return self.y

    def backward(self, dy):
        """Return dL/dx given ``dy`` = dL/dy, using dy/dx = y * (1 - y).

        Relies on the output cached by the preceding ``forward`` call.
        """
        out = self.y
        return dy * out * (1 - out)


class ReLU(Operation):
    """Rectified linear unit implemented with a boolean mask."""

    def forward(self, x):
        """Return ``x`` multiplied by the mask ``x > 0``; cache the mask."""
        positive = x > 0
        self.mask = positive
        return positive * x

    def backward(self, dy):
        """Return ``dy`` multiplied by the mask cached in ``forward``."""
        return self.mask * dy



class CrossEntropy(Operation):
    """Binary cross-entropy ``L = -t*log(y) - (1 - t)*log(1 - y)``.

    Predictions are clipped into the open interval (0, 1) before use. Without
    this, a saturated prediction (``y`` exactly 0 or 1) yields
    ``0 * log(0) = nan`` in the loss and a division by zero in the gradient.
    Predictions that are already normal floats strictly between 0 and 1 are
    left untouched.
    """

    def forward(self, y, t):
        """Return the element-wise loss for predictions ``y`` and targets ``t``."""
        y = self._clip(y)
        return -t * np.log(y) - (1 - t) * np.log(1 - y)

    def backward(self, y, t):
        """Return dL/dy, i.e. ``-t/y + (1 - t)/(1 - y)``, element-wise."""
        y = self._clip(y)
        return -t / y + (1 - t) / (1 - y)

    @staticmethod
    def _clip(y):
        """Clip ``y`` to [smallest normal float, largest float below 1]."""
        y = np.asarray(y)
        if not np.issubdtype(y.dtype, np.floating):
            # log and division need floats; integer predictions are promoted.
            y = y.astype(float)
        info = np.finfo(y.dtype)
        # Bounds are taken from y's own dtype so they stay representable
        # (a fixed 1e-12 margin would round to 1.0 in float32).
        return np.clip(y, info.tiny, 1 - info.epsneg)


class TanH(Operation):
    """Hyperbolic tangent activation ``y = tanh(x)``."""

    def forward(self, x):
        """Return ``tanh(x)`` and cache it in ``self.y``."""
        activated = np.tanh(x)
        self.y = activated
        return activated

    def backward(self, dy):
        """Return dL/dx given ``dy`` = dL/dy, using dy/dx = 1 - y**2.

        Relies on the output cached by the preceding ``forward`` call.
        """
        local_grad = 1 - np.square(self.y)
        return dy * local_grad

In [11]:
# dy = del(L)/del(y)
# therefore, del(L)/del(x) = (del(L)/del(y)) * (del(y)/del(x))

## Create the network using Operations

Randomly initialize the parameters

In [12]:
# Fix the seed so the random weights below are reproducible.
np.random.seed(17)

# First layer: 3072 inputs -> 8 hidden units (multiply, add bias, ReLU).
w1 = np.random.randn(3072, 8) / 100  # small random initial weights
b1 = np.zeros(8)
o1, o2, o3 = Mult(), Add(), ReLU()

# Second layer: 8 hidden units -> 1 output (multiply, add bias, sigmoid).
w2 = np.random.randn(8, 1) / 100
b2 = np.zeros(1)
o4, o5, o6 = Mult(), Add(), Sigmoid()

# Loss comparing the network output with the target.
loss = CrossEntropy()


## Forward

In [13]:
# First layer: multiply by w1, add the bias b1, then apply ReLU.
v1 = o1.forward(d, w1)
z1 = o2.forward(v1, b1)
h = o3.forward(z1)
# Second layer: multiply by w2, add the bias b2, then apply the sigmoid.
v2 = o4.forward(h, w2)
z2 = o5.forward(v2, b2)
y = o6.forward(z2)
# Cross-entropy between the prediction y and the target t.
l=loss.forward(y, t)
print('loss=', l)



loss= [[0.69098579]]


## Backward

In [14]:
# Walk the forward pass in reverse, applying the chain rule at each operation.
# dy is the gradient of the loss with respect to the prediction y.
dy = loss.backward(y, t)
# Second layer: sigmoid -> bias add -> multiply (needs the forward inputs h, w2).
dz2 = o6.backward(dy)
dv2, db2 = o5.backward(dz2)
dh, dw2 = o4.backward(dv2, h, w2)
# First layer: ReLU -> bias add -> multiply (needs the forward inputs d, w1).
dz1 = o3.backward(dh)
dv1, db1 = o2.backward(dz1)
dx, dw1 = o1.backward(dv1, d, w1)
print(dw2)


[[-0.07897976]
 [-0.14668946]
 [ 0.        ]
 [ 0.        ]
 [-0.03766654]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]


In [15]:
# One gradient-descent step: move every parameter against its gradient,
# scaled by the step size a.
a = 0.01
w1 = w1 - a * dw1
w2 = w2 - a * dw2
b1 = b1 - a * db1
b2 = b2 - a * db2

# Layer

In [16]:
class Layer:
    """Base class for a network layer identified by a name.

    Subclasses override ``forward`` and ``backward``; the base versions
    do nothing.
    """

    def __init__(self, name):
        """Store the layer's ``name``."""
        self.name = name

    def forward(self, x):
        """Compute the layer output for ``x``; the base version returns None."""
        return None

    def backward(self, dy):
        """Propagate the gradient ``dy``; the base version returns None."""
        return None


class Linear(Layer):
    """Fully connected layer: ``act(x . w + b)``.

    Built from a ``Mult``, an ``Add`` and an activation operation.
    """

    def __init__(self, nb_in, nb_out, act, name=None):
        """Create the layer.

        Args:
            nb_in: number of input features (rows of ``w``).
            nb_out: number of output units (columns of ``w``, length of ``b``).
            act: activation operation providing ``forward(x)`` and
                ``backward(dy)``.
            name: optional layer name, forwarded to ``Layer.__init__`` so
                that ``self.name`` is always defined.
        """
        super().__init__(name)
        # Small random weights and a zero bias.
        self.w = np.random.randn(nb_in, nb_out) / 100
        self.b = np.zeros((nb_out,))
        self.mult = Mult()
        self.add = Add()
        self.act = act

    def forward(self, x):
        """Return ``act(x . w + b)``; ``x`` is cached for ``backward``."""
        self.x = x
        v = self.mult.forward(x, self.w)
        return self.act.forward(self.add.forward(v, self.b))

    def backward(self, dy):
        """Return ``(dx, dw, db)`` for the upstream gradient ``dy``.

        Must be called after ``forward``: it uses the cached input and the
        state cached by the activation. ``db`` is summed over any leading
        (batch) axes so that it has the same shape as ``self.b``.
        """
        dz = self.act.backward(dy)
        dv, db = self.add.backward(dz)
        db = np.asarray(db)
        batch_axes = db.ndim - self.b.ndim
        if batch_axes > 0:
            # The bias was broadcast over these axes in forward, so its
            # gradient is the sum of the per-sample gradients.
            db = db.sum(axis=tuple(range(batch_axes)))
        dx, dw = self.mult.backward(dv, self.x, self.w)
        return dx, dw, db

## Create the network using Layers

In [17]:
# Same seed as the operation-level network, so the layers draw the same weights.
np.random.seed(17)
# 3072 inputs -> 8 hidden units with ReLU, then 8 -> 1 output with sigmoid.
layer1 = Linear(nb_in=3072, nb_out=8, act=ReLU())
layer2 = Linear(nb_in=8, nb_out=1, act=Sigmoid())
loss = CrossEntropy()

## Forward

In [18]:
# Feed the sample through both layers, then compute the loss against t.
h=layer1.forward(d)
y=layer2.forward(h)
l=loss.forward(y, t)
print('loss=', l)


loss= [[0.69098579]]


## Backward

In [19]:
# Backpropagate from the loss through layer2, then layer1.
# Each layer returns the gradient for its input, its weights and its bias.
dy=loss.backward(y, t)
dh, dw2, db2 =layer2.backward(dy)
dx, dw1, db1 = layer1.backward(dh)
print(dw2)


[[-0.07897976]
 [-0.14668946]
 [ 0.        ]
 [ 0.        ]
 [-0.03766654]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]


# TODO

Extend the code to process a batch of samples

Hint: the `Add()` operation should be updated.

In [0]:
# A batch of 8 random samples, each with 3072 features drawn from [0, 1).
X = np.random.rand(8, 3072)
# One random binary label (0 or 1) per sample, as an 8 x 1 column.
T = np.random.randint(low=0, high=2, size=(8, 1))