In [None]:
import numpy as np
import matplotlib.pyplot as plt
import edf
import mnist_loader

# Directory handed to mnist_loader for both dataset sections.
MNIST_DIR = 'MNIST'

# Read the 'training' and 'testing' sections as (images, labels) pairs.
train_images, train_labels = mnist_loader.load_mnist(section='training', path=MNIST_DIR)
test_images, test_labels = mnist_loader.load_mnist(section='testing', path=MNIST_DIR)

# Quick visual check: render the first training image in grayscale,
# without any smoothing between pixels.
first_image = train_images[0]
plt.imshow(first_image, cmap='gray', interpolation='nearest')
plt.show()

In [None]:
"""for debuggin purposes it is useful to
make the computation deterministic"""

# Seed numpy's global RNG so the random initial weights are reproducible.
np.random.seed(1234)

# Now we build a model with random parameters

# Graph inputs: a flattened 28x28 image and a single label slot.
x = edf.Input((28 * 28,))
y = edf.Input((1,))

# Weight matrices drawn from a standard normal and scaled by 0.1.
# W1 is drawn before W2, so the seeded random stream is consumed in a fixed order.
W1 = edf.Parameter(0.1 * np.random.randn(20, 28 * 28))
W2 = edf.Parameter(0.1 * np.random.randn(10, 20))

# Hidden layer: VDot -> Norm -> Sigmoid (components are created in that order).
hidden_pre = edf.Norm(edf.VDot(W1, x))
L1 = edf.Sigmoid(hidden_pre)

# Output layer: VDot -> Norm -> Softmax over 10 outputs.
output_pre = edf.Norm(edf.VDot(W2, L1))
L2 = edf.Softmax(output_pre)

# Loss: NegLog of Aref(L2, y) -- presumably the negative log of the
# probability assigned to the label in y; confirm against the EDF source.
M = edf.Model(edf.NegLog(edf.Aref(L2, y)))

"""the following is needed by the learning code below to set
the inputs (input and label are both "inputs" to the model)"""

M.x, M.y = x, y

In [None]:
"""the verify procedure does backpropagation and numerical
differentiation and compares to the two. This is important
for debugging componets.  When writing a new component
one should verify a stub model that uses just that component.
Discrepencies between backprop and numerical differntiation
can be due to bugs, numerical precision issues, or nonlinearities
in the model.  It is good to have your stub model is
operating in a numerically stable regime.  Beware of sigmoids causing
vanishing gradients."""

# Feed the model one random input vector (uniform in [0, 1)) and label 0,
# then let EDF run its gradient check on it.
x.value = np.random.rand(28 * 28)
y.value = np.zeros(1, np.int64)

edf.verify(M)

In [None]:
"""the following functions are used to train and evaluate the
network. training is done by iterating over all training samples
and updating the model's parameters with gradient descent."""

def train(M, nepochs):
    """Train M for `nepochs` passes over the training set, evaluating after each.

    After every epoch the model is evaluated on the notebook-level training
    and test sets, and the results are printed and recorded.

    Args:
        M: model accepted by train_an_epoch() and evaluate().
        nepochs: number of full passes over the training data.

    Returns:
        (train_loss_curve, train_err_curve, test_loss_curve, test_err_curve),
        one entry per epoch. The error curves are stored as percentages
        (100 * error fraction); the printed error is the raw fraction.
    """
    tr_losses, tr_errors = [], []
    te_losses, te_errors = [], []

    for epoch_no in range(1, nepochs + 1):
        print("Epoch: {}/{}".format(epoch_no, nepochs))
        train_an_epoch(M)

        # Performance on the data the model was just trained on.
        loss, err = evaluate(M, train_images, train_labels)
        tr_losses.append(loss)
        tr_errors.append(100*err)
        print ("\t Training: Loss {:.3f} Error {:.2f}".format(loss, err))

        # Performance on the held-out test data.
        loss, err = evaluate(M, test_images, test_labels)
        te_losses.append(loss)
        te_errors.append(100*err)
        print ("\t Test: Loss {:.3f} Error {:.2f}".format(loss, err))

    return tr_losses, tr_errors, te_losses, te_errors

def train_an_epoch(M):
    """Run one pass over the notebook-level training set, one sample at a time.

    Each image is flattened to a 784-vector and assigned to M.x, its label is
    wrapped in a one-element list and assigned to M.y, and M.backprop() is
    called (presumably forward pass, backward pass and parameter update --
    confirm against the EDF source). Progress is printed every 20000 samples.
    """
    total = train_images.shape[0]
    flat_shape = (28*28,)
    for step in range(total):
        M.x.value = train_images[step].reshape(flat_shape)
        M.y.value = [train_labels[step]]
        M.backprop()
        if step % 20000 != 0:
            continue
        print ("\t Batch {}/{}".format(step, total))

def evaluate(M, data, labels, output=None):
    """Compute the average loss and error rate of M over a labelled dataset.

    Each sample is flattened to a 784-vector, fed through M.forward(), and the
    predicted class is taken as the argmax of the output layer's value.

    Args:
        M: model exposing x, y, loss and forward().
        data: array of images; data.shape[0] is the number of samples and each
            data[i] must be reshapeable to (784,).
        labels: labels indexable by sample number.
        output: component whose .value holds the class scores for the current
            sample. Defaults to the notebook-level L2, which is what this
            function always used before; pass it explicitly when evaluating a
            model whose output layer is not the one currently bound to L2.

    Returns:
        (mean loss per sample, fraction of misclassified samples).
    """
    if output is None:
        # Backward-compatible default: the globally defined output layer,
        # looked up at call time just as the original code did.
        output = L2
    total_loss = 0.0
    total_mistakes = 0.0
    numsamples = data.shape[0]
    for i in range(numsamples):
        M.x.value = data[i].reshape((28*28,))
        M.y.value = [labels[i]]
        M.forward()
        total_loss = total_loss + np.mean(M.loss.value)
        # A mistake is counted when the highest-scoring class differs from the label.
        total_mistakes = total_mistakes + int(np.argmax(output.value) != labels[i])
    return total_loss/numsamples, total_mistakes/numsamples

In [None]:
"""now, we are ready to train a network on MNIST. the following will
iterate over the training dataset a total of 10 times (10 epochs)."""

# Train for 10 epochs and keep the per-epoch curves for plotting.
(train_loss_curve,
 train_err_curve,
 test_loss_curve,
 test_err_curve) = train(M, 10)

In [None]:
"""we can then plot the loss and error curves on the training and test data."""

# One figure per metric: (y-axis label, test curve, train curve, legend entries).
for ylabel, test_curve, train_curve, legend_entries in (
    ("loss", test_loss_curve, train_loss_curve, ['test loss', 'train loss']),
    ("error (%)", test_err_curve, train_err_curve, ['test error', 'train error']),
):
    plt.xlabel("epochs")
    plt.ylabel(ylabel)
    # Test curve in red, train curve in blue, both indexed by epoch number from 0.
    plt.plot(np.arange(len(test_curve)), test_curve, color='red')
    plt.plot(np.arange(len(train_curve)), train_curve, color='blue')
    plt.legend(legend_entries, loc='upper right')
    plt.show()
    # Clear the current figure so the next metric starts from a blank canvas.
    plt.clf()

In [None]:
"""next, you have to implement a ReLU activation function ReLU(x) = max(0,x).
implement the forward and backward methods of the following class. use the
provided EDF source code to understand what each method should do. note that
since ReLU has no parameters, it might be helpful to base your implementation
on how the Sigmoid EDF Component is implemented."""

class ReLU(edf.Component):
    """Element-wise rectified linear unit.

    For y = ReLU(x), y has the same shape as x and
    y[i1,...,ik] = relu(x[i1,...,ik]), where relu(z) = max(0,z).
    """
    def __init__(self,x):
        # Placeholder buffer with the input's shape; forward() replaces it.
        self.value = np.empty(x.shape,np.float32)
        self.x = x
        edf.Component.__init__(self)
        # Presumably registers this component as a consumer of x in the EDF graph.
        edf.input_of(x,self)

    def forward(self):
        """Store max(0, x) element-wise in self.value."""
        self.value = np.maximum(0, self.x.value)

    def backward(self):
        """Add this component's contribution to the gradient of its input.

        dReLU/dx is 1 where x > 0 and 0 elsewhere (0 is used at x == 0).
        """
        # The mask comes from self.value rather than self.x.value: forward()
        # always leaves an ndarray in self.value, whereas callers may assign a
        # plain Python list to an input's value, which cannot be compared to 0.
        # NOTE(review): assumes EDF sets self.grad to dLoss/dy before calling
        # backward() and resets x.grad on each backprop, so accumulation is
        # correct -- confirm against edf.Sigmoid.backward.
        self.x.grad = self.x.grad + self.grad * (self.value > 0)

In [None]:
"""the code below will plot the output and gradients computed by your
implementation of the ReLU component above. check if the plots match
ReLU(x) and dReLU/dx(x) as a sanity test of your implementation."""

# Stub model whose "loss" is just the ReLU of a single scalar input.
x = edf.Input((1,))
y = edf.Input((1,))
relu = ReLU(x)
M = edf.Model(relu)
M.x, M.y = x, y

outputs, grads = [], []
values = np.linspace(-2, 2, 100)

# Sweep the input over [-2, 2], recording the output and the gradient that
# backprop leaves on the input for each point.
for sample in values:
    M.x.value = [sample]
    M.y.value = [1]
    M.backprop()
    outputs.append(relu.value[0])
    grads.append(relu.x.grad[0])

plt.xlabel("value")
# Output in red, gradient in blue.
for series, colour in ((outputs, 'red'), (grads, 'blue')):
    plt.plot(values, series, color=colour)
plt.legend(['output', 'grad'], loc='upper left')
plt.show()
plt.clf()

In [None]:
"""now, train the same network as before but with a ReLU activation
in the hidden layer instead of a Sigmoid."""

# Same seed as the sigmoid model so both start from identical weights.
np.random.seed(1234)

# Graph inputs: a flattened 28x28 image and a single label slot.
x = edf.Input((28 * 28,))
y = edf.Input((1,))

# Weight matrices drawn from a standard normal and scaled by 0.1 (W1 first).
W1 = edf.Parameter(0.1 * np.random.randn(20, 28 * 28))
W2 = edf.Parameter(0.1 * np.random.randn(10, 20))

# Hidden layer: VDot -> Norm -> ReLU (components are created in that order).
hidden_pre = edf.Norm(edf.VDot(W1, x))
L1 = ReLU(hidden_pre)

# Output layer: VDot -> Norm -> Softmax over 10 outputs.
output_pre = edf.Norm(edf.VDot(W2, L1))
L2 = edf.Softmax(output_pre)

M = edf.Model(edf.NegLog(edf.Aref(L2, y)))

# The training code reads the inputs through these attributes.
M.x, M.y = x, y

# Give both inputs an initial value: a random vector and label 0.
x.value = np.random.rand(28 * 28)
y.value = np.zeros(1, np.int64)

In [None]:
# Train the ReLU network for 10 epochs; the curves overwrite the earlier ones.
(train_loss_curve,
 train_err_curve,
 test_loss_curve,
 test_err_curve) = train(M, 10)

In [None]:
# One figure per metric: (y-axis label, test curve, train curve, legend entries).
for ylabel, test_curve, train_curve, legend_entries in (
    ("loss", test_loss_curve, train_loss_curve, ['test loss', 'train loss']),
    ("error (%)", test_err_curve, train_err_curve, ['test error', 'train error']),
):
    plt.xlabel("epochs")
    plt.ylabel(ylabel)
    # Test curve in red, train curve in blue, both indexed by epoch number from 0.
    plt.plot(np.arange(len(test_curve)), test_curve, color='red')
    plt.plot(np.arange(len(train_curve)), train_curve, color='blue')
    plt.legend(legend_entries, loc='upper right')
    plt.show()
    # Clear the current figure so the next metric starts from a blank canvas.
    plt.clf()

In [None]:
"""finally, implement the Tanh activation function by filling the missing
code in the forward and backward methods below. it might be helpful to derive
a relationship between Tanh and Sigmoid so that you can re-use parts of 
EDF's Sigmoid code."""

class Tanh(edf.Component):
    """Element-wise hyperbolic tangent.

    For y = Tanh(x), y has the same shape as x and
    y[i1,...,ik] = tanh(x[i1,...,ik]),
    where tanh(z) = (e^z - e^-z)/(e^z + e^-z).
    """
    def __init__(self,x):
        # Placeholder buffer with the input's shape; forward() replaces it.
        self.value = np.empty(x.shape,np.float32)
        self.x = x
        edf.Component.__init__(self)
        # Presumably registers this component as a consumer of x in the EDF graph.
        edf.input_of(x,self)

    def forward(self):
        """Store tanh(x) element-wise in self.value."""
        # np.tanh is used instead of the exponential formula so that large
        # |x| does not overflow in e^z.
        self.value = np.tanh(self.x.value)

    def backward(self):
        """Add this component's contribution to the gradient of its input.

        dtanh/dx = 1 - tanh(x)^2, computed from the cached forward value.
        """
        # NOTE(review): assumes EDF sets self.grad to dLoss/dy before calling
        # backward() and resets x.grad on each backprop, so accumulation is
        # correct -- confirm against edf.Sigmoid.backward.
        self.x.grad = self.x.grad + self.grad * (1.0 - self.value * self.value)

In [None]:
"""again, make sure that the output and gradients plotted below are correct."""

# Stub model whose "loss" is just the Tanh of a single scalar input.
x = edf.Input((1,))
y = edf.Input((1,))
tanh = Tanh(x)
M = edf.Model(tanh)
M.x, M.y = x, y

outputs, grads = [], []
values = np.linspace(-5, 5, 100)

# Sweep the input over [-5, 5], recording the output and the gradient that
# backprop leaves on the input for each point.
for sample in values:
    M.x.value = [sample]
    M.y.value = [1]
    M.backprop()
    outputs.append(tanh.value[0])
    grads.append(tanh.x.grad[0])

plt.xlabel("value")
# Output in red, gradient in blue.
for series, colour in ((outputs, 'red'), (grads, 'blue')):
    plt.plot(values, series, color=colour)
plt.legend(['output', 'grad'], loc='upper left')
plt.show()
plt.clf()

In [None]:
"""you can then train a network with a Tanh activation function instead
of Sigmoid/ReLU."""

# Same seed as the earlier models so all start from identical weights.
np.random.seed(1234)

# Graph inputs: a flattened 28x28 image and a single label slot.
x = edf.Input((28 * 28,))
y = edf.Input((1,))

# Weight matrices drawn from a standard normal and scaled by 0.1 (W1 first).
W1 = edf.Parameter(0.1 * np.random.randn(20, 28 * 28))
W2 = edf.Parameter(0.1 * np.random.randn(10, 20))

# Hidden layer: VDot -> Norm -> Tanh (components are created in that order).
hidden_pre = edf.Norm(edf.VDot(W1, x))
L1 = Tanh(hidden_pre)

# Output layer: VDot -> Norm -> Softmax over 10 outputs.
output_pre = edf.Norm(edf.VDot(W2, L1))
L2 = edf.Softmax(output_pre)

M = edf.Model(edf.NegLog(edf.Aref(L2, y)))

# The training code reads the inputs through these attributes.
M.x, M.y = x, y

# Give both inputs an initial value: a random vector and label 0.
x.value = np.random.rand(28 * 28)
y.value = np.zeros(1, np.int64)

In [None]:
# Train the Tanh network for 10 epochs; the curves overwrite the earlier ones.
(train_loss_curve,
 train_err_curve,
 test_loss_curve,
 test_err_curve) = train(M, 10)

In [None]:
# One figure per metric: (y-axis label, test curve, train curve, legend entries).
for ylabel, test_curve, train_curve, legend_entries in (
    ("loss", test_loss_curve, train_loss_curve, ['test loss', 'train loss']),
    ("error (%)", test_err_curve, train_err_curve, ['test error', 'train error']),
):
    plt.xlabel("epochs")
    plt.ylabel(ylabel)
    # Test curve in red, train curve in blue, both indexed by epoch number from 0.
    plt.plot(np.arange(len(test_curve)), test_curve, color='red')
    plt.plot(np.arange(len(train_curve)), train_curve, color='blue')
    plt.legend(legend_entries, loc='upper right')
    plt.show()
    # Clear the current figure so the next metric starts from a blank canvas.
    plt.clf()