Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
120 lines (102 sloc) 3.76 KB
r"""Use network2 to figure out the average starting values of the gradient
error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
\partial b^l_j.
"""
#### Libraries
# Standard library
import json
import math
import random
import shutil
import sys
# My library
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
def main():
    """Run the gradient-norm experiment for 2, 3 and 4 hidden layers.

    For each network the initial per-layer gradient norms are printed,
    the network is trained while the norms are recorded to a JSON file,
    and the recorded norms are plotted.  For the two-hidden-layer
    network the averaged initial gradient of the first six neurons of
    every layer except the last is additionally written to
    "initial_gradient.json" and copied to "../../js/".
    """
    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    td = full_td[:1000]  # Just use the first 1000 items of training data
    epochs = 500  # Number of epochs to train for

    print("\nTwo hidden layers:")
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print("Saving the averaged gradient for the top six neurons in each "
          "layer.\nWARNING: This will affect the look of the book, so be "
          "sure to check the\nrelevant material (early chapter 5).")
    # Close (and therefore flush) the file before copying it; copying
    # while the handle is still open can produce an empty or truncated
    # copy.
    with open("initial_gradient.json", "w") as f:
        json.dump(abbreviated_gradient, f)
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(
        epochs, "norms_during_training_2_layers.json", 2)

    print("\nThree hidden layers:")
    net = network2.Network([784, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_3_layers.json")
    plot_training(
        epochs, "norms_during_training_3_layers.json", 3)

    print("\nFour hidden layers:")
    net = network2.Network([784, 30, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs,
             "norms_during_training_4_layers.json")
    plot_training(
        epochs, "norms_during_training_4_layers.json", 4)
def initial_norms(training_data, net):
    """Print the norm of the average gradient for each hidden layer.

    The average gradient comes from get_average_gradient; the entry for
    the last layer is dropped before the norms are taken.
    """
    hidden_layer_gradients = get_average_gradient(net, training_data)[:-1]
    norms = []
    for layer_gradient in hidden_layer_gradients:
        norms.append(list_norm(layer_gradient))
    print("Average gradient for the hidden layers: " + str(norms))
def training(training_data, net, epochs, filename):
    """Train net for `epochs` rounds, recording per-layer gradient norms.

    Before each round the average gradient is computed with
    get_average_gradient and the norm of every layer's entry except the
    last is appended to a list.  After the final round the list (one
    entry per round) is written to `filename` as JSON.

    The file is written inside a `with` block so that it is flushed and
    closed before any caller reads it back.
    """
    norms = []
    for j in range(epochs):
        average_gradient = get_average_gradient(net, training_data)
        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
        print("Epoch: %s" % j)
        # NOTE(review): positional arguments are presumably
        # (epochs=1, mini_batch_size=1000, eta=0.1) -- confirm against
        # network2.Network.SGD.
        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
    with open(filename, "w") as f:
        json.dump(norms, f)
def plot_training(epochs, filename, num_layers):
    """Plot the gradient norms stored in `filename`, one curve per layer.

    `filename` is a JSON file holding a list with one entry per training
    round, each entry being a list of per-layer norms (the format
    written by `training`).  The first `num_layers` layers are plotted,
    the x axis is limited to [0, epochs], and the figure is saved as
    "training_speed_<num_layers>_layers.png" and then copied to
    "../../images/".
    """
    with open(filename, "r") as f:
        norms = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(
            [n[j] for n in norms],
            # Wrap around so more layers than colours cannot raise
            # IndexError.
            color=colors[j % len(colors)],
            label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    plt.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    # The image has to exist on disk before it can be copied.
    plt.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
def get_average_gradient(net, training_data):
    """Return the first backprop result averaged over training_data.

    For every (x, y) pair, element 0 of net.backprop(x, y) is collected.
    The collected results are summed with list_sum; each summed entry is
    then reshaped to one dimension, divided by the number of training
    examples and converted to a plain Python list.
    """
    per_example = []
    for x, y in training_data:
        per_example.append(net.backprop(x, y)[0])
    summed = list_sum(per_example)
    averaged = []
    for layer_total in summed:
        flat = np.reshape(layer_total, len(layer_total))
        averaged.append((flat/len(training_data)).tolist())
    return averaged
def zip_sum(a, b):
    """Return a list of the pairwise sums of the elements of a and b.

    Pairing stops at the end of the shorter input.
    """
    totals = []
    for left, right in zip(a, b):
        totals.append(left + right)
    return totals
def list_sum(l):
return reduce(zip_sum, l)
def list_norm(l):
    """Return the Euclidean norm of the numbers in l."""
    sum_of_squares = 0
    for value in l:
        sum_of_squares = sum_of_squares + value*value
    return math.sqrt(sum_of_squares)
if __name__ == "__main__":