
cloned repository for Nielsen's book and re-worked my own IPython notebook of exercise solutions to reflect his code
jonkrohn committed Aug 16, 2016
1 parent 13843fb commit a4b34a82e52a2c823f84428c275f2a2d79718aee
Showing with 3,431 additions and 268 deletions.
  1. +8 −0 neural-networks-and-deep-learning/.gitignore
  2. +33 −0 neural-networks-and-deep-learning/README.md
  3. BIN neural-networks-and-deep-learning/data/mnist.pkl.gz
  4. BIN neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.png
  5. +52 −0 neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.py
  6. +1 −0 neural-networks-and-deep-learning/fig/data_1000.json
  7. BIN neural-networks-and-deep-learning/fig/digits.png
  8. BIN neural-networks-and-deep-learning/fig/digits_separate.png
  9. BIN neural-networks-and-deep-learning/fig/false_minima.png
  10. +40 −0 neural-networks-and-deep-learning/fig/false_minima.py
  11. +119 −0 neural-networks-and-deep-learning/fig/generate_gradient.py
  12. +1 −0 neural-networks-and-deep-learning/fig/initial_gradient.json
  13. BIN neural-networks-and-deep-learning/fig/misleading_gradient.png
  14. +43 −0 neural-networks-and-deep-learning/fig/misleading_gradient.py
  15. BIN neural-networks-and-deep-learning/fig/misleading_gradient_contours.png
  16. +21 −0 neural-networks-and-deep-learning/fig/misleading_gradient_contours.py
  17. +234 −0 neural-networks-and-deep-learning/fig/mnist.py
  18. BIN neural-networks-and-deep-learning/fig/mnist_100_digits.png
  19. BIN neural-networks-and-deep-learning/fig/mnist_2_and_1.png
  20. BIN neural-networks-and-deep-learning/fig/mnist_complete_zero.png
  21. BIN neural-networks-and-deep-learning/fig/mnist_first_digit.png
  22. BIN neural-networks-and-deep-learning/fig/mnist_other_features.png
  23. BIN neural-networks-and-deep-learning/fig/mnist_really_bad_images.png
  24. BIN neural-networks-and-deep-learning/fig/mnist_top_left_feature.png
  25. +1 −0 neural-networks-and-deep-learning/fig/more_data.json
  26. BIN neural-networks-and-deep-learning/fig/more_data.png
  27. +122 −0 neural-networks-and-deep-learning/fig/more_data.py
  28. BIN neural-networks-and-deep-learning/fig/more_data_5.png
  29. BIN neural-networks-and-deep-learning/fig/more_data_comparison.png
  30. BIN neural-networks-and-deep-learning/fig/more_data_log.png
  31. BIN neural-networks-and-deep-learning/fig/more_data_rotated_5.png
  32. +1 −0 neural-networks-and-deep-learning/fig/more_data_svm.json
  33. +1 −0 neural-networks-and-deep-learning/fig/multiple_eta.json
  34. BIN neural-networks-and-deep-learning/fig/multiple_eta.png
  35. +73 −0 neural-networks-and-deep-learning/fig/multiple_eta.py
  36. +1 −0 neural-networks-and-deep-learning/fig/norms_during_training_2_layers.json
  37. +1 −0 neural-networks-and-deep-learning/fig/norms_during_training_3_layers.json
  38. +1 −0 neural-networks-and-deep-learning/fig/norms_during_training_4_layers.json
  39. +1 −0 neural-networks-and-deep-learning/fig/overfitting.json
  40. +179 −0 neural-networks-and-deep-learning/fig/overfitting.py
  41. BIN neural-networks-and-deep-learning/fig/overfitting1.png
  42. BIN neural-networks-and-deep-learning/fig/overfitting2.png
  43. BIN neural-networks-and-deep-learning/fig/overfitting3.png
  44. BIN neural-networks-and-deep-learning/fig/overfitting4.png
  45. +1 −0 neural-networks-and-deep-learning/fig/overfitting_full.json
  46. BIN neural-networks-and-deep-learning/fig/overfitting_full.png
  47. BIN neural-networks-and-deep-learning/fig/pca_hard_data.png
  48. BIN neural-networks-and-deep-learning/fig/pca_hard_data_fit.png
  49. +32 −0 neural-networks-and-deep-learning/fig/pca_limitations.py
  50. +1 −0 neural-networks-and-deep-learning/fig/regularized.json
  51. BIN neural-networks-and-deep-learning/fig/regularized1.png
  52. BIN neural-networks-and-deep-learning/fig/regularized2.png
  53. +1 −0 neural-networks-and-deep-learning/fig/regularized_full.json
  54. BIN neural-networks-and-deep-learning/fig/regularized_full.png
  55. +6 −0 neural-networks-and-deep-learning/fig/replaced_by_d3/README.md
  56. BIN neural-networks-and-deep-learning/fig/replaced_by_d3/relu.png
  57. +24 −0 neural-networks-and-deep-learning/fig/replaced_by_d3/relu.py
  58. BIN neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.png
  59. +23 −0 neural-networks-and-deep-learning/fig/replaced_by_d3/sigmoid.py
  60. BIN neural-networks-and-deep-learning/fig/replaced_by_d3/step.png
  61. +23 −0 neural-networks-and-deep-learning/fig/replaced_by_d3/step.py
  62. BIN neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.png
  63. +22 −0 neural-networks-and-deep-learning/fig/replaced_by_d3/tanh.py
  64. +46 −0 neural-networks-and-deep-learning/fig/serialize_images_to_json.py
  65. BIN neural-networks-and-deep-learning/fig/test.png
  66. BIN neural-networks-and-deep-learning/fig/training_speed_2_layers.png
  67. BIN neural-networks-and-deep-learning/fig/training_speed_3_layers.png
  68. BIN neural-networks-and-deep-learning/fig/training_speed_4_layers.png
  69. BIN neural-networks-and-deep-learning/fig/valley.png
  70. +43 −0 neural-networks-and-deep-learning/fig/valley.py
  71. BIN neural-networks-and-deep-learning/fig/valley2.png
  72. +48 −0 neural-networks-and-deep-learning/fig/valley2.py
  73. +89 −0 neural-networks-and-deep-learning/fig/weight_initialization.py
  74. +1 −0 neural-networks-and-deep-learning/fig/weight_initialization_100.json
  75. BIN neural-networks-and-deep-learning/fig/weight_initialization_100.png
  76. +1 −0 neural-networks-and-deep-learning/fig/weight_initialization_30.json
  77. BIN neural-networks-and-deep-learning/fig/weight_initialization_30.png
  78. +4 −0 neural-networks-and-deep-learning/requirements.txt
  79. +297 −0 neural-networks-and-deep-learning/src/conv.py
  80. +60 −0 neural-networks-and-deep-learning/src/expand_mnist.py
  81. +64 −0 neural-networks-and-deep-learning/src/mnist_average_darkness.py
  82. +85 −0 neural-networks-and-deep-learning/src/mnist_loader.py
  83. +28 −0 neural-networks-and-deep-learning/src/mnist_svm.py
  84. +17 −36 nielsen_ch1_network.py → neural-networks-and-deep-learning/src/network.py
  85. +332 −0 neural-networks-and-deep-learning/src/network2.py
  86. +311 −0 neural-networks-and-deep-learning/src/network3.py
  87. 0 neural-networks-and-deep-learning/src/old/blog/__init__.py
  88. +90 −0 neural-networks-and-deep-learning/src/old/blog/common_knowledge.py
  89. BIN neural-networks-and-deep-learning/src/old/cost_vs_iterations.png
  90. BIN neural-networks-and-deep-learning/src/old/cost_vs_iterations_trapped.png
  91. +118 −0 neural-networks-and-deep-learning/src/old/deep_autoencoder.py
  92. +51 −0 neural-networks-and-deep-learning/src/old/deep_learning.py
  93. +86 −0 neural-networks-and-deep-learning/src/old/gradient_descent_hack.py
  94. BIN neural-networks-and-deep-learning/src/old/mnist_100_30_deep_autoencoder.png
  95. BIN neural-networks-and-deep-learning/src/old/mnist_100_unit_autoencoder.png
  96. BIN neural-networks-and-deep-learning/src/old/mnist_10_unit_autoencoder.png
  97. BIN neural-networks-and-deep-learning/src/old/mnist_30_component_pca.png
  98. BIN neural-networks-and-deep-learning/src/old/mnist_30_unit_autoencoder.png
  99. +83 −0 neural-networks-and-deep-learning/src/old/mnist_autoencoder.py
  100. +40 −0 neural-networks-and-deep-learning/src/old/mnist_pca.py
  101. +67 −0 neural-networks-and-deep-learning/src/old/perceptron_learning.py
  102. +329 −0 neural-networks-and-deep-learning/src/run_network.ipynb
  103. +75 −0 neural-networks-and-deep-learning/src/run_network.py
  104. +0 −232 nielsen_ch1_network.ipynb
neural-networks-and-deep-learning/.gitignore
@@ -0,0 +1,8 @@
*~
*.org
*.pem
*.pkl
*.pyc
.DS_Store
loc.py
src/ec2
neural-networks-and-deep-learning/README.md
@@ -0,0 +1,33 @@
# Code samples for "Neural Networks and Deep Learning"
This repository contains code samples for my (forthcoming) book on
"Neural Networks and Deep Learning".
As the code is written to accompany the book, I don't intend to add
new features. However, bug reports are welcome, and you should feel
free to fork and modify the code.
## License
MIT License
Copyright (c) 2012-2015 Michael Nielsen
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
neural-networks-and-deep-learning/fig/backprop_magnitude_nabla.py
@@ -0,0 +1,52 @@
"""
backprop_magnitude_nabla
~~~~~~~~~~~~~~~~~~~~~~~~
Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify
MNIST data. I ran ten mini-batches of size 100, with eta = 0.01 and
lambda = 0.05, using:
net.SGD(otd[:1000], 1, 100, 0.01, 0.05,
I obtained the following norms for the (unregularized) nabla_w for the
respective mini-batches:
[0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839]
[0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221]
[0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556]
[0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733]
[0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385]
[0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895]
[0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566]
[0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781]
[0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819]
[0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092]
Note that results are listed in order of layer. They clearly show how
the magnitude of nabla_w decreases as we go back through layers.
In this program I take mini-batches 7, 8, 9 as representative and plot
them. I omit the results from the first and final layers since they
correspond to 784 input neurons and 10 output neurons, not 30 as in
the other layers, making it difficult to compare results.
Note that I haven't attempted to preserve the whole workflow here. It
involved some minor hacking around with backprop2, which messed up
that code. That's why I've simply put the results in by hand below.
"""
# Third-party libraries
import matplotlib.pyplot as plt
nw1 = [0.129173436407863, 0.4242933114455002,
1.6154682713449411, 7.5451567587160069]
nw2 = [0.12571016850457151, 0.44231149185805047,
1.8435833504677326, 7.61973813981073]
nw3 = [0.15854489503205446, 0.70244235144444678,
2.6294803575724157, 10.427062019753425]
plt.plot(range(1, 5), nw1, "ro-", range(1, 5), nw2, "go-",
range(1, 5), nw3, "bo-")
plt.xlabel('Layer $l$')
plt.ylabel(r"$\Vert\nabla C^l_w\Vert$")
plt.xticks([1, 2, 3, 4])
plt.show()
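The docstring above records norms of nabla_w that were pasted in by hand. For readers who want to reproduce the measurement rather than copy numbers, here is a minimal sketch, assuming (as in src/network2.py from this commit) that network2.Network exposes a weights list and that backprop(x, y) returns the pair (nabla_b, nabla_w):

# Sketch only: recompute per-layer weight-gradient norms for one mini-batch.
import numpy as np
import mnist_loader
import network2

training_data, _, _ = mnist_loader.load_data_wrapper()
net = network2.Network([784, 30, 30, 30, 30, 30, 10])
batch = training_data[:100]

# Average nabla_w over the mini-batch, then take one norm per weight layer.
nabla_w = [np.zeros(w.shape) for w in net.weights]
for x, y in batch:
    _, delta_nabla_w = net.backprop(x, y)
    nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
norms = [np.linalg.norm(nw / len(batch)) for nw in nabla_w]
print(norms)  # earliest layer first; earlier layers should be markedly smaller

The sharp drop in magnitude from later to earlier layers is the effect the figure plots.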

neural-networks-and-deep-learning/fig/false_minima.py
@@ -0,0 +1,40 @@
"""
false_minimum
~~~~~~~~~~~~~
Plots a function of two variables with many false minima."""
#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy
fig = plt.figure()
ax = fig.gca(projection='3d')
X = numpy.arange(-5, 5, 0.1)
Y = numpy.arange(-5, 5, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = numpy.sin(X)*numpy.sin(Y)+0.2*X
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in xrange(len(X)):
    for y in xrange(len(Y)):
        colors[x, y] = colortuple[(x + y) % 2]
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
linewidth=0)
ax.set_xlim3d(-5, 5)
ax.set_ylim3d(-5, 5)
ax.set_zlim3d(-2, 2)
ax.w_xaxis.set_major_locator(LinearLocator(3))
ax.w_yaxis.set_major_locator(LinearLocator(3))
ax.w_zaxis.set_major_locator(LinearLocator(3))
plt.show()
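false_minima.py is written for Python 2 and an older matplotlib (xrange, fig.gca(projection='3d'), the w_xaxis attributes). A rough Python 3 adaptation, assuming matplotlib 3.4 or later where the 3D projection is registered automatically, might look like this:

# Sketch only: Python 3 / current-matplotlib version of the surface plot above.
from matplotlib.ticker import LinearLocator
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # replaces the removed fig.gca(projection='3d')
X, Y = np.meshgrid(np.arange(-5, 5, 0.1), np.arange(-5, 5, 0.1))
Z = np.sin(X) * np.sin(Y) + 0.2 * X

# Same checkerboard facecolors as the original, with range instead of xrange.
colors = np.empty(X.shape, dtype=str)
for x in range(X.shape[0]):
    for y in range(X.shape[1]):
        colors[x, y] = ('w', 'b')[(x + y) % 2]

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors, linewidth=0)
ax.set_xlim3d(-5, 5)
ax.set_ylim3d(-5, 5)
ax.set_zlim3d(-2, 2)
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):  # the w_xaxis attributes are deprecated
    axis.set_major_locator(LinearLocator(3))
plt.show()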
neural-networks-and-deep-learning/fig/generate_gradient.py
@@ -0,0 +1,119 @@
"""generate_gradient.py
~~~~~~~~~~~~~~~~~~~~~~~
Use network2 to figure out the average starting values of the gradient
error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
\partial b^l_j.
"""
#### Libraries
# Standard library
import json
import math
import random
import shutil
import sys
sys.path.append("../src/")
# My library
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
def main():
    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    td = full_td[:1000] # Just use the first 1000 items of training data
    epochs = 500 # Number of epochs to train for
    print "\nTwo hidden layers:"
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print "Saving the averaged gradient for the top six neurons in each "+\
        "layer.\nWARNING: This will affect the look of the book, so be "+\
        "sure to check the\nrelevant material (early chapter 5)."
    f = open("initial_gradient.json", "w")
    json.dump(abbreviated_gradient, f)
    f.close()
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(
        epochs, "norms_during_training_2_layers.json", 2)
    print "\nThree hidden layers:"
    net = network2.Network([784, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_3_layers.json")
    plot_training(
        epochs, "norms_during_training_3_layers.json", 3)
    print "\nFour hidden layers:"
    net = network2.Network([784, 30, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs,
             "norms_during_training_4_layers.json")
    plot_training(
        epochs, "norms_during_training_4_layers.json", 4)

def initial_norms(training_data, net):
    average_gradient = get_average_gradient(net, training_data)
    norms = [list_norm(avg) for avg in average_gradient[:-1]]
    print "Average gradient for the hidden layers: "+str(norms)

def training(training_data, net, epochs, filename):
    norms = []
    for j in range(epochs):
        average_gradient = get_average_gradient(net, training_data)
        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
        print "Epoch: %s" % j
        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
    f = open(filename, "w")
    json.dump(norms, f)
    f.close()

def plot_training(epochs, filename, num_layers):
    f = open(filename, "r")
    norms = json.load(f)
    f.close()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(np.arange(epochs),
                [n[j] for n in norms],
                color=colors[j],
                label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.grid(True)
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    ax.set_yscale('log')
    plt.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    plt.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
    plt.show()

def get_average_gradient(net, training_data):
    nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
    gradient = list_sum(nabla_b_results)
    return [(np.reshape(g, len(g))/len(training_data)).tolist()
            for g in gradient]

def zip_sum(a, b):
    return [x+y for (x, y) in zip(a, b)]

def list_sum(l):
    return reduce(zip_sum, l)

def list_norm(l):
    return math.sqrt(sum([x*x for x in l]))

if __name__ == "__main__":
    main()
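generate_gradient.py is likewise Python 2: it relies on print statements and the bare reduce built-in. Under Python 3 the prints would need parentheses and reduce would have to come from functools; a sketch of just the affected helpers:

# Sketch only: Python 3 form of the reduce-based helpers used above.
from functools import reduce
import math

def zip_sum(a, b):
    return [x + y for (x, y) in zip(a, b)]

def list_sum(l):
    # Element-wise sum of a list of per-example gradient vectors.
    return reduce(zip_sum, l)

def list_norm(l):
    # Euclidean norm of one flattened gradient vector.
    return math.sqrt(sum(x * x for x in l))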
neural-networks-and-deep-learning/fig/initial_gradient.json
@@ -0,0 +1 @@
[[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]
neural-networks-and-deep-learning/fig/misleading_gradient.py
@@ -0,0 +1,43 @@
"""
misleading_gradient
~~~~~~~~~~~~~~~~~~~
Plots a function which misleads the gradient descent algorithm."""
#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy
fig = plt.figure()
ax = fig.gca(projection='3d')
X = numpy.arange(-1, 1, 0.025)
Y = numpy.arange(-1, 1, 0.025)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + 10*Y**2
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in xrange(len(X)):
    for y in xrange(len(Y)):
        colors[x, y] = colortuple[(x + y) % 2]
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
linewidth=0)
ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 12)
ax.w_xaxis.set_major_locator(LinearLocator(3))
ax.w_yaxis.set_major_locator(LinearLocator(3))
ax.w_zaxis.set_major_locator(LinearLocator(3))
ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20)
ax.text(1.79, 0, 9.62, "$C$", fontsize=20)
plt.show()
neural-networks-and-deep-learning/fig/misleading_gradient_contours.py
@@ -0,0 +1,21 @@
"""
misleading_gradient_contours
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Plots the contours of the function from misleading_gradient.py"""
#### Libraries
# Third party libraries
import matplotlib.pyplot as plt
import numpy
X = numpy.arange(-1, 1, 0.02)
Y = numpy.arange(-1, 1, 0.02)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + 10*Y**2
plt.figure()
CS = plt.contour(X, Y, Z, levels=[0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
plt.xlabel("$w_1$", fontsize=16)
plt.ylabel("$w_2$", fontsize=16)
plt.show()
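The quadratic C(w1, w2) = w1^2 + 10*w2^2 plotted by the two scripts above misleads plain gradient descent because its gradient is (2*w1, 20*w2): with a single learning rate eta, each step multiplies w1 by (1 - 2*eta) but w2 by (1 - 20*eta), so any eta above 0.1 makes the w2 coordinate oscillate with growing amplitude while w1 barely moves. A small, hypothetical numpy sketch makes that concrete:

# Sketch only: plain gradient descent on C(w1, w2) = w1**2 + 10*w2**2.
import numpy as np

def descend(eta, steps=20, start=(1.0, 1.0)):
    w = np.array(start)
    for _ in range(steps):
        grad = np.array([2 * w[0], 20 * w[1]])
        w = w - eta * grad
    return w

print(descend(0.05))   # w2 factor is 0.0, but w1 only shrinks by 0.9 per step
print(descend(0.09))   # w2 oscillates (factor -0.8) while w1 shrinks by 0.82
print(descend(0.11))   # w2 diverges: |1 - 20*eta| = 1.2 > 1

The steep w2 direction caps the usable learning rate for the shallow w1 direction, which is the trade-off the contour plot is meant to convey.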