initial import week 4, not solved

schneems · Nov 13, 2011 · 5811f95 · 5811f95
1 parent a913643
commit 5811f95
Show file tree

Hide file tree

Showing 18 changed files with 1,444 additions and 0 deletions.
diff --git a/mlclass-ex3/mlclass-ex3/octave-core b/mlclass-ex3/mlclass-ex3/octave-core
diff --git a/mlclass-ex4/ex4.pdf b/mlclass-ex4/ex4.pdf
diff --git a/mlclass-ex4/mlclass-ex4/checkNNGradients.m b/mlclass-ex4/mlclass-ex4/checkNNGradients.m
@@ -0,0 +1,52 @@
+function checkNNGradients(lambda)
+%CHECKNNGRADIENTS Compare backpropagation gradients with numerical ones
+%on a tiny, deterministic neural network
+%   CHECKNNGRADIENTS(lambda) builds a 3-5-3 network from fixed weights,
+%   evaluates nnCostFunction to get the analytical gradient and
+%   computeNumericalGradient to get a finite-difference estimate, then
+%   prints both side by side together with their relative difference.
+%   lambda is the regularization strength; it defaults to 0 when omitted
+%   or empty.
+
+if ~exist('lambda', 'var') || isempty(lambda)
+    lambda = 0;
+end
+
+% Dimensions of the toy problem
+n_in     = 3;
+n_hidden = 5;
+n_out    = 3;
+n_examples = 5;
+
+% Deterministic pseudo-random weights, unrolled into one parameter vector
+W1 = debugInitializeWeights(n_hidden, n_in);
+W2 = debugInitializeWeights(n_out, n_hidden);
+theta0 = [W1(:) ; W2(:)];
+
+% debugInitializeWeights adds one column, so X ends up n_examples x n_in
+X = debugInitializeWeights(n_examples, n_in - 1);
+% Labels cycle through 1..n_out
+y = 1 + mod(1:n_examples, n_out)';
+
+% Cost function of the unrolled parameters only
+costFunc = @(p) nnCostFunction(p, n_in, n_hidden, n_out, X, y, lambda);
+
+% Analytical gradient first, then the finite-difference estimate
+[cost, grad] = costFunc(theta0);
+numgrad = computeNumericalGradient(costFunc, theta0);
+
+% Print the two gradients as adjacent columns for visual comparison
+disp([numgrad grad]);
+fprintf(['The above two columns you get should be very similar.\n' ...
+         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);
+
+% Relative difference between the two gradients. With a correct
+% implementation and EPSILON = 0.0001 in computeNumericalGradient.m it
+% should come out below 1e-9.
+rel_diff = norm(numgrad - grad) / norm(numgrad + grad);
+
+fprintf(['If your backpropagation implementation is correct, then \n' ...
+         'the relative difference will be small (less than 1e-9). \n' ...
+         '\nRelative Difference: %g\n'], rel_diff);
+
+end
diff --git a/mlclass-ex4/mlclass-ex4/computeNumericalGradient.m b/mlclass-ex4/mlclass-ex4/computeNumericalGradient.m
@@ -0,0 +1,29 @@
+function numgrad = computeNumericalGradient(J, theta)
+%COMPUTENUMERICALGRADIENT Central-difference estimate of a gradient
+%   numgrad = COMPUTENUMERICALGRADIENT(J, theta) approximates the gradient
+%   of the scalar function J at theta. J is a function handle such that
+%   J(theta) returns the function value at theta.
+%
+%   numgrad has the same size as theta; numgrad(i) approximates the
+%   partial derivative of J with respect to theta(i), computed as
+%       (J(theta + e*u_i) - J(theta - e*u_i)) / (2*e)
+%   where u_i is the i-th unit vector and e = 1e-4.
+
+step = 1e-4;
+numgrad = zeros(size(theta));
+
+idx = 0;
+while idx < numel(theta)
+    idx = idx + 1;
+
+    % Perturbation that moves only the idx-th parameter
+    delta = zeros(size(theta));
+    delta(idx) = step;
+
+    % Evaluate below and above theta, then take the symmetric slope
+    cost_lo = J(theta - delta);
+    cost_hi = J(theta + delta);
+    numgrad(idx) = (cost_hi - cost_lo) / (2*step);
+end
+
+end
diff --git a/mlclass-ex4/mlclass-ex4/debugInitializeWeights.m b/mlclass-ex4/mlclass-ex4/debugInitializeWeights.m
@@ -0,0 +1,22 @@
+function W = debugInitializeWeights(fan_out, fan_in)
+%DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
+%incoming connections and fan_out outgoing connections using a fixed
+%strategy, this will help you later in debugging
+%   W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights
+%   of a layer with fan_in incoming connections and fan_out outgoing
+%   connections using a fixed set of values
+%
+%   W is a matrix of size (fan_out, 1 + fan_in); the first column of W
+%   handles the "bias" terms
+%
+
+% Allocate W: one row per outgoing unit, one column per incoming unit
+% plus a leading bias column
+W = zeros(fan_out, 1 + fan_in);
+
+% Fill W in column-major order with sin(1), sin(2), ..., scaled by 1/10.
+% The values are therefore identical on every call, which is useful for
+% debugging
+W(:) = sin(1:numel(W)) / 10;
+
+end
diff --git a/mlclass-ex4/mlclass-ex4/displayData.m b/mlclass-ex4/mlclass-ex4/displayData.m
@@ -0,0 +1,59 @@
+function [h, display_array] = displayData(X, example_width)
+%DISPLAYDATA Display 2D data in a nice grid
+%   [h, display_array] = DISPLAYDATA(X, example_width) displays the rows
+%   of X as image patches (example_width pixels wide, default
+%   round(sqrt(size(X, 2)))) in a nice grid, each scaled by its largest
+%   absolute value. Returns the imagesc handle h and the displayed array.
+
+% Set example_width automatically if not passed in
+if ~exist('example_width', 'var') || isempty(example_width)
+	example_width = round(sqrt(size(X, 2)));
+end
+
+% Gray Image
+colormap(gray);
+
+% Compute rows, cols
+[m, n] = size(X);
+example_height = (n / example_width);
+
+% Compute number of items to display
+display_rows = floor(sqrt(m));
+display_cols = ceil(m / display_rows);
+
+% Between images padding
+pad = 1;
+
+% Setup blank display filled with -1; the padding keeps this value
+display_array = - ones(pad + display_rows * (example_height + pad), ...
+                       pad + display_cols * (example_width + pad));
+
+% Copy each example into a patch on the display array, row by row
+curr_ex = 1;
+for j = 1:display_rows
+	for i = 1:display_cols
+		if curr_ex > m
+			break;
+		end
+
+		% Scale by the max absolute value of the patch. Guard against an
+		% all-zero example, which would otherwise turn into NaN (0/0).
+		max_val = max(abs(X(curr_ex, :)));
+		if max_val == 0, max_val = 1; end
+		r_idx = pad + (j - 1) * (example_height + pad) + (1:example_height);
+		c_idx = pad + (i - 1) * (example_width + pad) + (1:example_width);
+		display_array(r_idx, c_idx) = ...
+			reshape(X(curr_ex, :), example_height, example_width) / max_val;
+		curr_ex = curr_ex + 1;
+	end
+end
+
+% Display Image with a fixed [-1, 1] color range
+h = imagesc(display_array, [-1 1]);
+
+% Do not show axis
+axis image off
+
+drawnow;
+
+end