softmax_regression_vec.m (forked from amaas/stanford_dl_ex)
function [f,g] = softmax_regression_vec(theta, X, y)
%
% Arguments:
% theta - A vector containing the parameter values to optimize.
% minFunc passes theta in as a long vector, so we need to
% reshape it into an n-by-(num_classes-1) matrix.
% Recall that we assume theta(:,num_classes) = 0.
%
% X - The examples stored in a matrix.
% X(i,j) is the i'th coordinate of the j'th example.
% y - The label for each example. y(j) is the j'th example's label.
%
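% With theta(:,num_classes) fixed at 0, the objective is the negative
% log-likelihood
%   f(theta) = -sum_i log( exp(theta(:,y(i))'*X(:,i)) / sum_k exp(theta(:,k)'*X(:,i)) )
% and its gradient with respect to column c of theta is
%   g(:,c)   = -sum_i X(:,i) * ( 1{y(i)==c} - p(c,i) ),
% where p(c,i) is the model's probability of class c for example i.
%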
m=size(X,2);
n=size(X,1);
% theta is passed in as a vector; reshape it to an n x (num_classes-1) matrix
% (the last class's parameters are fixed at zero and not stored).
theta = reshape(theta, n, []);
num_classes=size(theta,2)+1;
% initialize objective value and gradient.
f = 0;
g = zeros(size(theta));
%
% TODO: Compute the softmax objective function and gradient using vectorized code.
% Store the objective function value in 'f', and the gradient in 'g'.
% Before returning g, make sure you form it back into a vector with g=g(:);
%
%%% YOUR CODE HERE %%%
% Class scores; the last class is pinned at theta(:,num_classes) = 0,
% so its score is an all-zero row.
scores = [theta' * X; zeros(1,m)];
% Subtract each column's max before exponentiating (numerical stability,
% does not change the probabilities), then normalize each column.
h = exp(bsxfun(@minus, scores, max(scores, [], 1)));
p = bsxfun(@rdivide, h, sum(h, 1));
% Negative log-likelihood of the true labels (natural log, not log2).
logp = log(p);
index = sub2ind(size(logp), y, 1:m);
f = -sum(logp(index));
% Indicator matrix: yk(c,i) = 1 if example i has label c, and 0 otherwise.
yk = full(sparse(y, 1:m, 1, num_classes, m));
% Only the first num_classes-1 classes have free parameters, so drop the
% last row from both the indicators and the probabilities.
yk = yk(1:num_classes-1, :);
p = p(1:num_classes-1, :);
% Gradient: g(:,c) = -sum_i X(:,i) * (1{y(i)==c} - p(c,i)).
g = -X * (yk - p)';
g=g(:); % make gradient a vector for minFunc
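
For reference, here is a minimal sketch of how the finished function could be sanity-checked with a central-difference numerical gradient before handing it to minFunc. The data sizes and the standalone check loop below are illustrative assumptions, not part of the exercise's starter code.

% Gradient check on small random data (sizes chosen arbitrarily).
n = 8; m = 20; num_classes = 5;
X = randn(n, m);
y = randi(num_classes, 1, m);             % labels in 1..num_classes, row vector
theta0 = 0.005 * randn(n * (num_classes - 1), 1);

[f, g] = softmax_regression_vec(theta0, X, y);

% Central-difference approximation of the gradient.
delta = 1e-6;
g_num = zeros(size(theta0));
for i = 1:numel(theta0)
    e = zeros(size(theta0)); e(i) = delta;
    fp = softmax_regression_vec(theta0 + e, X, y);
    fm = softmax_regression_vec(theta0 - e, X, y);
    g_num(i) = (fp - fm) / (2 * delta);
end
fprintf('max abs difference between analytic and numeric gradient: %g\n', ...
        max(abs(g - g_num)));

If the analytic gradient is correct, the reported difference should be on the order of 1e-8 or smaller.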