In [1]:
using ToyAD
using PyPlot

In [2]:
# Build the toy data set: K classes, N samples each, D features per sample.
N = 100 # samples in each class
D = 2   # number of features
K = 3   # number of classes
X = AD(zeros(N*K, D)) # one example per row
y = AD(zeros(N*K, 1)) # class label (1..K) of each example
for j in 1:K
    rows = (1 + N*(j-1)):(N*j) # the N consecutive rows that hold class j
    radius = linspace(0.0, 1, N)
    # angle runs over [4(j-1), 4j] and is perturbed by normally distributed noise
    theta = linspace((j-1)*4, j*4, N) + randn(N)*0.2
    X.value[rows, :] = [radius.*sin(theta) radius.*cos(theta)]
    y.value[rows, 1] = j
end

In [None]:
# Scatter plot of the generated points, colored by their class label.
scatter(X.value[:, 1], X.value[:, 2], c=y.value, s=40, alpha=0.5)

In [3]:
# Random initialization of the network parameters.
h = 100 # number of hidden units
W1 = AD(randn(D, h) * 0.01) # D x h weights, small random values
b1 = AD(zeros(1, h))        # 1 x h biases, start at zero
W2 = AD(randn(h, K) * 0.01) # h x K weights, small random values
b2 = AD(zeros(1, K))        # 1 x K biases, start at zero

ToyAD.AD("val",[0.0 0.0 0.0],false,[0.0 0.0 0.0],ToyAD.ad_constD,ToyAD.AD[],1)

In [4]:
# Hyperparameters for training.
step_size = 1.0 # step size for parameter updates
reg = 0.001     # regularization strength

0.001

In [8]:
# gradient descent loop
# Each iteration runs the forward pass (ReLU hidden layer, then softmax) on the
# whole data set and calls backprop on the result.
# NOTE(review): the loss computation and the parameter updates below are
# commented out, so as written J keeps its initial zeros and no code visible
# here applies an update to W1, b1, W2 or b2.
# NOTE(review): the output recorded for this cell is
# DimensionMismatch("dimensions must match"); the cause is not determinable
# from this file alone.
num_examples = size(X.value,1)
numIterations = 2
J = zeros(numIterations,1); # one cost entry per iteration
for i in 1:numIterations
    # evaluate class scores, [N x K]
    hidden1 = relu(X*W1 .+ b1) # note, ReLU activation
    hidden2 = hidden1*W2 .+ b2
    output = softmax(hidden2)
    
    # NOTE(review): the disabled loss code below indexes `y` (an AD wrapper)
    # directly and uses the result as a column index; it presumably needs an
    # integer taken from y.value - confirm before re-enabling.
    #= compute the loss: average cross-entropy loss and regularization
    corect_logprobs = zeros(num_examples)
    for j in 1:num_examples
        corect_logprobs[j] = -log(output.value[j,y[j]]);
    end
    data_loss = sum(corect_logprobs)/num_examples
    reg_loss = 0.5*reg*sum(W1.value .^2) + 0.5*reg*sum(W2.value .^2)
    J[i,:] = data_loss + reg_loss
    if i==1 || i % 1000 == 0
        println("iteration: ", i," loss: ", J[i,:])
    end
    =#
    
    # backpropagate the gradient to the parameters
    # NOTE(review): backprop is started from `output`; the meaning of the
    # second argument (`true`) is not visible here - confirm against ToyAD.
    backprop(output, true)

    # add regularization gradient contribution
    #W2.derivative += reg * W2.value
    #W1.derivative += reg * W1.value

    # perform a parameter update
    #W1.value += -step_size * W1.derivative
    #W1.derivative = W1.derivative .* 0
    #b1.value += -step_size * b1.derivative
    #b1.derivative = b1.derivative .* 0
    #W2.value += -step_size * W2.derivative
    #W2.derivative = W2.derivative .* 0
    #b2.value += -step_size * b2.derivative
    #b2.derivative = b2.derivative .* 0
end

LoadError: LoadError: DimensionMismatch("dimensions must match")
while loading In[8], in expression starting on line 5

In [None]:
# plot the cost per iteration
# J holds one entry per iteration, so the x axis is simply 1..length(J).
plot(1:length(J), J)
xlabel("Iterations")
ylabel("Cost")
grid("on") # show grid lines

In [None]:
# Evaluate training-set accuracy with a plain forward pass.
# The raw arrays (`.value`) are used instead of the AD wrappers: no gradients
# are needed here, and `max`, indexing, `length` and `==` are array operations.
hidden_layer = max(0, X.value*W1.value .+ b1.value) # ReLU on the hidden layer
scores = hidden_layer*W2.value .+ b2.value          # one row of class scores per example
labels = vec(y.value)                               # true class labels as a flat vector
predicted_class = zeros(size(scores,1))
for i in 1:size(scores,1)
    # position of the largest score in row i is the predicted class
    predicted_class[i] = indmax(scores[i,:])
end
# number of examples whose predicted class equals the true label
correct = sum(predicted_class .== labels)
println("training accuracy: ", correct/length(labels))

In [None]:

# Evaluate the classifier on a regular grid covering the data (plus a margin
# of 1 on every side) so that its decision regions can be drawn.
# The grid spacing has its own name so that it does not overwrite `h`, the
# hidden-layer size used when the parameters are initialized.
grid_step = 0.02
# Bounds are taken from the raw data array (`.value`), not the AD wrapper.
x_min = minimum(X.value[:, 1]) - 1
x_max = maximum(X.value[:, 1]) + 1
y_min = minimum(X.value[:, 2]) - 1
y_max = maximum(X.value[:, 2]) + 1
# Each axis gets its own range, so the y axis really spans y_min..y_max
# instead of reusing the number of points derived from the x extent.
xx = x_min:grid_step:x_max
yy = y_min:grid_step:y_max
grid_x = [i for i in xx, j in yy]
grid_y = [j for i in xx, j in yy]
xy = [grid_x[:] grid_y[:]] # one grid point per row: [x y]
# Forward pass on plain arrays: ReLU hidden layer, then class scores.
z0 = max(0, xy*W1.value .+ b1.value)
z = z0*W2.value .+ b2.value
zz = zeros(size(z,1))
for i in 1:size(z,1)
    zz[i] = indmax(z[i,:]) # predicted class of grid point i
end
zz = reshape(zz, size(grid_x)); # back to the grid layout expected by contourf

In [None]:
# Draw the predicted class of every grid point as filled contours, then
# overlay the training points colored by their label.
contourf(grid_x, grid_y, zz, cmap=get_cmap("Spectral"), alpha=0.8)
# Pass the raw arrays (`.value`) to the plotting call, as the data scatter
# plot earlier in this file does; the AD wrappers themselves are not arrays.
scatter(X.value[:, 1], X.value[:, 2], c=y.value, s=40)