# 3 Layer Network on MNIST

In [1]:
using Pkg;Pkg.add("MLDatasets")
using MLDatasets
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata();

┌ Info: Precompiling MLDatasets [eb30cadb-4394-5ae3-aed4-317e484a6458]
└ @ Base loading.jl:1260


In [2]:
(images, labels) = (reshape(train_x[:,:,1:1000], (28*28, 1000)), train_y[1:1000])
one_hot_labels = zeros(10,length(labels))
for (i,l) in enumerate(labels)
    one_hot_labels[l+1, i] = 1.0
end
labels = one_hot_labels

test_images = reshape(test_x, (28*28, size(test_x,3)))
test_labels = zeros((10, size(test_x,3)))

for (i,l) in enumerate(test_y)
    test_labels[l+1, i] = 1.0
end

using Random
Random.seed!(1)


relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 40, 40, 784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images,2)
        layer_0 = images[:,i]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2
        Error += sum((labels[:, i]' .- layer_2) .^ 2)
        
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[:, i]'))
        
        layer_2_delta = (labels[:, i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)
        
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    print("I : $(j) Train error: $(Error/size(images, 2)) Train accuracy: $(Correct_cnt/size(images, 2))   \r")
    
end
        

I : 40 Train error: 0.0682271128273786 Train accuracy: 0.993    

In [5]:
if (j % 10 == 0) || (j == iterations)
    Error, Correct_cnt = (0.0, 0)

    for i = 1:size(test_images, 2)
        
        layer_0 = test_images[:, i]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2
        
        Error += sum((test_labels[:, i]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[:, i]'))

    end
    print("Test-Err:: $(Error/size(test_images,2)) Test-Acc:: $(Correct_cnt/size(test_images,2))   \r")
end

Test-Err:: 0.27701831632546603 Test-Acc:: 0.8727   

In [7]:
(images, labels) = (reshape(train_x[:,:,1:1000], (28*28, 1000)), train_y[1:1000])
one_hot_labels = zeros(10,length(labels))
for (i,l) in enumerate(labels)
    one_hot_labels[l+1, i] = 1.0
end
labels = one_hot_labels

test_images = reshape(test_x, (28*28, size(test_x,3)))
test_labels = zeros((10, size(test_x,3)))

for (i,l) in enumerate(test_y)
    test_labels[l+1, i] = 1.0
end

using Random
Random.seed!(1)


relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 40, 40, 784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images,2)
        layer_0 = images[:,i]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2
        Error += sum((labels[:, i]' .- layer_2) .^ 2)
        
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[:, i]'))
        
        layer_2_delta = (labels[:, i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)
        
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    print("I : $(j) Train error: $(Error/size(images, 2)) Train accuracy: $(Correct_cnt/size(images,2))   \r")
    
    if (j % 5 == 0) || (j == iterations)
        Error, Correct_cnt = (0.0, 0)
        for i = 1:size(test_images, 2)
        
            layer_0 = test_images[:, i]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            Error += sum((test_labels[:, i]' .- layer_2) .^ 2)
            Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[:, i]'))
        end
    println()
    println("Test-Err:: $(Error/size(test_images, 2)) Test-Acc:: $(Correct_cnt/size(test_images, 2))")
    end
end


I : 5 Train error: 0.29739439558840397 Train accuracy: 0.878   
Test-Err:: 0.3767939044628232 Test-Acc:: 0.8162
I : 10 Train error: 0.18814760092583613 Train accuracy: 0.942   
Test-Err:: 0.30468604953956663 Test-Acc:: 0.8629
I : 15 Train error: 0.14439583078193777 Train accuracy: 0.967   
Test-Err:: 0.2836949460454106 Test-Acc:: 0.8731
I : 20 Train error: 0.11903362765796817 Train accuracy: 0.979   
Test-Err:: 0.27646178423027645 Test-Acc:: 0.8724
I : 25 Train error: 0.10114517964328593 Train accuracy: 0.987   
Test-Err:: 0.2739270039563274 Test-Acc:: 0.8723
I : 30 Train error: 0.0875160775763943 Train accuracy: 0.991    
Test-Err:: 0.2733526287620866 Test-Acc:: 0.8738
I : 35 Train error: 0.07688726309627121 Train accuracy: 0.992   
Test-Err:: 0.274741321852286 Test-Acc:: 0.8726
I : 40 Train error: 0.06822711282737862 Train accuracy: 0.993   
Test-Err:: 0.27701831632546603 Test-Acc:: 0.8727


# Dropout In Code

In [None]:
i = 1
layer_0 = images[:, i]
dropout_mask = bitrand(size(layer_1))
layer_1 = relu.(layer_0' * weights_0_1)
layer_1 .*= dropout_mask .* 2
layer_2 = layer_1 * weights_1_2

Error += sum((labels[:, i]' .- layer_2) .^ 2)
Correct_cnt += Int(argmax(layer_2) == argmax(labels[:, i]'))

layer_2_delta = (labels[i]' .- layer_2)
layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

layer_1_delta .*= dropout_mask


In [27]:
using Random
Random.seed!(1)

relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images, 2)
        layer_0 = images[:, i]
        layer_1 = relu.(layer_0' * weights_0_1)
        dropout_mask = bitrand(size(layer_1))
        layer_1 .*= dropout_mask .* 2
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[:, i]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[:, i]'))

        layer_2_delta = (labels[:, i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

        layer_1_delta .*= dropout_mask
        
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    
    if (j % 10 == 0) || (j == iterations)
        test_Error, test_Correct_cnt = (0.0, 0)
        for i = 1:size(test_images, 2)
        
            layer_0 = test_images[:, i]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            test_Error += sum((test_labels[:, i]' .- layer_2) .^ 2)
            test_Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[:, i]'))
        end
        println("I: $(j) Train error: $(Error/size(images, 2)) Train accuracy: $(Correct_cnt/size(images, 2)) Test-Err:: $(test_Error/size(test_images, 2)) Test-Acc:: $(test_Correct_cnt/size(test_images, 2))")
    end
end

I: 10 Train error: 0.40356431055896214 Train accuracy: 0.821 Test-Err:: 0.37596035249840204 Test-Acc:: 0.8267
I: 20 Train error: 0.3316619242484017 Train accuracy: 0.864 Test-Err:: 0.3210854633568354 Test-Acc:: 0.8565
I: 30 Train error: 0.28404975000597044 Train accuracy: 0.909 Test-Err:: 0.297259838898316 Test-Acc:: 0.8695
I: 40 Train error: 0.2498115288206865 Train accuracy: 0.938 Test-Err:: 0.2893557843097215 Test-Acc:: 0.872
I: 50 Train error: 0.22600793670063843 Train accuracy: 0.94 Test-Err:: 0.28712980676338556 Test-Acc:: 0.8747
I: 60 Train error: 0.22531916559425316 Train accuracy: 0.955 Test-Err:: 0.28186890220650884 Test-Acc:: 0.875
I: 70 Train error: 0.22171653734668323 Train accuracy: 0.95 Test-Err:: 0.2829342462900369 Test-Acc:: 0.8761
I: 80 Train error: 0.19041724520488393 Train accuracy: 0.964 Test-Err:: 0.2810256102909915 Test-Acc:: 0.8742
I: 90 Train error: 0.19813206766203406 Train accuracy: 0.961 Test-Err:: 0.28149859920679754 Test-Acc:: 0.8777
I: 100 Train error: 0.

# Batch Gradient Descent

In [59]:
using Random
Random.seed!(1)

relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:batch_size:size(images,2)-batch_size
        batch_start, batch_end = i, i+batch_size-1
        layer_0 = images[:, batch_start:batch_end]
        layer_1 = relu.(layer_0' * weights_0_1)
        
        dropout_mask = bitrand(size(layer_1))
        layer_1 .*= (dropout_mask .* 2)
        layer_2 = layer_1 * weights_1_2
        
        Error += sum((labels[:, batch_start:batch_end]' .- layer_2) .^ 2)
        
        for k=1:batch_size
            Correct_cnt += Int(argmax(layer_2[k, :]) == argmax(labels[:, batch_start+k-1]))
            layer_2_delta = (labels[:, batch_start:batch_end]' .- layer_2) ./batch_size
            layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

            layer_1_delta .*= dropout_mask

            weights_1_2 += alpha .* layer_1' * layer_2_delta
            weights_0_1 += alpha .* layer_0 * layer_1_delta
        end
    end
        
    if (j % 10 == 0)
        test_Error, test_Correct_cnt = (0.0, 0)
        for i = 1:size(test_images, 2)

            layer_0 = test_images[:, i]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            test_Error += sum((test_labels[:, i]' .- layer_2) .^ 2)
            test_Correct_cnt += Int(argmax(layer_2[1,:]) == argmax(test_labels[:, i]))
        end
        println("I: $(j) Train error: $(Error/size(images, 2)) Train accuracy: $(Correct_cnt/size(images, 2)) Test-Err:: $(test_Error/size(test_images, 2)) Test-Acc:: $(test_Correct_cnt/size(test_images, 2))")
    end
end



I: 10 Train error: 0.44005504903415854 Train accuracy: 0.447 Test-Err:: 0.5814515684878412 Test-Acc:: 0.6908
I: 20 Train error: 0.3821944487765399 Train accuracy: 0.509 Test-Err:: 0.5082174537259565 Test-Acc:: 0.7475
I: 30 Train error: 0.3337470974674489 Train accuracy: 0.531 Test-Err:: 0.46339433903535265 Test-Acc:: 0.7747
I: 40 Train error: 0.3065819905145321 Train accuracy: 0.563 Test-Err:: 0.43126136608968446 Test-Acc:: 0.7865
I: 50 Train error: 0.2838652991377701 Train accuracy: 0.579 Test-Err:: 0.4063259786700258 Test-Acc:: 0.8045
I: 60 Train error: 0.2685204515587329 Train accuracy: 0.584 Test-Err:: 0.39027730787014797 Test-Acc:: 0.8141
I: 70 Train error: 0.26185819369436936 Train accuracy: 0.585 Test-Err:: 0.37423314258394824 Test-Acc:: 0.8195
I: 80 Train error: 0.24019714807604417 Train accuracy: 0.612 Test-Err:: 0.36445480684902587 Test-Acc:: 0.8267
I: 90 Train error: 0.2275176534585732 Train accuracy: 0.616 Test-Err:: 0.353904891368789 Test-Acc:: 0.8342
I: 100 Train error: 0