## COS 495 PSET 5: ConvNets at Scale

If you're running locally, you'll have to install TensorFlow via `Pkg.checkout("TensorFlow")`
You'll also have to install Plots and Images via `Pkg.add`.
And install MLDatasets via Pkg.clone("https://github.com/JuliaML/MLDatasets.jl.git")
All these packages come pre-installed on the AMI.

In [24]:
using TensorFlow, MLDatasets

[1m[34mINFO: Precompiling module GZip.
[0m

In [25]:
train_x, train_y = CIFAR10.traindata()
train_y .+= 1      # transform 0..9 to 1..10
categories = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

type DataLoader
    cur_id::Int
    order::Vector{Int}
end

loader_train = DataLoader(1, shuffle(1:45000))
loader_val = DataLoader(1, shuffle(45001:50000))    # use last 5000 examples for validation set

function next_batch(loader::DataLoader, batch_size)
    x = zeros(Float32, batch_size, 32*32*3)
    y = zeros(Float32, batch_size, 10)
    for i in 1:batch_size
        x[i, :] = train_x[:,:,:,loader.order[loader.cur_id]][:]
        label = train_y[loader.order[loader.cur_id]]
        y[i, Int(label)] = 1.0
        loader.cur_id += 1
        if loader.cur_id > length(loader.order)
            loader.cur_id = 1
        end
    end
    x, y
end

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 99  162M   99  162M    0     0  6771k      0  0:00:24  0:00:24 --:--:-- 6179k  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  162M  100  162M    0     0  6771k      0  0:00:24  0:00:24 --:--:-- 5831k


next_batch (generic function with 1 method)

In [28]:
using Plots, Images
gr(size=(600,600))

# function for displaying a stack of images
# imgstack is mxnxp array that contains p images, each of which is mxn 
function montage(imgstack,titles)
    plot(
        [plot(
                colorview(RGB,permutedims(imgstack[:,:,:,i],(3,1,2))),
                title=titles[i],
                aspect_ratio=:equal
            ) for i=1:size(imgstack,4)]...,
        legend = :none, axis = nothing     # options necessary to get nice spacing of the images
    )
end

[1m[34mINFO: Precompiling module Images.
[0m

montage (generic function with 1 method)

### View some images and labels

In [29]:
montage(train_x[:,:,:,1:16],categories[train_y[1:16]])

In [30]:
# Parameters
learning_rate = 0.001
training_iters = 2000
batch_size = 128
val_batch_size = 512
display_step = 50

# Network Parameters
input_shape = [32, 32, 3] # 32x32 RGB images
n_classes = 10 # CIFAR-10 total classes
dropout = 1.0 # probability to keep units (1.0 indicates no dropout)

# tf Graph input
session = Session(Graph())

x = placeholder(Float32)
y = placeholder(Float32)
keep_prob = placeholder(Float32) # dropout keep probability

W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.


<Tensor placeholder_3:1 shape=unknown dtype=Float32>

In [31]:
# Create some wrappers for simplicity
function conv2d(x, W, b, strides=1)
    # Conv2D wrapper, with bias and relu activation
    return nn.relu( b + nn.conv2d(x, W, [1, strides, strides, 1], "SAME") )
end

function maxpool2d(x, k=2)
    # MaxPool2D wrapper
    return nn.max_pool(x, [1, k, k, 1], [1, k, k, 1], "SAME")
end

maxpool2d (generic function with 2 methods)

In [32]:
# Create model
function conv_net(x, weights, biases, dropout)
    # Reshape input picture
    x = reshape(x, [-1, 32, 32, 3])

    conv1 = maxpool2d(conv2d(x, weights["wc1"], biases["bc1"]))
    conv2 = conv2d(conv1, weights["wc2"], biases["bc2"])
    conv3 = maxpool2d(conv2d(conv2, weights["wc3"], biases["bc3"]))

    # transition to fully connected layer
    # Reshape conv3 output to fit fully connected layer input
    fc1 = reshape(conv3, [-1, get(get_shape(weights["wd1"]).dims[1])])
    fc1 = nn.relu( fc1 * weights["wd1"] + biases["bd1"] )
    # Apply Dropout
    fc1 = nn.dropout(fc1, dropout)
    
    # Output, class prediction 
    return fc1 * weights["out"] + biases["out"]
end

conv_net (generic function with 1 method)

In [35]:
# Store layers weight & bias
weights = Dict(
    # 3x3 conv, 3 inputs, 32 outputs
    "wc1" => Variable(0.05*randn(Float32, 3, 3, 3, 32)),
    # 3x3 conv, 32 inputs, 64 outputs
    "wc2" => Variable(0.05*randn(Float32, 3, 3, 32, 64)),
    # 3x3 conv, 64 inputs, 64 outputs
    "wc3" => Variable(0.05*randn(Float32, 3, 3, 64, 64)),
    # fully connected, 7*7*64 inputs, 512 outputs
    "wd1" => Variable(0.05*randn(Float32, 8*8*64, 512)),
    # fully connected, 512 inputs, 512 outputs
    "out" => Variable(0.05*randn(Float32, 512, n_classes))
)

biases = Dict(
    "bc1" => Variable(zeros(Float32, 32)),
    "bc2" => Variable(zeros(Float32, 64)),
    "bc3" => Variable(zeros(Float32, 64)),
    "bd1" => Variable(zeros(Float32, 512)),
    "out" => Variable(zeros(Float32, n_classes))
)

# Construct model
pred = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
soft = nn.softmax(pred) 
#cost = reduce_mean(nn.softmax_cross_entropy_with_logits(pred, y))   # the right way but alas not wrapped for Julia
cross_entropy = reduce_mean(-reduce_sum(y.*log(soft), axis=[2]))   # could cause underflow/overflow problems

optimizer = train.minimize(train.AdamOptimizer(learning_rate), cross_entropy)

# Evaluate model
correct_pred = indmax(pred, 2) .== indmax(y, 2)
accuracy = reduce_mean(cast(correct_pred, Float32))

# Initializing the variables
init = global_variables_initializer()

LoadError: MethodError: no method matching reduce_sum(::TensorFlow.Tensor, ::Int64)[0m
Closest candidates are:
  reduce_sum(::TensorFlow.AbstractTensor; keep_dims, reduction_indices, name) at /Users/Joshi/.julia/v0.5/TensorFlow/src/ops/math.jl:305[0m

In [36]:
Pkg.status()

12 required packages:
 - GR                            0.19.0
 - Gadfly                        0.5.3
 - HDF5                          0.7.3
 - IJulia                        1.4.1
 - Images                        0.7.0
 - Interact                      0.4.3
 - Ipopt                         0.2.6
 - MNIST                         0.0.2
 - Plotly                        0.1.1              master
 - Plots                         0.10.3
 - TensorFlow                    0.4.5              master
 - Vega                          0.6.8
103 additional packages:
 - AxisAlgorithms                0.1.6
 - AxisArrays                    0.0.3
 - BinDeps                       0.4.5
 - Blink                         0.5.1
 - Blosc                         0.2.0
 - BufferedStreams               0.3.0
 - Calculus                      0.2.0
 - CatIndices                    0.0.1
 - Codecs                        0.2.0
 - ColorBrewer                   0.3.0
 - ColorTypes                    0.3.3
 - ColorVector

In [34]:
# launch the graph
tic()
run(session, init)

# initialize accuracy/loss arrays
train_acc = zeros(div(training_iters,display_step))
train_loss = zeros(div(training_iters,display_step))
val_acc = zeros(div(training_iters,display_step))
val_loss = zeros(div(training_iters,display_step))

# keep training until reach max iterations
for step = 1:training_iters
    batch_x, batch_y = next_batch(loader_train, batch_size)
    run(session, optimizer, Dict(x => batch_x, y => batch_y, keep_prob => dropout))
    if step % display_step == 0
        println(step)
        ibatch = div(step,display_step)
        train_loss[ibatch], train_acc[ibatch] =
            run(session, [cross_entropy, accuracy], Dict(x => batch_x, y => batch_y, keep_prob => 1.))
        val_batch_x, val_batch_y = next_batch(loader_val, val_batch_size)
        val_loss[ibatch], val_acc[ibatch], this_soft = 
            run(session, [cross_entropy, accuracy, soft], Dict( x => val_batch_x, y=> val_batch_y, keep_prob=> 1.))
        
        IJulia.clear_output(true)
        xvals = display_step:display_step:step
        truelabel = indmax(val_batch_y[1,:])
        plot(
            plot(xvals,[train_acc[1:ibatch],val_acc[1:ibatch]], 
                title="Classification accuracy", 
                label=["training", "validation"],
                xlabel="# minibatches",
                ylabel="Accuracy"
                ),
            plot(xvals,[train_loss[1:ibatch],val_loss[1:ibatch]], 
                title="Cross entropy loss",
                label=["training", "validation"],
                xlabel="# minibatches"
                ),
            plot(colorview(RGB,permutedims(reshape(val_batch_x[1,:],32,32,3),(3,1,2))),
                title = "Input image",
                xlabel = string("true label: ", truelabel, " ", categories[truelabel])
                ),
            bar(this_soft[1,:],
                title = "Class probabilities",
                legend = :none
            )
        ) |> display
        sleep(0.01)
    end
end
toc()

LoadError: UndefVarError: init not defined