In [1]:
# Inject CSS so the notebook's `.container` element spans the full page width.
display("text/html", "<style>.container { width:100% !important; }</style>")

In [28]:
using Pkg;
using Flux
using Images
using FileIO
using Glob
using Mmap
using ImageShow
using Base.Iterators: partition
using Flux: onehotbatch,onecold, crossentropy, throttle
using RandomNumbers
using Plots
using ImageView, Images

Read the CIFAR-10 data (https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz).
The data is given in binary format as a sequence of fixed-size records, one per image.
The first byte of each record is the label of the image, a number between 0 and 9.
The next 3072 bytes are the pixel values: the first 1024 are R, the next 1024 are G
and the last 1024 are B.

In [3]:
"""
    loadBatches(path; nimages=1000)

Read the first `nimages` records of the binary batch file at `path`.

Each record is 3073 bytes long: one label byte followed by 3072 pixel bytes.

# Keywords
- `nimages`: number of records to read from the start of the file (default `1000`).

# Returns
A tuple `(pixels, labels)` where `pixels` is a vector of `32×32×3` `Float64` arrays
with values scaled to `[0, 1]` and `labels` is a vector of `Int` labels.

# Throws
- `ArgumentError` if `nimages` is negative or the file holds fewer than `nimages` records.
"""
function loadBatches(path; nimages::Integer=1000)
    recordlen = 1 + 32 * 32 * 3  # one label byte + 3072 pixel bytes
    nimages >= 0 || throw(ArgumentError("nimages must be non-negative, got $nimages"))

    batch = read(path)
    available = length(batch) ÷ recordlen
    if available < nimages
        throw(ArgumentError(
            "file $path holds only $available complete records, but $nimages were requested",
        ))
    end

    # Byte offset of the record preceding record `n`.
    offset(n) = (n - 1) * recordlen
    labels = [Int(batch[1 + offset(n)]) for n in 1:nimages]
    # Dividing the raw bytes by 255 yields Float64 intensities in [0, 1].
    pixels = [
        reshape(batch[(2 + offset(n)):(recordlen + offset(n))], (32, 32, 3)) / 255
        for n in 1:nimages
    ]
    return (pixels, labels)
end

loadBatches (generic function with 1 method)

In [4]:
# Collect the batch file names matching the patterns under `path`.
path = "./cifar-10-batches-bin/"
trainbatch = readdir(glob"data_batch_*.bin", path)
testbatch = readdir(glob"test_batch.bin", path)

# Fail early with a clear message; otherwise `X_t` and `Y_train` would stay
# undefined and a later cell would fail with a confusing error.
isempty(trainbatch) && error("no data_batch_*.bin files found in $path")

# Read the first training batch, then append the images and labels of the rest.
X_t, Y_train = loadBatches(first(trainbatch))
for file in trainbatch[2:end]
    pixels, labels = loadBatches(file)
    append!(X_t, pixels)
    append!(Y_train, labels)
end

In [5]:
# Stack a vector of 32×32×3 image arrays into one 32×32×3×N Float64 array.
# Each image becomes one column of a 3072×N matrix that is then reshaped, so no
# untyped `Any` buffer has to be grown element by element.
function stack_images(images)
    isempty(images) && return zeros(Float64, 32, 32, 3, 0)
    flat = reduce(hcat, vec.(images))
    return convert(Array{Float64,4}, reshape(flat, 32, 32, 3, :))
end

# Reshape X_t
X_train = stack_images(X_t)

X_tt, Y_test = loadBatches(testbatch[1])
X_test = stack_images(X_tt)

# One-hot encoding over the label values 0 through 9
Y_train = onehotbatch(Y_train, 0:9)
Y_test = onehotbatch(Y_test, 0:9)

10×1000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 false  false  false   true  false  …  false  false  false  false  false
 false  false  false  false  false     false  false   true  false  false
 false  false  false  false  false     false  false  false  false  false
  true  false  false  false  false      true  false  false   true  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false  …  false  false  false  false  false
 false  false  false  false   true     false  false  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false   true   true  false  false     false   true  false  false   true
 false  false  false  false  false     false  false  false  false  false

In [23]:
# Show the first channel plane of the second training image (a 32×32 matrix).
X_train[:,:,1,2]

32×32 Array{Float64,2}:
 0.603922  0.54902   0.54902   0.533333  …  0.686275   0.647059   0.639216 
 0.494118  0.568627  0.545098  0.537255     0.611765   0.611765   0.619608 
 0.411765  0.490196  0.45098   0.478431     0.603922   0.623529   0.639216 
 0.4       0.486275  0.576471  0.517647     0.576471   0.513725   0.568627 
 0.490196  0.588235  0.541176  0.592157     0.607843   0.368627   0.168627 
 0.607843  0.596078  0.517647  0.709804  …  0.631373   0.4        0.0745098
 0.67451   0.682353  0.666667  0.796078     0.627451   0.423529   0.0784314
 0.705882  0.698039  0.698039  0.815686     0.654902   0.501961   0.290196 
 0.556863  0.52549   0.670588  0.815686     0.647059   0.603922   0.52549  
 0.435294  0.431373  0.752941  0.796078     0.596078   0.611765   0.466667 
 0.415686  0.521569  0.858824  0.701961  …  0.639216   0.713725   0.431373 
 0.427451  0.639216  0.917647  0.662745     0.643137   0.701961   0.388235 
 0.482353  0.752941  0.898039  0.643137     0.521569   0.490196 

In [43]:
# Lazily split the sample indices into consecutive chunks of 10. The sample
# count is the length of the fourth axis of the 32×32×3×N image arrays.
train_idx = partition(1:size(X_train, 4), 10)
test_idx = partition(1:size(X_test, 4), 10)

Base.Iterators.PartitionIterator{UnitRange{Int64}}(1:1000, 10)

In [110]:
"""
    make_partition_index(X, batch_size)

Return a vector of `(first, last)` index pairs that split the samples of `X`
into consecutive batches of at most `batch_size` samples.

The number of samples is computed as `length(X) / (32*32*3)`, i.e. `X` is
treated as a collection of images with 3072 values each.
"""
function make_partition_index(X, batch_size)
    nsamples = Int64(length(X) / (32 * 32 * 3))
    chunks = partition(1:nsamples, batch_size)
    # Each chunk is an increasing run of indices, so its ends are its extrema.
    return [(first(chunk), last(chunk)) for chunk in chunks]
end

make_partition_index (generic function with 1 method)

In [174]:
"""
    make_minibatch(X, Y, batch_size)

Split the samples in `X` and their targets in `Y` into aligned mini-batches of
at most `batch_size` samples.

`X` is sliced along its fourth dimension. `Y` is sliced along its last
dimension, so a label vector yields sub-vectors and a one-hot matrix with one
column per sample yields whole columns. Plain linear indexing `Y[a:b]` on such
a matrix would instead return the first scalars in column-major order.

# Returns
A tuple `(minibatch_X, minibatch_Y)` of equally long vectors of batches.
"""
function make_minibatch(X, Y, batch_size)
    # Slice `A` along its last dimension, keeping every leading dimension whole.
    lastdim_slice(A, r) = A[ntuple(_ -> Colon(), ndims(A) - 1)..., r]

    ranges = [lo:hi for (lo, hi) in make_partition_index(X, batch_size)]
    minibatch_X = [X[:, :, :, r] for r in ranges]
    minibatch_Y = [lastdim_slice(Y, r) for r in ranges]
    return (minibatch_X, minibatch_Y)
end

make_minibatch (generic function with 1 method)

In [176]:
# Training data in mini-batches of 128 samples.
train_set = make_minibatch(X_train,Y_train,128);
# Test data with a batch size of 1.
test_set = make_minibatch(X_test,Y_test,1);