In [1]:
# Make sure BSON is installed in the active project before it is loaded below.
import Pkg
Pkg.add("BSON")

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/Volumes/GoogleDrive/My Drive/UNOMIC/JuliaTesting/Julia/cifar10conv/Project.toml`
[32m[1m  No Changes[22m[39m to `/Volumes/GoogleDrive/My Drive/UNOMIC/JuliaTesting/Julia/cifar10conv/Manifest.toml`


In [4]:
# Make sure MLDatasets is installed in the active project before it is loaded below.
import Pkg
Pkg.add("MLDatasets")

[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m JSON3 ─ v1.9.4
[32m[1m    Updating[22m[39m `/Volumes/GoogleDrive/My Drive/UNOMIC/JuliaTesting/Julia/cifar10conv/Project.toml`
 [90m [eb30cadb] [39m[92m+ MLDatasets v0.5.15[39m
[32m[1m    Updating[22m[39m `/Volumes/GoogleDrive/My Drive/UNOMIC/JuliaTesting/Julia/cifar10conv/Manifest.toml`
 [90m [9e28174c] [39m[92m+ BinDeps v1.0.2[39m
 [90m [b99e7846] [39m[92m+ BinaryProvider v0.5.10[39m
 [90m [e1450e63] [39m[92m+ BufferedStreams v1.0.0[39m
 [90m [124859b0] [39m[92m+ DataDeps v0.7.7[39m
 [90m [92fee26a] [39m[92m+ GZip v0.5.1[39m
 [90m [f67ccb44] [39m[92m+ HDF5 v0.16.2[39m
 [90m [cd3eb016] [39m[92m+ HTTP v0.9.17[39m
 [90m [83e8ac13] [39m[92m+ IniFile v0.5.1[39m
 [90m [7d512f48] [39m[92m+ InternedStrings v0.7.0[39m
 [90m [0f8b85d8] [39m[92m+ JSON3 v1.9.4[39m
 [90m [23992714] [39m[92m+ MAT v0.10.3[39m
 [90m [eb30cadb] [39m[92m+ MLDatasets v0.5.15[39m

In [2]:
using Flux, MLDatasets
using Flux: onehotbatch, onecold, DataLoader, Optimiser
using BSON:@save,@load
# using JLD

In [3]:
"""
    ConvMixer(in_channels, kernel_size, patch_size, dim, depth, N_classes)

Build a ConvMixer-style image classifier as a Flux `Chain`.

The network is assembled from three stages:

1. a patch embedding: a `patch_size`×`patch_size` convolution with stride `patch_size`
   mapping `in_channels` to `dim` channels, followed by `BatchNorm`;
2. `depth` mixer layers, each made of a residual depthwise convolution
   (`kernel_size`×`kernel_size`, `groups=dim`, same padding) with `BatchNorm`, followed by
   a 1×1 convolution with `BatchNorm`;
3. a head: global average pooling, flattening and a `Dense(dim, N_classes)` layer.

All convolutions use the `gelu` activation.
"""
function ConvMixer(in_channels, kernel_size, patch_size, dim, depth, N_classes)
    # Stage 1: cut the image into patches and embed each one into `dim` channels.
    patch_embedding = (
        Conv((patch_size, patch_size), in_channels => dim, gelu; stride=patch_size),
        BatchNorm(dim),
    )

    # Stage 2 builder: one mixer layer = residual spatial mixing + pointwise mixing.
    function mixer_layer()
        spatial_mixing = Chain(
            Conv((kernel_size, kernel_size), dim => dim, gelu; pad=SamePad(), groups=dim),
            BatchNorm(dim),
        )
        channel_mixing = Chain(Conv((1, 1), dim => dim, gelu), BatchNorm(dim))
        return Chain(SkipConnection(spatial_mixing, +), channel_mixing)
    end
    mixer_layers = map(_ -> mixer_layer(), 1:depth)

    # Stage 3: pool every channel to a single value and classify.
    classifier_head = (AdaptiveMeanPool((1, 1)), flatten, Dense(dim, N_classes))

    return Chain(patch_embedding..., mixer_layers..., classifier_head...)
end

ConvMixer (generic function with 1 method)

In [4]:
"""
    get_data(batchsize; dataset = MLDatasets.CIFAR10, idxs = nothing)

Load `dataset` and return `(train_loader, test_loader)`, two `DataLoader`s yielding
`(x, y)` batches of at most `batchsize` samples. Only the training loader is shuffled.

# Keywords
- `dataset`: an MLDatasets dataset module providing `traindata` and `testdata`
  (default `MLDatasets.CIFAR10`).
- `idxs`: `nothing` loads the full dataset. Otherwise (for quick tests) only the first
  `idxs` training samples and the first `idxs ÷ 10` test samples (at least one) are used.

Labels are one-hot encoded over the classes `0:9`. Three-dimensional inputs
(single-channel datasets such as MNIST) are reshaped to
`(width, height, 1, n_samples)`.

Sets `ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"` as a side effect.
"""
function get_data(batchsize; dataset = MLDatasets.CIFAR10, idxs = nothing)
    # Presumably lets DataDeps download the dataset without an interactive prompt.
    ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"

    # Loading Dataset
    if idxs === nothing
        xtrain, ytrain = dataset.traindata(Float32)
        xtest, ytest = dataset.testdata(Float32)
    else
        # Integer division: `Int(idxs / 10)` throws InexactError unless idxs is a
        # multiple of 10. Keep at least one test sample for very small subsets.
        n_test = max(idxs ÷ 10, 1)
        xtrain, ytrain = dataset.traindata(Float32, 1:idxs)
        xtest, ytest = dataset.testdata(Float32, 1:n_test)
    end

    # Flux convolutions expect (width, height, channels, batch); add the missing channel
    # dimension when the dataset has a single channel (e.g. MNIST).
    if ndims(xtrain) == 3
        w, h = size(xtrain, 1), size(xtrain, 2)
        xtrain = reshape(xtrain, (w, h, 1, :))
        xtest = reshape(xtest, (w, h, 1, :))
    end

    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    train_loader = DataLoader((xtrain, ytrain), batchsize=batchsize, shuffle=true)
    test_loader = DataLoader((xtest, ytest), batchsize=batchsize)

    return train_loader, test_loader
end

get_data (generic function with 1 method)

In [5]:
"""
    create_loss_function(dataloader, device)

Return a closure `loss(model)` that evaluates `model` on every batch of `dataloader`.

Each batch is moved with `device` before the forward pass. The closure returns the tuple
`(mean_loss, accuracy)`: the summed `Flux.logitcrossentropy` and the number of correct
`onecold` predictions, each divided by the total number of samples seen.
"""
function create_loss_function(dataloader, device)
    function loss(model)
        n_samples = 0
        total_loss = 0.0f0
        n_correct = 0.0f0

        for (xb, yb) in dataloader
            inputs = device(xb)
            targets = device(yb)
            logits = model(inputs)

            total_loss += Flux.logitcrossentropy(logits, targets, agg=sum)
            n_correct += sum(onecold(logits) .== onecold(targets))
            # The last array dimension is the batch dimension.
            n_samples += last(size(inputs))
        end

        return total_loss / n_samples, n_correct / n_samples
    end

    return loss
end

create_loss_function (generic function with 1 method)

In [25]:
"""
    train(n_epochs=10; batchsize=128, η=3e-4, in_channel=3, patch_size=2,
          kernel_size=7, dim=128, depth=18, use_cuda=false, save_results=false)

Train a 10-class `ConvMixer` on the data returned by `get_data(batchsize)` for
`n_epochs` epochs and return the trained model moved to the CPU.

Warning: with the default settings the training can take a long time.

# Keywords
- `batchsize`: mini-batch size passed to `get_data`.
- `η`: learning rate of the `ADAM` optimiser.
- `in_channel`, `patch_size`, `kernel_size`, `dim`, `depth`: forwarded to `ConvMixer`.
- `use_cuda`: move model and batches with `gpu` instead of `cpu`.
- `save_results`: when `true`, write the model to `model.bson` and the per-epoch
  train/test `(loss, accuracy)` logs to `losses.bson` in the current directory.

The optimiser chains `WeightDecay(1f-3)`, `ClipNorm(1.0)` and `ADAM(η)`. After every
epoch the loss and accuracy on the full train and test loaders are computed and logged.
"""
function train(
    n_epochs=10;
    batchsize=128,
    η=3e-4,
    in_channel=3,
    patch_size=2,
    kernel_size=7,
    dim=128,
    depth=18,
    use_cuda=false,
    save_results=false,
)
    @info "Data loading"
    train_loader, test_loader = get_data(batchsize)
    @info "Data loading -- done"

    # Per-epoch logs: column 1 holds the loss, column 2 the accuracy.
    train_save = zeros(n_epochs, 2)
    test_save = zeros(n_epochs, 2)

    device = use_cuda ? gpu : cpu
    device_name = use_cuda ? "GPU" : "CPU"
    @info "Training on $device_name"

    train_loss_fn = create_loss_function(train_loader, device)
    test_loss_fn = create_loss_function(test_loader, device)

    @info "Building model"
    # `net` is assigned exactly once so the gradient closure below captures it without
    # boxing; the CPU copy made after training gets its own name.
    net = ConvMixer(in_channel, kernel_size, patch_size, dim, depth, 10) |> device
    @info "Model built"

    ps = params(net)
    opt = Optimiser(WeightDecay(1f-3), ClipNorm(1.0), ADAM(η))

    @info "Training starts"
    for epoch in 1:n_epochs
        for (x, y) in train_loader
            # Fresh names for the device copies: reassigning `x`/`y` would box the
            # variables captured by the closure.
            xd, yd = device(x), device(y)
            gr = gradient(() -> Flux.logitcrossentropy(net(xd), yd, agg=sum), ps)
            Flux.Optimise.update!(opt, ps, gr)
        end

        # Logging
        train_loss, train_acc = train_loss_fn(net) |> cpu
        test_loss, test_acc = test_loss_fn(net) |> cpu
        train_save[epoch, :] .= (train_loss, train_acc)
        test_save[epoch, :] .= (test_loss, test_acc)

        @info "Epoch $epoch : Train loss = $train_loss || Validation accuracy = $test_acc."
    end

    model = cpu(net)
    if save_results
        @save "model.bson" model
        @save "losses.bson" train_save test_save
    end
    return model
end



train (generic function with 2 methods)

In [22]:
# Script-entry guard: the body runs only when this file is the program being executed,
# i.e. when the absolute path of PROGRAM_FILE equals this file's own path.
# In this notebook session the two sides differ (the next two cells show PROGRAM_FILE
# resolving to IJulia's kernel.jl and @__FILE__ to the cell name), so nothing is printed.
if abspath(PROGRAM_FILE) == @__FILE__
    print("Hello")
end

In [23]:
# Inspect the left-hand side of the script-entry guard: the absolute path of the
# program Julia was started with.
abspath(PROGRAM_FILE)

"/Users/usmanasim/.julia/packages/IJulia/e8kqU/src/kernel.jl"

In [24]:
# Inspect the right-hand side of the script-entry guard: the name of the source
# currently being evaluated (here, the notebook cell).
@__FILE__

"In[24]"

In [None]:
# Launch training with the default number of epochs (n_epochs = 10).
train()