In [1]:
using Pkg

#Pkg.add("ColorTypes")

using Random, Statistics

using Images, FileIO, DataFrames, Optimisers, ProgressMeter, Flux
using Flux:  onehotbatch, crossentropy

using ColorTypes


In [2]:
function lazy_load_image_data(dir::String, label::Int)
    files = readdir(dir; join=true)  # Get list of image files in directory

    return (begin
        img = Images.load(f)  # Load image

        # Convert RGB image to Float32 and extract channels
        img = Float32.(channelview(img))  # Converts to (C, H, W)

        # Correct the shape to (H, W, C)
        img = permutedims(img, (2, 3, 1))  # From (C, H, W) → (H, W, C)

        # Ensure batch dimension is last (H, W, C, B)
        img = reshape(img, size(img, 1), size(img, 2), size(img, 3), 1)

        # Return tuple (image, label)
        (img, label)
    end for f in files)  # Create a generator
end

;

In [3]:
train_fake_iter = lazy_load_image_data("data/train/FAKE", 0)
train_real_iter = lazy_load_image_data("data/train/REAL", 1)
test_fake_iter = lazy_load_image_data("data/test/FAKE", 0)
test_real_iter = lazy_load_image_data("data/test/REAL", 1)

# Combine the datasets by converting the lazy iterator into a list and then concatenating
train_data = vcat(collect(train_fake_iter), collect(train_real_iter))
test_data = vcat(collect(test_fake_iter), collect(test_real_iter))

# Extract features and labels from the dataset
X_train = [x for (x, _) in train_data]
y_train = Flux.onehotbatch([y for (_, y) in train_data], 0:1)
X_test = [x for (x, _) in test_data]
y_test = Flux.onehotbatch([y for (_, y) in test_data], 0:1)

;

In [13]:
function create_cnn()
    return Chain(
        x -> permutedims(x, (2, 3, 1, 4)), # Swap dimensions to (H, W, C, Batch)
        Conv((3,3), 3=>32, relu, pad = 1),    # First convolution layer (32 filters, 3x3 kernel)
        MaxPool((2,2)),              # Max pooling layer
        Conv((3,3), 32=>64, relu, pad = 1),   # Second convolution layer (64 filters, 3x3 kernel)
        MaxPool((2,2)),              # Max pooling layer
        Conv((3,3), 64=>128, relu, pad = 1),  # Third convolution layer (128 filters, 3x3 kernel)
        MaxPool((2,2)),              # Max pooling layer
        Flux.flatten,                     # Flatten the output of the convolution layers
        Dense(128 * 4 * 4, 10),      # Dense layer to output 10 classes
        softmax                     # Softmax activation to get probabilities
    )
end

model = create_cnn()
;

In [16]:
loss(x, y) = logitcrossentropy(model(x), y)
optimizer = ADAM()
opt_state = Optimisers.setup(Adam(), model)

epochs = 10

batch_size = 32


32

In [11]:
# Load only the first FAKE training sample from the lazy generator and print the
# size of its image array as a sanity check of the data layout.
x_sample, y_sample = first(train_fake_iter)
println("Image shape: ", size(x_sample))  # Should be (H, W, C, 1)

Image shape: (32, 32, 3, 1)


In [17]:
function train_model!(model, train_X, train_Y, opt, epochs, batch_size)

    # this is the data loader that will be used to load the data in batches
    data_loader = Flux.DataLoader((train_X, train_Y), batchsize=batch_size, shuffle=true)
    
    # setting up the optimizer and list ot gather the loss
    opt_state = Flux.setup(opt, model)  
    total_loss = []

    # training the model through all epochs
    for epoch in 1:epochs
        epoch_loss = 0
        
        # going through each batch
        for (x, y) in data_loader
            # calculating the gradient and updating the weights
            gs = Flux.gradient(model -> Flux.Losses.crossentropy(model(x), y), model)[1]  
            Flux.update!(opt_state, Flux.trainable(model), gs)

            # updating the epoch loss for plotting later
            epoch_loss += Flux.Losses.crossentropy(model(x), y)
        end

        println("Epoch $epoch complete")
        push!(total_loss, epoch_loss)

    end
    return total_loss

end;

In [18]:
# Train the global `model` on the training split; `loss_list` receives the per-epoch
# loss history returned by `train_model!`.
loss_list = train_model!(model, X_train, y_train, optimizer, epochs, batch_size);

LoadError: ArgumentError: no valid permutation of dimensions

In [12]:
for epoch in 1:epochs
    println("Epoch $epoch starting...")

    # Progress bar for training
    prog = ProgressUnknown("Training Progress:")

    # Training on batches of data
    for (x, y) in train_data
        
        # Compute gradients
        grads = Flux.gradient(m -> loss(m(x), y), model)

        # Update model parameters
        Flux.update!(optimizer, model, grads)

        next!(prog)  # Update the progress bar
    end

    println("\nEpoch $epoch complete")

    # Optional: Evaluate on validation set (here we use the test data as an example)
    if epoch % 1 == 0  # Perform validation every epoch
        val_loss = mean(loss(model(x_val), y_val) for (x_val, y_val) in test_data)
        println("Validation Loss after Epoch $epoch: $val_loss")
    end
end

Epoch 1 starting...


LoadError: DimensionMismatch: layer Conv((3, 3), 3 => 32, relu) expects size(input, 3) == 3, but got 32×3×32×1 Array{Float32, 4}

In [None]:
function accuracy(data)
    correct = 0
    total = 0
    for (x, y) in data
        preds = Flux.onecold(model(x))
        labels = Flux.onecold(y)
        correct += sum(preds .== labels)
        total += length(labels)
    end
    return correct / total
end

test_acc = accuracy(test_data)
println("Test Accuracy: ", test_acc)
