In [1]:
using Pkg
Pkg.activate("libs/")
# Pkg.instantiate()
# Pkg.add("MLUtils")
using CSV
using JLD2
using CUDA
using Glob
using Dates
# using Zygote
using DICOM
using Images
using MLUtils
using Setfield
using ImageView
using ImageDraw
using Statistics
using DataFrames
using StaticArrays
using MLDataPattern
using ChainRulesCore
using Distributions: Normal
using FastAI, FastVision, Flux, Metalhead
import CairoMakie; CairoMakie.activate!(type="png")

[32m[1m  Activating[22m[39m project at `~/Desktop/Project BAC/BAC project/libs`


/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/molloi-lab/snap/code/common/.cache/gio-modules/libgiolibproxy.so
/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/molloi-lab/snap/code/common/.cache/gio-modules/libgiolibproxy.so


Check how many threads we have

In [2]:
Threads.nthreads()

64

List all CUDA devices

In [3]:
CUDA.allowscalar(false)
CUDA.devices()

CUDA.DeviceIterator() for 4 devices:
0. NVIDIA GeForce RTX 4090
1. NVIDIA GeForce RTX 4090
2. NVIDIA GeForce RTX 4090
3. NVIDIA GeForce RTX 4090

# Notes

1. In this training, images' pixel values are zoomed into range = [0, 1]. This should not be applied to final plan since different images have different min and max.

2. In this training, only `pixel values` are being used. In future, we can feed more features to the NN to get a better information like `L_mass` or `R_mass`.

In [4]:
patch_size = 256
patch_size_half = round(Int, patch_size/2);

# Helper functions

In [5]:
"""
    This function zoom all pixel values into [0, 1].
"""
function zoom_pxiel_values(img)
    a, b = minimum(img), maximum(img)
    if b-a != 0
        img = (img .- a) / (b - a)
    end
    return img
end

zoom_pxiel_values

In [6]:
"""
    This function takes in a img of various size, 
    returns patches with size = patch_size * patch_size.
"""
function patch_image(img, lbl)
    s = size(img)
    x = ceil(Int, s[1]/patch_size) + floor(Int, (s[1]-patch_size_half)/patch_size)
    y = ceil(Int, s[2]/patch_size) + floor(Int, (s[2]-patch_size_half)/patch_size)
    num_patches = x*y
    img_patches = Array{Float32, 4}(undef, patch_size, patch_size, 1, num_patches)
    lbl_patches = Array{Float32, 4}(undef, patch_size, patch_size, 1, num_patches)
    ct = 0
    for i = 1 : x-1
        x_start = 1+(i-1)*patch_size_half
        x_end = x_start+patch_size-1
        for j = 1 : y-1
            y_start = 1+(j-1)*patch_size_half
            y_end = y_start+patch_size-1
            # save patch
            ct += 1
            img_patches[:, :, 1, ct] = zoom_pxiel_values(img[x_start:x_end, y_start:y_end])
            lbl_patches[:, :, 1, ct] = lbl[x_start:x_end, y_start:y_end]
        end
        # right col
        y_start, y_end = s[2]-patch_size+1, s[2]
        # save patch
        ct += 1
        img_patches[:, :, 1, ct] = zoom_pxiel_values(img[x_start:x_end, y_start:y_end])
        lbl_patches[:, :, 1, ct] = lbl[x_start:x_end, y_start:y_end]
    end
    # last row
    x_start, x_end = s[1]-patch_size+1, s[1]
    for j = 1 : y-1
        y_start = 1+(j-1)*patch_size_half
        y_end = y_start+patch_size-1
        # save patch
        ct += 1
        img_patches[:, :, 1, ct] = zoom_pxiel_values(img[x_start:x_end, y_start:y_end])
        lbl_patches[:, :, 1, ct] = lbl[x_start:x_end, y_start:y_end]
    end
    # right col
    y_start, y_end = s[2]-patch_size+1, s[2]
    # save patch
    ct += 1
    img_patches[:, :, 1, ct] = zoom_pxiel_values(img[x_start:x_end, y_start:y_end])
    lbl_patches[:, :, 1, ct] = lbl[x_start:x_end, y_start:y_end]
    # return
    return num_patches, img_patches, lbl_patches
end

patch_image

In [7]:
"""
    This function fixs the path to the images and labels.
"""
function fix_path!(data_set)
    num_data = size(data_set)[1]
    Threads.@threads for i = 1 : num_data
        for j = 1 : 2
            for k = 1 : 4
                # modify img path
                splited = split(deepcopy(data_set[i][j][k]), "\\")
                if size(splited)[1] > 1
                    new_path = joinpath("../collected_dataset_for_ML", joinpath(splited[4:end]))
                    data_set[i][j][k] = new_path
                end
            end
        end
    end
end

fix_path!

In [8]:
"""
    This function check how many number of images and labels there will be after patching.
"""
function get_num_of_imgs(data_set)
    num_data = size(data_set)[1]
    cts = Array{Int}(undef, num_data*4)
    Threads.@threads for i = 1 : num_data
        @views t = train_set[i]
        for j = 1 : 4
            # read dicom images
            s = size(dcm_parse(t[1][j])[(0x7fe0, 0x0010)])
            x = ceil(Int, s[1]/patch_size) + floor(Int, (s[1]-patch_size_half)/patch_size)
            y = ceil(Int, s[2]/patch_size) + floor(Int, (s[2]-patch_size_half)/patch_size)
            # save 
            cts[(i-1)*4+j] = x*y
        end
    end
    return cts
end

get_num_of_imgs

# 1. Prepare

In [9]:
@load "clean_set_step2_for_ubuntu.jld2" train_set valid_set

2-element Vector{Symbol}:
 :train_set
 :valid_set

In [10]:
data_dir = "../collected_dataset_for_ML";

Check if dataset is found

In [11]:
isdir(data_dir)

true

## 1.1 Load train set & valid set
container format: patch_size * patch_size * 1 * num_imgs

In [12]:
# get num of total patches(train)
ct_patches_train = get_num_of_imgs(train_set)
num_patches_train = sum(ct_patches_train)

703276

In [13]:
# runtime: 50s
num_train_data = size(train_set)[1]
train_container_images = Array{Float16, 4}(undef, patch_size, patch_size, 1, num_patches_train)
train_container_masks = Array{Float16, 4}(undef, patch_size, patch_size, 1, num_patches_train)
Threads.@threads for i = 1 : num_train_data
    start_idx = sum(ct_patches_train[1:i-1])+1
    for j = 1 : 4 # 4 images each patient
        # read dicom images
        img = Float16.(dcm_parse(train_set[i][1][j])[(0x7fe0, 0x0010)])
        # read png images
        lbl = Float16.(Images.load(train_set[i][2][j]))
        # process image
        num_patches, img_patches, lbl_patches = patch_image(img, lbl)
        # save 
        end_idx = start_idx+num_patches-1
        train_container_images[:, :, 1, start_idx : end_idx] = img_patches
        train_container_masks[:, :, 1, start_idx : end_idx] = lbl_patches
        start_idx = end_idx
    end
end

In [14]:
# get num of total patches(valid)
ct_patches_valid = get_num_of_imgs(valid_set)
num_patches_valid = sum(ct_patches_valid)

113564

In [15]:
# runtime: 7.5s
num_valid_data = size(valid_set)[1]
valid_container_images = Array{Float16, 4}(undef, patch_size, patch_size, 1, num_patches_valid)
valid_container_masks = Array{Float16, 4}(undef, patch_size, patch_size, 1, num_patches_valid)
Threads.@threads for i = 1 : num_valid_data
    start_idx = sum(ct_patches_valid[1:i-1])+1
    for j = 1 : 4 # 4 images each patient
        # read dicom images
        img = Float16.(dcm_parse(valid_set[i][1][j])[(0x7fe0, 0x0010)])
        # read png images
        lbl = Float16.(Images.load(valid_set[i][2][j]))
        # process image
        num_patches, img_patches, lbl_patches = patch_image(img, lbl)
        # save 
        end_idx = start_idx+num_patches-1
        valid_container_images[:, :, 1, start_idx : end_idx] = img_patches
        valid_container_masks[:, :, 1, start_idx : end_idx] = lbl_patches
        start_idx = end_idx
    end
end

In [16]:
GC.gc(true)

## 1.2 Create dataloaders

In [17]:
b_s = 10
batch_size = b_s*3
# batch_size = 20
train_loader = MLUtils.DataLoader((data=train_container_images, label=train_container_masks), batchsize=batch_size)
test_loader = MLUtils.DataLoader((data=valid_container_images, label=valid_container_masks), batchsize=b_s);

## 1.3 Create Model

In [18]:
function _random_normal(shape...)
    return Float32.(rand(Normal(0.0,0.02),shape...))
end

_conv = (stride, in, out) -> Conv((3, 3), in=>out, stride=stride, pad=SamePad();init=_random_normal)
_tran = (stride, in, out) -> ConvTranspose((2, 2), in=>out, stride=stride, pad=SamePad();init=_random_normal)
# _conv = (stride, in, out) -> Conv((3, 3), in=>out, stride=stride, pad=SamePad())
# _tran = (stride, in, out) -> ConvTranspose((2, 2), in=>out, stride=stride, pad=SamePad())

conv1 = (in, out) -> Chain(_conv(1, in, out), BatchNorm(out, leakyrelu))
conv2 = (in, out) -> Chain(_conv(2, in, out), BatchNorm(out, leakyrelu))
conv3 = (in, out) -> Chain(_conv(1, in, out), x -> softmax(x; dims = 3))
# conv3 = (in, out) -> Chain(_conv(1, in, out), sigmoid)
tran2 = (in, out) -> Chain(_tran(2, in, out), BatchNorm(out, leakyrelu))



function unet2D(in_chs, lbl_chs)
    # Contracting layers
    l1 = Chain(conv1(in_chs, 64), conv1(64, 64))
    l2 = Chain(l1, MaxPool((2,2), stride=2), conv1(64, 128), conv1(128, 128))
    l3 = Chain(l2, MaxPool((2,2), stride=2), conv1(128, 256), conv1(256, 256))
    l4 = Chain(l3, MaxPool((2,2), stride=2), conv1(256, 512), conv1(512, 512))
    l5 = Chain(l4, MaxPool((2,2), stride=2), conv1(512, 1024), conv1(1024, 1024), tran2(1024, 512))

    # Expanding layers
    l6 = Chain(Parallel(FastVision.Models.catchannels,l5,l4), 
                conv1(512+512, 512),
                conv1(512, 512),
                tran2(512, 256))
    l7 = Chain(Parallel(FastVision.Models.catchannels,l6,l3), 
                conv1(256+256, 256),
                conv1(256, 256),
                tran2(256, 128))
    l8 = Chain(Parallel(FastVision.Models.catchannels,l7,l2), 
                conv1(128+128, 128),
                conv1(128, 128),
                tran2(128, 64))
    l9 = Chain(Parallel(FastVision.Models.catchannels,l8,l1), 
                conv1(64+64, 64),
                conv1(64, 64),
                conv3(64, lbl_chs))
end

unet2D (generic function with 1 method)

## 1.4 Create Loss

In [19]:
function dice_loss(ŷ, y; ϵ=1f-5)
    # ŷ, y = Float32.(ŷ), Float32.(y)

    @inbounds loss_dice = 
        1f0 - (muladd(2f0, sum(ŷ[:,:,2,:] .* y[:,:,1,:]), ϵ) / (sum(ŷ[:,:,2,:] .^ 2) + sum(y[:,:,1,:] .^ 2) + ϵ))
    return loss_dice
    # loss_dice = 0f0
    # for chan_idx = 1 : 2
    #     @inbounds loss_dice += 
    #     1f0 - (muladd(2f0, sum(ŷ[:,:,chan_idx,:] .* y[:,:,chan_idx,:]), ϵ) / (sum(ŷ[:,:,chan_idx,:] .^ 2) + sum(y[:,:,chan_idx,:] .^ 2) + ϵ))
    # end
    # return loss_dice/2f0
end
lossfn = dice_loss
# lossfn = Flux.Losses.dice_coeff_loss

dice_loss (generic function with 1 method)

## 1.5 Loop

In [20]:
function data_parallel(device_id, x, y, model, model_ps)
	device!(device_id)
	x_gpu = x |> gpu
	y_gpu = Float32.(y) |> gpu
	ls, gs = Flux.withgradient(model_ps) do
		lossfn(model(x_gpu), y_gpu)
	end
	CUDA.unsafe_free!(x_gpu)
	CUDA.unsafe_free!(y_gpu)
	return ls, gs
end

# function update_params!(id_from, id_to, ps_from, ps_to)
# 	Threads.@threads for i = 1 : 90
# 		device!(id_from)
# 		p = ps_from[i] |> cpu
# 		device!(id_to)
# 		# ps_to[i] .= nothing
# 		ps_to[i] .= p |> gpu
# 	end
# end

function gradient_to_array(device_id, gs)
	rslt = nothing
	ignore_derivatives() do
		device!(device_id)
		gs_array_gpu = [gs[p] for p in gs.params]
		gs_array_cpu = gs_array_gpu |> cpu
		Threads.@threads for i = 1 : 90
			CUDA.unsafe_free!(gs_array_gpu[i])
		end
		for p in gs.params
			CUDA.unsafe_free!(gs.grads[p])
			# CUDA.unsafe_free!(p)
		end
		device!(0)
		rslt = gs_array_cpu |> gpu
	end
    return rslt
end

function train_1_epoch!(epoch_idx, model_gpu0, model_gpu1, model_gpu2, model_gpu3, 
	model_ps_0, model_ps_1, model_ps_2, model_ps_3, train_dl, optimizer)
	
	# Epoch start
	losses = Float32[]
	step_ct = 0
	for (x, y) in train_dl
		gs, gs_gpu0, gs_gpu1, gs_gpu2, gs_gpu3 = nothing, nothing, nothing, nothing, nothing
		ls0, ls1, ls2, ls3 = nothing, nothing, nothing, nothing
		@sync begin
			# Step start
			@async begin
				# x_gpu0, y_gpu0 = x[:, :, :, 1:10], y[:, :, :, 1:10]
				x_gpu0, y_gpu0 = x[:, :, :, 1:10], y[:, :, :, 1:10]
				ls0, gs = data_parallel(0, x_gpu0, y_gpu0, model_gpu0, model_ps_0)
				ignore_derivatives() do
					gs_gpu0 = [gs[p] for p in gs.params]
				end
			end
			# @async begin
			# 	x_gpu1, y_gpu1 = x[:, :, :, 11:20], y[:, :, :, 11:20]
			# 	ls1, gs_gpu1_temp = data_parallel(1, x_gpu1, y_gpu1, model_gpu1, model_ps_1)
			# 	gs_gpu1 = gradient_to_array(1, gs_gpu1_temp)
			# end
			@async begin
				# x_gpu2, y_gpu2 = x[:, :, :, 21:30], y[:, :, :, 21:30]
				x_gpu2, y_gpu2 = x[:, :, :, 11:20], y[:, :, :, 11:20]
				ls2, gs_gpu2_temp = data_parallel(2, x_gpu2, y_gpu2, model_gpu2, model_ps_2)
				gs_gpu2 = gradient_to_array(2, gs_gpu2_temp)
			end
			@async begin
				# x_gpu3, y_gpu3 = x[:, :, :, 31:end], y[:, :, :, 31:end]
				x_gpu3, y_gpu3 = x[:, :, :, 21:end], y[:, :, :, 21:end]
				ls3, gs_gpu3_temp = data_parallel(3, x_gpu3, y_gpu3, model_gpu3, model_ps_3)
				gs_gpu3 = gradient_to_array(3, gs_gpu3_temp)
			end
		end
		# @info "step $step_ct\tloss = $ls0, $ls1, $ls2, $ls3"
		# push!(losses, (ls0+ls1+ls2+ls3)/4)
		push!(losses, (ls0+ls2+ls3)/4)
		ls0, ls1, ls2, ls3 = nothing, nothing, nothing, nothing
		

		device!(0)
		# gs_gpu0 = gs_gpu0 .+ gs_gpu1 .+ gs_gpu2 .+ gs_gpu3
		gs_gpu0 = gs_gpu0 .+ gs_gpu2 .+ gs_gpu3

		Threads.@threads for i = 1 : 90
			gs.grads[model_ps_0[i]] .= gs_gpu0[i]
		end
		# old_para = Flux.params(model_gpu0)
		# Flux.update!(optimizer, model_gpu0, gs)
	  	Flux.update!(optimizer, model_ps_0, gs)
		# model_gpu0.params = old_para
		model_cpu = model_gpu0 |> cpu

		# sync new params to GPUs
		@sync begin
			@async begin
				device!(0)
				for p in gs.params
					CUDA.unsafe_free!(gs.grads[p])
					# CUDA.unsafe_free!(p)
				end
				# model_ps_0 = Flux.params(model_gpu0)
				Threads.@threads for i = 1 : 90
					CUDA.unsafe_free!(gs_gpu0[i])
				end
			end
			# @async begin
			# 	device!(1)
			# 	Threads.@threads for i = 1 : 90
			# 		CUDA.unsafe_free!(gs_gpu1[i])
			# 	end
			# 	model_gpu1 = nothing
			# 	model_gpu1 = model_cpu |> gpu
			# 	# model_ps_gpu1 = Flux.params(model_gpu1)
			# 	# update_params!(0, 1, model_ps_gpu0, model_ps_gpu1)
			# end
			@async begin
				device!(2)
				Threads.@threads for i = 1 : 90
					CUDA.unsafe_free!(gs_gpu2[i])
				end
				model_gpu2 = nothing
				model_gpu2 = model_cpu |> gpu
				model_ps_2 = Flux.params(model_gpu2)
				# update_params!(0, 2, model_ps_gpu0, model_ps_gpu2)
			end
			@async begin
				device!(3)
				Threads.@threads for i = 1 : 90
					CUDA.unsafe_free!(gs_gpu3[i])
				end
				model_gpu3 = nothing
				model_gpu3 = model_cpu |> gpu
				model_ps_3 = Flux.params(model_gpu3)
				# update_params!(0, 3, model_ps_gpu0, model_ps_gpu3)
			end
		end
		gs, gs_gpu0, gs_gpu1, gs_gpu2, gs_gpu3 = nothing, nothing, nothing, nothing, nothing
		
	  	# Step finished
		step_ct += 1
		if step_ct % 1000 == 0 
			@info "step $step_ct\tloss = $(mean(losses))"
			losses = Float32[]
			# GC.gc(true)
			# device!(3)
			# CUDA.reclaim()
			# device!(1)
			# CUDA.reclaim()
			# device!(2)
			# CUDA.reclaim()
			# device!(0)
			# CUDA.reclaim()
		end
		flush(stdout)
	end
	# Epoch finished
	# if epoch_idx % 5 == 0
	# 	@info "loss = $(mean(losses))"
	# end
	# return model_ps_gpu0, model_ps_gpu1, model_ps_gpu2, model_ps_gpu3
end

train_1_epoch! (generic function with 1 method)

In [21]:
# function report_loss(model, train_dl, valid_dl)
# 	# train set
# 	losses = []
# 	for (x, y) in train_dl
#         ignore_derivatives() do
#             y_cated = cat(Float16(1) .- y, y, dims = 3)
#             x = x |> gpu
#             pred_y = model(x) |> cpu
#             loss = lossfn(pred_y, y_cated)
#             push!(losses, loss)
#         end
# 	end
# 	println("train Loss = $(mean(losses))")
	
# 	# valid set
# 	losses = []
# 	for (x, y) in valid_dl
#         ignore_derivatives() do
#             y_cated = cat(Float16(1) .- y, y, dims = 3)
#             x = x |> gpu
#             pred_y = model(x) |> cpu
#             loss = lossfn(pred_y, y_cated)
#             push!(losses, loss)
#         end
# 	end
# 	println("Valid Loss = $(mean(losses))")
#     flush(stdout)
# end

# 2. Train

[:,:,1,:] --> background  
[:,:,2,:] --> foreground

In [22]:
# @load "a_good_model.jld2" model_0 optimizer

In [23]:
# lossfn = dice_loss
model = unet2D(1, 2);

## 2.1 Debug

In [23]:
# step_ct_sample = 0
# for (x, y) in train_loader
#     step_ct_sample += 1
#     if step_ct_sample == 75
#         global x_sample = x
#         global y_sample = y
#         # println(size(x))
#         # println(typeof(x))
#         # println(size(y))
#         # println(typeof(y))
#         break
#     end
# end

### 2.1.1 Debug for NAN in Loss function

In [24]:
# device!(0)
# model_test = model |> gpu
# x_sample_gpu = x_sample |> gpu
# y_sample_cated = Float32.(y_sample)
# y_sample_pred = model_test(x_sample_gpu) |> cpu;

In [25]:
# maximum(x_sample), minimum(x_sample)

In [26]:
# Gray.(zoom_pxiel_values(x_sample[:,:,1,2]))

In [27]:
# maximum(y_sample_cated), minimum(y_sample_cated)

In [28]:
# maximum(y_sample_pred), minimum(y_sample_pred)

In [29]:
# chnl_idx = 2

In [30]:
# Gray.(y_sample_pred[:,:,chnl_idx,2])

In [31]:
# Gray.(y_sample_cated[:,:,1,2])

In [32]:
# lossfn(y_sample_pred[:,:,:,2], y_sample_cated[:,:,:,2])

In [33]:
# a = sum(y_sample_pred[:,:,chnl_idx,2] .* y_sample_cated[:,:,1,2])

In [34]:
# b = sum(y_sample_pred[:,:,chnl_idx,2] .^ 2)

In [35]:
# c = sum(y_sample_cated[:,:,1,2] .^ 2)

In [36]:
# d = (muladd(2f0, a, 1f-5) / (b + c + 1f-5))

In [37]:
# d_loss = 1f0 - d

### 2.1.2 Debug for GRAM leak

In [40]:
# device!(0)
# CUDA.memory_status() 

# model_gpu0 = model |> gpu
# x_gpu0, y_gpu0 = x_sample[:, :, :, 1:10], y_sample[:, :, :, 1:10]
# y_gpu0 = Float32.(y_gpu0) |> gpu
# x_gpu0 = x_gpu0 |> gpu
# ls, gs = Flux.withgradient(Flux.params(model_gpu0)) do 
#     lossfn(model_gpu0(x_gpu0), y_gpu0)
# end

In [41]:
# device!(0)
# CUDA.memory_status() 

In [42]:
# device!(3)
# CUDA.memory_status() 

# model_gpu3 = model |> gpu
# x_gpu3, y_gpu3 = x_sample[:, :, :, 11:20], y_sample[:, :, :, 11:20]
# y_gpu3 = Float32.(y_gpu3) |> gpu
# x_gpu3 = x_gpu3 |> gpu
# ls3, gs3 = Flux.withgradient(Flux.params(model_gpu3)) do 
#     lossfn(model_gpu3(x_gpu3), y_gpu3)
# end

In [43]:
# device!(3)
# CUDA.memory_status() 

In [44]:
# device!(0)
# gs_gpu0 = [gs[p] for p in gs.params]
# CUDA.memory_status() 

In [45]:
# device!(3)
# gs_gpu3 = gradient_to_array(3, gs3)
# CUDA.memory_status() 

In [46]:
# device!(0)
# gs_gpu0 = gs_gpu0 .+ gs_gpu3
# CUDA.memory_status() 

In [47]:
# optimizer = AdaGrad(0.01)
# device!(0)
# Threads.@threads for i = 1 : 90
#     gs.grads[Flux.params(model_gpu0)[i]] .= gs_gpu0[i]
# end
# old_para = Flux.params(model_gpu0)
# Flux.update!(optimizer, old_para, gs)
# model_cpu = model_gpu0 |> cpu
# CUDA.memory_status() 

In [48]:
# device!(0)
# for p in gs.params
#     CUDA.unsafe_free!(gs.grads[p])
#     # CUDA.unsafe_free!(p)
# end
# Threads.@threads for i = 1 : 90
#     CUDA.unsafe_free!(gs_gpu0[i])
# end
# CUDA.memory_status() 

In [49]:
# device!(3)
# Threads.@threads for i = 1 : 90
#     CUDA.unsafe_free!(gs_gpu3[i])
# end
# model_gpu3 = nothing
# model_gpu3 = model_cpu |> gpu
# CUDA.memory_status() 

In [50]:
# gs, gs_gpu0, gs_gpu1, gs_gpu2, gs_gpu3 = nothing, nothing, nothing, nothing, nothing
# device!(0)
# CUDA.memory_status() 

In [55]:
# CUDA.unsafe_free!(x_sample)
# CUDA.unsafe_free!(y_sample_cated)
# CUDA.memory_status() 

In [56]:
# CUDA.reclaim()
# CUDA.memory_status() 

In [57]:
# model_test = nothing
# GC.gc(true)
# CUDA.memory_status() 

In [58]:
# CUDA.reclaim()
# CUDA.memory_status() 

In [59]:
# gs = nothing
# GC.gc(true)
# CUDA.memory_status() 

In [60]:
# CUDA.reclaim()
# CUDA.memory_status() 

## 2.2 Actual train

In [24]:
device!(0)
model_0 = model |> gpu
model_ps_0 = Flux.params(model_0)
# device!(1)
model_1 = nothing
model_ps_1 = nothing
# model_1 = model |> gpu
# model_ps_1 = Flux.params(model_1)
device!(2)
model_2 = model |> gpu
model_ps_2 = Flux.params(model_2)
device!(3)
model_3 = model |> gpu
model_ps_3 = Flux.params(model_3);
""

""

In [25]:
# model_test = nothing
# x_sample = nothing
# y_sample = nothing
# device!(0)
# CUDA.reclaim()
# device!(1)
# CUDA.reclaim()
# device!(2)
# CUDA.reclaim()
# device!(3)
# CUDA.reclaim()
# GC.gc(true)
# device!(0)

In [26]:
# @sync begin
#     # Step start
#     @async begin
#         device!(0)
#         global model_0 = model |> gpu
#         global model_ps_0 = Flux.params(model_0)
#     end
#     @async begin
#         device!(1)
#         global model_1 = model |> gpu
#         global model_ps_1 = Flux.params(model_1)
#     end
#     @async begin
#         device!(2)
#         global model_2 = model |> gpu
#         global model_ps_2 = Flux.params(model_2)
#     end
#     @async begin
#         device!(3)
#         global model_3 = model |> gpu
#         global model_ps_3 = Flux.params(model_3)
#     end
# end
# modelss = [model_0, model_1, model_2, model_3]
# model_pss = [model_ps_0, model_ps_1, model_ps_2, model_ps_3];

In [27]:
optimizer = AdaGrad(0.01);
# opt_state = Flux.setup(AdaGrad(0.01), model_0);

AdaGrad(0.01, 1.0e-8, IdDict{Any, Any}())

In [28]:
# device!(0)
# model = model_0 |> cpu
# @save "a_good_model2.jld2" model optimizer

In [29]:
train_1_epoch!(1, model_0, model_1, model_2, model_3, model_ps_0, model_ps_1, model_ps_2, model_ps_3, train_loader, optimizer)

┌ Info: step 1000	loss = 0.749521
└ @ Main /home/molloi-lab/Desktop/Project BAC/BAC project/4_train.ipynb:147


CompositeException: TaskFailedException

    nested task error: Out of GPU memory trying to allocate 320.141 MiB
    Effective GPU memory usage: 99.91% (22.139 GiB/22.159 GiB)
    Memory pool usage: 10.955 GiB (21.625 GiB reserved)
    
    Stacktrace:
      [1] macro expansion
        @ ~/.julia/packages/CUDA/pCcGc/src/pool.jl:424 [inlined]
      [2] macro expansion
        @ ./timing.jl:393 [inlined]
      [3] #_alloc#989
        @ ~/.julia/packages/CUDA/pCcGc/src/pool.jl:413 [inlined]
      [4] _alloc
        @ ~/.julia/packages/CUDA/pCcGc/src/pool.jl:408 [inlined]
      [5] #alloc#988
        @ ~/.julia/packages/CUDA/pCcGc/src/pool.jl:398 [inlined]
      [6] alloc
        @ ~/.julia/packages/CUDA/pCcGc/src/pool.jl:392 [inlined]
      [7] CuArray{UInt8, 1, CUDA.Mem.DeviceBuffer}(#unused#::UndefInitializer, dims::Tuple{Int64})
        @ CUDA ~/.julia/packages/CUDA/pCcGc/src/array.jl:93
      [8] CuArray
        @ ~/.julia/packages/CUDA/pCcGc/src/array.jl:176 [inlined]
      [9] CuArray
        @ ~/.julia/packages/CUDA/pCcGc/src/array.jl:183 [inlined]
     [10] with_workspace(f::cuDNN.var"#647#649"{CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cuDNN.cudnnActivationMode_t, cuDNN.cudnnConvolutionDescriptor, cuDNN.cudnnFilterDescriptor, cuDNN.cudnnTensorDescriptor, cuDNN.cudnnTensorDescriptor, Base.RefValue{Float32}, Base.RefValue{Float32}, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cuDNN.cudnnConvolutionFwdAlgoPerfStruct}, eltyp::Type{UInt8}, size::CUDA.APIUtils.var"#2#3"{UInt64}, fallback::Nothing; keep::Bool)
        @ CUDA.APIUtils ~/.julia/packages/CUDA/pCcGc/lib/utils/call.jl:67
     [11] with_workspace(f::Function, eltyp::Type{UInt8}, size::CUDA.APIUtils.var"#2#3"{UInt64}, fallback::Nothing)
        @ CUDA.APIUtils ~/.julia/packages/CUDA/pCcGc/lib/utils/call.jl:58
     [12] #with_workspace#1
        @ ~/.julia/packages/CUDA/pCcGc/lib/utils/call.jl:55 [inlined]
     [13] with_workspace
        @ ~/.julia/packages/CUDA/pCcGc/lib/utils/call.jl:55 [inlined]
     [14] with_workspace(f::Function, size::UInt64)
        @ CUDA.APIUtils ~/.julia/packages/CUDA/pCcGc/lib/utils/call.jl:55
     [15] cudnnConvolutionForwardAD(w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, bias::Nothing, z::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; y::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, activation::cuDNN.cudnnActivationMode_t, convDesc::cuDNN.cudnnConvolutionDescriptor, wDesc::cuDNN.cudnnFilterDescriptor, xDesc::cuDNN.cudnnTensorDescriptor, yDesc::cuDNN.cudnnTensorDescriptor, zDesc::cuDNN.cudnnTensorDescriptor, biasDesc::Nothing, alpha::Base.RefValue{Float32}, beta::Base.RefValue{Float32}, dw::Base.RefValue{Any}, dx::Base.RefValue{Any}, dz::Base.RefValue{Any}, dbias::Base.RefValue{Any}, dready::Base.RefValue{Bool})
        @ cuDNN ~/.julia/packages/cuDNN/3J08S/src/convolution.jl:103
     [16] cudnnConvolutionForwardAD
        @ ~/.julia/packages/cuDNN/3J08S/src/convolution.jl:101 [inlined]
     [17] cudnnConvolutionForwardWithDefaults(w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; padding::Int64, stride::Int64, dilation::Int64, mode::cuDNN.cudnnConvolutionMode_t, mathType::cuDNN.cudnnMathType_t, reorderType::cuDNN.cudnnReorderType_t, group::Int64, format::cuDNN.cudnnTensorFormat_t, convDesc::cuDNN.cudnnConvolutionDescriptor, xDesc::cuDNN.cudnnTensorDescriptor, wDesc::cuDNN.cudnnFilterDescriptor, y::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, yDesc::cuDNN.cudnnTensorDescriptor, alpha::Int64, beta::Int64, bias::Nothing, z::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, biasDesc::Nothing, zDesc::cuDNN.cudnnTensorDescriptor, activation::cuDNN.cudnnActivationMode_t, dw::Base.RefValue{Any}, dx::Base.RefValue{Any}, dz::Base.RefValue{Any}, dbias::Base.RefValue{Any})
        @ cuDNN ~/.julia/packages/cuDNN/3J08S/src/convolution.jl:96
     [18] #cudnnConvolutionForward!#644
        @ ~/.julia/packages/cuDNN/3J08S/src/convolution.jl:53 [inlined]
     [19] cudnnConvolutionForward!
        @ ~/.julia/packages/cuDNN/3J08S/src/convolution.jl:53 [inlined]
     [20] conv!(y::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, 2, 2, 4, 2}; alpha::Int64, beta::Int64, algo::Int64)
        @ NNlibCUDA ~/.julia/packages/NNlibCUDA/C6t0p/src/cudnn/conv.jl:67
     [21] conv!
        @ ~/.julia/packages/NNlibCUDA/C6t0p/src/cudnn/conv.jl:58 [inlined]
     [22] conv(x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, 2, 2, 4, 2}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ NNlib ~/.julia/packages/NNlib/Fg3DQ/src/conv.jl:88
     [23] conv
        @ ~/.julia/packages/NNlib/Fg3DQ/src/conv.jl:83 [inlined]
     [24] #rrule#379
        @ ~/.julia/packages/NNlib/Fg3DQ/src/conv.jl:355 [inlined]
     [25] rrule
        @ ~/.julia/packages/NNlib/Fg3DQ/src/conv.jl:345 [inlined]
     [26] rrule
        @ ~/.julia/packages/ChainRulesCore/0t04l/src/rules.jl:134 [inlined]
     [27] chain_rrule
        @ ~/.julia/packages/Zygote/JeHtr/src/compiler/chainrules.jl:223 [inlined]
     [28] macro expansion
        @ ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:101 [inlined]
     [29] _pullback
        @ ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:101 [inlined]
     [30] _pullback
        @ ~/.julia/packages/Flux/n3cOc/src/layers/conv.jl:202 [inlined]
     [31] _pullback(ctx::Zygote.Context{true}, f::Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, args::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:0
     [32] macro expansion
        @ ~/.julia/packages/Flux/n3cOc/src/layers/basic.jl:53 [inlined]
     [33] _pullback
        @ ~/.julia/packages/Flux/n3cOc/src/layers/basic.jl:53 [inlined]
     [34] _pullback
        @ ~/.julia/packages/Flux/n3cOc/src/layers/basic.jl:51 [inlined]
     [35] _pullback(ctx::Zygote.Context{true}, f::Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, args::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:0
     [36] macro expansion
        @ ~/.julia/packages/Flux/n3cOc/src/layers/basic.jl:53 [inlined]
     [37] _pullback
        @ ~/.julia/packages/Flux/n3cOc/src/layers/basic.jl:53 [inlined]
     [38] _pullback(::Zygote.Context{true}, ::typeof(Flux._applychain), ::Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, var"#34#36"}}}, ::CuArray{Float16, 4, CUDA.Mem.DeviceBuffer})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:0
     [39] _pullback
        @ ~/.julia/packages/Flux/n3cOc/src/layers/basic.jl:51 [inlined]
     [40] _pullback(ctx::Zygote.Context{true}, f::Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, var"#34#36"}}}}, args::CuArray{Float16, 4, CUDA.Mem.DeviceBuffer})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:0
     [41] _pullback
        @ ~/Desktop/Project BAC/BAC project/4_train.ipynb:6 [inlined]
     [42] _pullback(::Zygote.Context{true}, ::var"#40#41"{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, var"#34#36"}}}}, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float16, 4, CUDA.Mem.DeviceBuffer}})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface2.jl:0
     [43] pullback(f::Function, ps::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface.jl:384
     [44] withgradient(f::Function, args::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
        @ Zygote ~/.julia/packages/Zygote/JeHtr/src/compiler/interface.jl:132
     [45] data_parallel(device_id::Int64, x::Array{Float16, 4}, y::Array{Float16, 4}, model::Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, var"#34#36"}}}}, model_ps::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
        @ Main ~/Desktop/Project BAC/BAC project/4_train.ipynb:5
     [46] macro expansion
        @ ~/Desktop/Project BAC/BAC project/4_train.ipynb:56 [inlined]
     [47] (::var"#49#61"{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Parallel{typeof(FastVision.Models.catchannels), Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, MaxPool{2, 4}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{ConvTranspose{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}, Chain{Tuple{Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, BatchNorm{typeof(leakyrelu), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{Conv{2, 4, typeof(identity), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, var"#34#36"}}}}, Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}, Array{Float16, 4}, Array{Float16, 4}})()
        @ Main ./task.jl:514

In [30]:
# train_1_epoch!(2, model_0, model_1, model_2, model_3, train_loader, optimizer)

In [31]:
# train_1_epoch!(3, model_0, model_1, model_2, model_3, train_loader, optimizer)

In [None]:
# train_1_epoch!(4, model_0, model_1, model_2, model_3, train_loader, optimizer)

In [None]:
# train_1_epoch!(5, model_0, model_1, model_2, model_3, train_loader, optimizer)