In [1]:
using Pkg
Pkg.activate("./")
# Pkg.resolve()
# Pkg.instantiate()

using DrWatson
using MPI
using ParametricOperators
using Parameters
using Profile
using Shuffle
using Zygote
using PyPlot
using NNlib
using NNlibCUDA
using FNO4CO2
using JLD2
using Flux, Random, FFTW
using MAT, Statistics, LinearAlgebra
using CUDA
using ProgressMeter
using InvertibleNetworks:ActNorm
using Random
matplotlib.use("Agg")

[32m[1m  Activating[22m[39m project at `~/Desktop/Research/Code/dfno/dfno`


┌ Info: OMEinsum loaded the CUDA module successfully
└ @ OMEinsum /Users/richardr2926/.julia/packages/OMEinsum/lTBCn/src/cueinsum.jl:117
┌ Info: FNO4CO2 is using GPU
└ @ FNO4CO2 /Users/richardr2926/Desktop/Research/Code/FNO4CO2/src/FNO4CO2.jl:15


In [2]:
# Short alias for ParametricOperators.update!; no visible cell calls it.
update = ParametricOperators.update!


# Hyper-parameters read by PO_FNO4CO2 and by the reshape calls in the cells below.
# Instances are built with keyword arguments; omitted fields take the defaults here.
@with_kw struct ModelConfig
    # Grid sizes along x, y, z. nz is only referenced in commented-out code.
    nx::Int = 64
    ny::Int = 64
    nz::Int = 64
    # Time-axis lengths: nt_in sizes the channel operators, nt_out sizes the time DFT.
    nt_in::Int = 51
    nt_out::Int = 51
    # Channel counts: network input, intermediate projection, output, lifted width.
    nc_in::Int = 4
    nc_mid::Int = 128
    nc_out::Int = 1
    nc_lift::Int = 20
    # Fourier modes kept per axis. mz is only referenced in commented-out code.
    mx::Int = 4
    my::Int = 4
    mz::Int = 4
    mt::Int = 4
    # Number of (spectral convolution + pointwise convolution) blocks.
    n_blocks::Int = 1
    # Batch size used when reshaping flattened inputs and outputs.
    n_batch::Int = 1
    # Real element type of the operators.
    dtype::DataType = Float32
    # Not read by any visible code; presumably a process partition for
    # distributed runs. TODO confirm.
    partition::Vector{Int} = [1]
end

ModelConfig

In [3]:
"""
    PO_FNO4CO2(config::ModelConfig)

Build the ParametricOperators pieces of a Fourier neural operator acting on a
flattened `(channel, x, y, t)` tensor whose sizes are given by `config`.

All real-valued operators use element type `config.dtype`; the spectral
operators use `Complex{config.dtype}`.

# Returns
A tuple `(lifts, sconvs, convs, projects)`:
- `lifts`: channel map `nc_in → nc_lift`, applied at every grid point.
- `sconvs`: one spectral convolution per block (`config.n_blocks` entries).
- `convs`: one pointwise channel map `nc_lift → nc_lift` per block.
- `projects`: two channel maps, `nc_lift → nc_mid` followed by `nc_mid → nc_out`.
"""
function PO_FNO4CO2(config::ModelConfig)

    T = config.dtype

    # Number of grid points every channel matrix is replicated over.
    n_points = config.nx * config.ny * config.nt_in

    # An (nc_to × nc_from) matrix acting on the leading channel axis, repeated at
    # every grid point through a Kronecker product with the identity.
    channel_map(nc_to, nc_from) = ParIdentity(T, n_points) ⊗ ParMatrix(T, nc_to, nc_from)

    function spectral_convolution()

        # DFTs along x and y (complex input) and along t (real input).
        # The z axis is not transformed.
        fourier_x = ParDFT(Complex{T}, config.nx)
        fourier_y = ParDFT(Complex{T}, config.ny)
        # NOTE(review): the time DFT is sized by nt_out while the channel maps
        # use nt_in; the two presumably have to match. TODO confirm.
        fourier_t = ParDFT(T, config.nt_out)

        # Restrictions to the retained modes: both ends of the spectrum along
        # x and y, only the leading `mt` entries along t.
        restrict_x = ParRestriction(
            Complex{T}, Range(fourier_x), [1:config.mx, config.nx-config.mx+1:config.nx]
        )
        restrict_y = ParRestriction(
            Complex{T}, Range(fourier_y), [1:config.my, config.ny-config.my+1:config.ny]
        )
        restrict_t = ParRestriction(Complex{T}, Range(fourier_t), [1:config.mt])

        # Per-mode channel mixing written as a tensor contraction: the weight
        # carries labels (5, 1, 2, 3, 4), the input (1, 2, 3, 4), and the result
        # (5, 2, 3, 4), so label 1 (input channel) is summed out.
        input_shape = (config.nc_lift, 2*config.mx, 2*config.my, config.mt)
        weight_shape = (config.nc_lift, config.nc_lift, 2*config.mx, 2*config.my, config.mt)

        input_order = (1, 2, 3, 4)
        weight_order = (5, 1, 2, 3, 4)
        target_order = (5, 2, 3, 4)

        weight_mix = ParMatrixN(
            Complex{T}, weight_order, weight_shape,
            input_order, input_shape, target_order, input_shape,
        )

        # Transform-then-restrict along each axis; channels are left untouched.
        restrict_dft = (restrict_t * fourier_t) ⊗ (restrict_y * fourier_y) ⊗
                       (restrict_x * fourier_x) ⊗ ParIdentity(T, config.nc_lift)

        return restrict_dft' * weight_mix * restrict_dft
    end

    # Lift the channel dimension
    lifts = channel_map(config.nc_lift, config.nc_in)

    sconvs = []
    convs = []
    for _ in 1:config.n_blocks
        push!(sconvs, spectral_convolution())
        push!(convs, channel_map(config.nc_lift, config.nc_lift))
    end

    # Widen the channel dimension once more, then project to the output channels
    projects = Any[
        channel_map(config.nc_mid, config.nc_lift),
        channel_map(config.nc_out, config.nc_mid),
    ]

    return lifts, sconvs, convs, projects
end

# Retained Fourier modes per axis and lifted channel width
modes = 4
width = 20

config = ModelConfig(; mx=modes, my=modes, mt=modes, nc_lift=width, n_blocks=1, n_batch=2)
lifts, sconvs, convs, projects = PO_FNO4CO2(config)

# To load a saved dictionary instead:
# key = load("./data/3D_FNO/.jld2")["key"]

# Create the parameter container from the lifting operator, then register the
# remaining operators in it: spectral convs, pointwise convs, both projections.
θ = init(lifts)
foreach(op -> init!(op, θ), sconvs)
foreach(op -> init!(op, θ), convs)
foreach(op -> init!(op, θ), projects[1:2])

# Move the parameters to the GPU when FNO4CO2 reports one is in use
if gpu_flag
    global θ = gpu(θ)
end

"""
    xytcb_to_cxytb(x)

Return a permuted copy of the 5-D array `x` with its fourth axis moved to the
front: layout `(x, y, t, c, b)` becomes `(c, x, y, t, b)`.
"""
xytcb_to_cxytb(x) = permutedims(x, [4, 1, 2, 3, 5])

"""
    cxytb_to_xytcb(x)

Return a permuted copy of the 5-D array `x` with its first axis moved to the
fourth position: layout `(c, x, y, t, b)` becomes `(x, y, t, c, b)`.
"""
cxytb_to_xytcb(x) = permutedims(x, [2, 3, 4, 1, 5])

"""
    forward(θ, x)

Evaluate the network built from the global operators `lifts`, `sconvs`, `convs`
and `projects` on the flattened input `x`, with parameters taken from `θ`.

Each block adds the spectral and pointwise convolutions, then normalises every
channel with the mean and (uncorrected) variance taken over all other axes.
A ReLU follows every block except the last one. The result is finally passed
through both projections with a ReLU in between.
"""
function forward(θ, x::Any)
    # Sizes that do not change from block to block
    flat_len = config.nc_lift * config.nx * config.ny * config.nt_in
    lifted_dims = (config.nc_lift, config.nx, config.ny, config.nt_in, :)
    ϵ = 1f-5

    x = lifts(θ) * x
    for blk in 1:config.n_blocks
        # Spectral path plus pointwise path
        x = (sconvs[blk](θ) * x) + (convs[blk](θ) * x)

        # Unflatten and move the channel axis to position 4
        x = cxytb_to_xytcb(reshape(x, lifted_dims))

        # Reduce over every axis except the channel one (second to last)
        N = ndims(x)
        reduce_dims = [1:N-2; N]
        μ = mean(x; dims=reduce_dims)
        σ² = var(x; mean=μ, dims=reduce_dims, corrected=false)
        x = (x .- μ) ./ sqrt.(σ² .+ ϵ)

        # Back to the flattened channel-first layout
        x = reshape(xytcb_to_cxytb(x), (flat_len, :))

        # No activation after the final block
        x = blk < config.n_blocks ? relu.(x) : x
    end

    hidden = relu.(projects[1](θ) * x)
    return projects[2](θ) * hidden
end

# x_train = rand(DDT(lifts), Domain(lifts))
# y_train = rand(RDT(projects), Range(projects))

# y = forward(θ, x_train)
# grads = gradient(params -> Flux.mse(forward(params, x_train), y_train), θ)

┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /Users/richardr2926/.julia/packages/Flux/n3cOc/src/functor.jl:301


forward (generic function with 1 method)

In [4]:
# Define raw data directory
mkpath(datadir("training-data"))
perm_path = datadir("training-data", "perm_gridspacing15.0.mat")
conc_path = datadir("training-data", "conc_gridspacing15.0.mat")

# Download each dataset into the data directory if it does not exist yet.
# Every URL is kept as one string: splitting it over two lines inside a command
# literal with '…' quoting puts the line break and indentation into the URL.
for (path, url) in (
    (perm_path, "https://www.dropbox.com/s/o35wvnlnkca9r8k/perm_gridspacing15.0.mat"),
    (conc_path, "https://www.dropbox.com/s/mzi0xgr0z3l553a/conc_gridspacing15.0.mat"),
)
    # `run` throws if wget exits with a non-zero status.
    isfile(path) || run(`wget $url -q -O $path`)
end

In [5]:

# Load the permeability and concentration arrays from the downloaded .mat files.
perm = matread(perm_path)["perm"];
conc = matread(conc_path)["conc"];

# The third axis of perm is sliced by sample index below.
nsamples = size(perm, 3)

# Train / validation split sizes
ntrain = 1000
nvalid = 100

batch_size = config.n_batch
learning_rate = 1f-4

epochs = 1

# Modes and width handed to the reference Net3d below
modes = 4
width = 20

n = (config.nx,config.ny)
#d = (15f0,15f0) # dx, dy in m
# Spacing 1/nx and 1/ny replaces the physical spacing kept in the line above.
d = (1f0/config.nx, 1f0/config.ny)

# Stride used when slicing the two spatial axes of perm and conc
s = 1

nt = 51
#dt = 20f0    # dt in day
dt = 1f0/(nt-1)

# One forward call on the training permeabilities, result discarded; presumably
# this initialises the ActNorm statistics from the data. TODO confirm.
AN = ActNorm(ntrain)
AN.forward(reshape(perm[1:s:end,1:s:end,1:ntrain], n[1], n[2], 1, ntrain));

# conc is sliced with 1:nt on its first axis and by sample on its last; the
# permutation moves the first axis to third position.
y_train = permutedims(conc[1:nt,1:s:end,1:s:end,1:ntrain],[2,3,1,4]);
y_valid = permutedims(conc[1:nt,1:s:end,1:s:end,ntrain+1:ntrain+nvalid],[2,3,1,4]);

grid = gen_grid(n, d, nt, dt)

x_train = perm_to_tensor(perm[1:s:end,1:s:end,1:ntrain],grid,AN);
x_valid = perm_to_tensor(perm[1:s:end,1:s:end,ntrain+1:ntrain+nvalid],grid,AN);
# Validation inputs with the fourth axis moved to the front
x_valid_dfno = xytcb_to_cxytb(x_valid)

# value, x, y, t

# Reference network to compare the operator-based model against
NN = Net3d(modes, width)
gpu_flag && (global NN = NN |> gpu)

Flux.trainmode!(NN, true)
w = Flux.params(NN)

opt = Flux.Optimise.ADAMW(learning_rate, (0.9f0, 0.999f0), 1f-4)
# NOTE(review): Int(...) throws an InexactError unless batch_size divides ntrain.
nbatches = Int(ntrain/batch_size)

Loss = zeros(Float32,epochs*nbatches)
Loss_valid = zeros(Float32, epochs)
prog = Progress(round(Int, ntrain * epochs / batch_size))

# plot figure
# First validation sample, kept 5-D / 4-D through the 1:1 range index
x_plot = x_valid[:, :, :, :, 1:1]
y_plot = y_valid[:, :, :, 1:1]
x_plot_dfno = vec(xytcb_to_cxytb(x_plot))

# Define result directory

sim_name = "3D_FNO"
exp_name = "2phaseflow"

save_dict = @strdict exp_name
plot_path = plotsdir(sim_name, savename(save_dict; digits=6))

"/Users/richardr2926/Desktop/Research/Code/dfno/dfno/plots/3D_FNO/exp_name=2phaseflow"

In [6]:
# Epoch and batch index of the single mini-batch examined in the cells below
ep = 1
b = 1

Base.flush(Base.stdout)
# Random permutation of the training indices; column b holds the ids of batch b
idx_e = reshape(randperm(ntrain), batch_size, nbatches)

# Slice the batch along the last (sample) axis
x = x_train[:, :, :, :, idx_e[:,b]]
y = y_train[:, :, :, idx_e[:,b]]

# Flattened copies, one column per sample, for the operator-based model
x_dfno = reshape(xytcb_to_cxytb(x), (:, config.n_batch))
y_dfno = reshape(y, (:, config.n_batch));

In [7]:
# Gradient of the relative L2 loss with respect to θ for the operator-based
# model; [1] picks the gradient for the single differentiated argument.
grads_dfno = gradient(params -> norm(relu01(forward(params, x_dfno))-y_dfno)/norm(y_dfno), θ)[1]
# Same loss for the reference network, differentiated with respect to the
# parameter collection w. The do-block also stores the loss in the global `loss`.
grads = gradient(w) do
    global loss = norm(relu01(NN(x))-y)/norm(y)
    return loss
end

# Loss values recomputed outside of differentiation
loss_dfno = norm(relu01(forward(θ, x_dfno))-y_dfno)/norm(y_dfno)
loss = norm(relu01(NN(x))-y)/norm(y)

# loss_dfno = norm(relu01(forward(θ, x_dfno)))/norm(y_dfno)
# loss = norm(relu01(NN(x)))/norm(y)

println("DFNO Loss: ", loss_dfno, ". NN Loss: ", loss)

DFNO Loss: 2.330852

. NN Loss: 2.7075887


In [13]:
# Signed sum of the element-wise difference between the two model outputs.
# NOTE(review): positive and negative differences cancel in a signed sum, so a
# small value does not show agreement; norm or maximum(abs, ...) is stricter.
sum(vec(NN(x)) - vec(forward(θ, x_dfno)))

33391.81f0

In [35]:
# Walk θ and keep the value of the entry whose running position equals the
# selector in the `if` below.
# NOTE(review): o starts at 1 and only grows, so `o == -1` never holds as
# written and test_w1 keeps its initial 0.
o = 1
test_w1 = 0

for (v, p) in θ
    # println(v)
    if o == -1
        println(v)
        test_w1 = p
    end
    o += 1
end

ParMatrixN{5, 4, 4, ComplexF32}((5, 1, 2, 3, 4), (20, 20, 8, 8, 4), (1, 2, 3, 4), (20, 8, 8, 4), (5, 2, 3, 4), (20, 8, 8, 4), UUID("f9aa5075-86e6-4af4-a5c0-b8042cae3e56"))


In [36]:
# Walk the parameters in w and keep the one at the selected position.
# NOTE(review): o starts at 1 and only grows, so `o == -1` never holds as
# written and test_w2 keeps its initial 0.
o = 1
test_w2 = 0
for p in w
    # println(size(p))
    if o == -1
        println(size(p))
        test_w2 = p
    end
    o += 1
end

(20, 20, 8, 8, 4, 1)


In [37]:
# Signed sum of the difference between the two selected weights.
# NOTE(review): this is also zero when both variables still hold their initial 0.
sum(test_w1 - test_w2)

0.0f0 + 0.0f0im

In [10]:
"""
    gen(shape...)

Weight initialiser that reseeds the global RNG with a fixed seed before
drawing `Flux.glorot_uniform(shape...)`. Repeated calls with the same `shape`
therefore give the same array, as long as `glorot_uniform` draws from the
global RNG.
"""
function gen(shape...)
    fixed_seed = 1234
    Random.seed!(fixed_seed)
    initial_weights = Flux.glorot_uniform(shape...)
    return initial_weights
end

"""
    compl_mul(x, y)

Contract the input-channel axis of `x` with the weights `y`, independently for
every retained Fourier mode.

# Arguments
- `x`: 5-D array laid out `(modes1, modes2, modes3, in_channels, batch)`.
- `y`: 5-D weights laid out `(out_channels, in_channels, modes2, modes3, modes1)`.

# Returns
A 5-D array laid out `(modes1, modes2, modes3, out_channels, batch)`.
"""
function compl_mul(x::AbstractArray{T, 5}, y::AbstractArray{T, 5}) where T
    # batch * in_channels * modes1 * modes2 * modes3
    x_per = permutedims(x, [5, 4, 1, 2, 3])
    # in_channels * out_channels * modes1 * modes2 * modes3, in one permutation.
    # [2,1,5,3,4] is the composition of [5,3,4,2,1] followed by [4,5,1,2,3],
    # so the weights are copied once instead of twice.
    y_per = permutedims(y, [2, 1, 5, 3, 4])

    # Collapse the three mode axes so each mode becomes one batched matrix product
    x_resh = reshape(x_per, size(x_per, 1), size(x_per, 2), :)
    y_resh = reshape(y_per, size(y_per, 1), size(y_per, 2), :)
    out_resh = batched_mul(x_resh, y_resh) # batch * out_channels * (all modes)

    # Restore the mode axes and move them back to the front
    out_per = reshape(
        out_resh, size(out_resh, 1), size(out_resh, 2), size(x, 1), size(x, 2), size(x, 3)
    )
    return permutedims(out_per, [3, 4, 5, 2, 1])
end

T = Float32

# Reference path: 1x1x1 convolution as the lifting layer, initialised through gen.
conv = Flux.Conv((1, 1, 1), config.nc_in=>config.nc_lift; init=gen, bias=false)
xt = conv(x) # xytcb
# temp repeats the call above and is not used again in the visible cells.
temp = conv(x) # xytcb

xt = permutedims(xt, [3,1,2,4,5]) # txycb
x_ft = rfft(xt,[1,2,3])      ## full size FFT

# Seeded random complex weights. The hard-coded 8, 8, 4 equal 2*mx, 2*my, mt
# for the configuration used in this notebook (modes = 4).
Random.seed!(1234)
weights = rand(Complex{T}, config.nc_lift, config.nc_lift, 8, 8, 4, 1) ./ convert(T, sqrt(config.nc_lift * config.nc_lift * 8 * 8 * 4))

# Mode counts named after the axes of x_ft (t, x, y)
modes1 = config.mt
modes2 = config.mx
modes3 = config.my

# only keep low frequency coefficients weights[1,1,1,:,:,1]
# Four corner blocks (low/high along axes 2 and 3) are multiplied with the
# matching weight slices and padded with zeros back to the size of x_ft.
# NOTE(review): the 1:4 and 5:8 weight slices assume 4 modes per axis.
out_ft = cat(cat(cat(compl_mul(x_ft[1:modes1, 1:modes2, 1:modes3, :,:], weights[:,:,1:4,1:4,:,1]),
                zeros(Complex{T}, modes1, modes2, size(x_ft,3)-2*modes3, size(x_ft,4), size(x_ft,5)), 
                compl_mul(x_ft[1:modes1, 1:modes2, end-modes3+1:end,:,:], weights[:,:,5:8,1:4,:,1]),dims=3),
                zeros(Complex{T}, modes1, size(x_ft, 2)-2*modes2, size(x_ft,3), size(x_ft,4), size(x_ft,5)),
                cat(compl_mul(x_ft[1:modes1, end-modes2+1:end, 1:modes3,:,:], weights[:,:,1:4,5:8,:,1]),
                zeros(Complex{T}, modes1, modes2, size(x_ft,3)-2*modes3, size(x_ft,4), size(x_ft,5)),
                compl_mul(x_ft[1:modes1, end-modes2+1:end, end-modes3+1:end,:,:], weights[:,:,5:8,5:8,:,1]),dims=3)
                ,dims=2),
                zeros(Complex{T}, size(x_ft,1)-modes1, size(x_ft,2), size(x_ft,3), size(x_ft,4), size(x_ft,5)),dims=1)

# Inverse transform, then undo the earlier axis permutation
out_ft = irfft(out_ft, size(xt,1),[1,2,3]) # nt * nx * ny * channels * batch
out_ft = permutedims(out_ft, [2,3,1,4,5]);

In [11]:
# Operator-based counterpart of the reference cell: lifting, restricted DFT and
# per-mode channel mixing, applied to the flattened batch x_dfno.
lifting = ParIdentity(T, config.nx*config.ny*config.nt_in) ⊗ ParMatrix(T, config.nc_lift, config.nc_in)
θ_new = init(lifting)

fourier_x = ParDFT(Complex{T}, config.nx)
fourier_y = ParDFT(Complex{T}, config.ny)
# fourier_z = ParDFT(Complex{T}, config.nz)
fourier_t = ParDFT(T, config.nt_out)

# Build restrictions to low-frequency modes
restrict_x = ParRestriction(Complex{T}, Range(fourier_x), [1:config.mx, config.nx-config.mx+1:config.nx])
restrict_y = ParRestriction(Complex{T}, Range(fourier_y), [1:config.my, config.ny-config.my+1:config.ny])
# restrict_z = ParRestriction(Complex{T}, Range(fourier_z), [1:config.mz, config.nz-config.mz+1:config.nz])
restrict_t = ParRestriction(Complex{T}, Range(fourier_t), [1:config.mt])

input_shape = (config.nc_lift, 2*config.mx, 2*config.my, config.mt)
weight_shape = (config.nc_lift, config.nc_lift, 2*config.mx, 2*config.my, config.mt) # (out channels, in channels, x modes, y modes, t modes)
target_shape = input_shape

# Contraction labels: label 1 (input channel) is shared by weight and input and
# absent from the target, so it is summed out; label 5 is the output channel.
input_order = (1, 2, 3, 4)
weight_order = (5, 1, 2, 3, 4)
target_order = (5, 2, 3, 4)

weight_mix = ParMatrixN(Complex{T}, weight_order, weight_shape, input_order, input_shape, target_order, target_shape) 
init!(weight_mix, θ_new)

# * and ⊗ share precedence and associate left, so this is
# ((restrictions * transforms) ⊗ channel identity).
dft = (restrict_t ⊗ restrict_y ⊗ restrict_x) * (fourier_t ⊗ fourier_y ⊗ fourier_x) ⊗ ParIdentity(T, config.nc_lift)
# Lift, transform, mix, transform back; then unflatten and move channels to axis 4.
output = cxytb_to_xytcb(reshape(dft' * weight_mix(θ_new) * dft * lifting(θ_new) * x_dfno, (config.nc_lift, config.nx, config.ny, config.nt_out, config.n_batch)))
;

In [12]:
# Signed sum of the difference between operator-based and reference outputs.
# NOTE(review): errors of opposite sign cancel here; a norm is a stricter check.
sum(vec(output) - vec(out_ft))

9.536743f-7

In [15]:
# Same comparison after relu01. The arrays are subtracted element by element
# without vec, so both must share one axis layout. TODO confirm they do.
sum(relu01(out_ft) - relu01(output))

5898.789f0

In [11]:
# Variant of the operator-based spectral convolution in which the channel mixing
# is a single nc_lift x nc_lift matrix shared by all retained modes.
lifting = ParIdentity(T, config.nx*config.ny*config.nt_in) ⊗ ParMatrix(T, config.nc_lift, config.nc_in)
θ_new = init(lifting)

fourier_x = ParDFT(Complex{T}, config.nx)
fourier_y = ParDFT(Complex{T}, config.ny)
# fourier_z = ParDFT(Complex{T}, config.nz)
fourier_t = ParDFT(T, config.nt_out)

# Build restrictions to low-frequency modes
restrict_x = ParRestriction(Complex{T}, Range(fourier_x), [1:config.mx, config.nx-config.mx+1:config.nx])
restrict_y = ParRestriction(Complex{T}, Range(fourier_y), [1:config.my, config.ny-config.my+1:config.ny])
# restrict_z = ParRestriction(Complex{T}, Range(fourier_z), [1:config.mz, config.nz-config.mz+1:config.nz])
restrict_t = ParRestriction(Complex{T}, Range(fourier_t), [1:config.mt])

# mt*mx*my*4 = mt * (2*mx) * (2*my): the number of retained modes
weight_mix = ParIdentity(Complex{T}, config.mt*config.mx*config.my*4) ⊗ ParMatrix(Complex{T}, config.nc_lift, config.nc_lift)
init!(weight_mix, θ_new)

# Transform-then-restrict per axis, combined with Kronecker products
dft = (restrict_t * fourier_t) ⊗ (restrict_y * fourier_y) ⊗ (restrict_x * fourier_x) ⊗ ParIdentity(T, config.nc_lift)
output = cxytb_to_xytcb(reshape(dft' * weight_mix(θ_new) * dft * lifting(θ_new) * x_dfno, (config.nc_lift, config.nx, config.ny, config.nt_out, config.n_batch)))
;

In [31]:
# Keep the value of the entry visited when the counter equals 1, i.e. the
# second entry in the iteration order of θ_new.
o = 0
test = 0
for (k, v) in θ_new
    if o == 1
        test = v
    end
    o += 1
end

# Only the last expression of a cell is displayed, so these two size calls
# produce no visible output.
size(weights)
size(test)

# Signed sum of the difference between the selected entry and `weights`
sum(test - weights)

0.0f0 + 0.0f0im

In [None]:
# Small stand-alone ParMatrixN example with integer-valued complex entries.
# NOTE(review): this cell rebinds the globals T and weights that other cells read.
using ParametricOperators

T = Complex{Int64}

input_shape = (5, 3, 3)
weight_shape = (5, 5, 3, 3)
target_shape = input_shape

# Label 1 is shared by weight and input and absent from the target (summed out);
# label 4 takes its place in the result.
input_order = (1, 2, 3)
weight_order = (4, 1, 2, 3)
target_order = (4, 2, 3)

operator = ParMatrixN(T, weight_order, weight_shape, input_order, input_shape, target_order, target_shape) 
weights = init(operator)

# Apply the parametrised operator to a random flattened input
operator(weights) * vec(rand(T, input_shape...))

In [None]:
# The same contraction pattern written directly with OMEinsum on random arrays.
using OMEinsum

input_shape = (5, 3, 3)
weight_shape = (5, 5, 3, 3)

input = rand(input_shape...)
weight = rand(weight_shape...)

target_order = (4, 2, 3)
weight_order = (4, 1, 2, 3)
input_order = (1, 2, 3)

# Operand labels are given as (weight_order, input_order), result labels as target_order
target = einsum(EinCode((weight_order,input_order),target_order),(weight, input))

In [None]:
# Scratch check of one low-frequency block: reference path (tempx) against the
# operator path (tempy).
# NOTE(review): x_ft keeps its batch axis, so with n_batch = 2 this slice holds
# 4*4*4*20*2 elements, which does not fit the (1, 20, 64) reshape below. The
# reshape of `inter` to (20, 8, 8, 4) likewise leaves no room for a batch axis.
tempx = permutedims(x_ft[1:modes1, 1:modes2, 1:modes3, :,:], [4,2,3,1,5]) # nc * nx * ny * nt * batch
tempx = reshape(tempx, (1, 20, 64))

tempxout = batched_mul(tempx,weights[1,1,1,:,:,1])

inter = reshape(dft * lifting(θ_new) * x_dfno, (20, 8, 8, 4))
tempy = inter[:, 1:4, 1:4, :]

# NOTE(review): Complex{T} needs T to be a real type; it fails if T was left as
# Complex{Int64} by the ParMatrixN example cell.
weights_dfno = ParIdentity(Complex{T}, 4*4*4) ⊗ ParMatrix(Complex{T}, config.nc_lift, config.nc_lift) # nc * nx * ny * nt
θ_new2 = init(weights_dfno)
weights_temp = 0

# Keeps the transpose of the last value visited in θ_new2
for (k, v) in θ_new2
    weights_temp = permutedims(v, [2, 1])
end

tempyout1 = reshape(weights_dfno(θ_new2) * vec(tempy), (1, 20, 64))
tempyout2 = batched_mul(reshape(tempy, (1,20,64)),weights_temp)

sum(vec(tempyout1) - vec(tempyout2))
# println(sum(vec(tempx) - vec(tempy)))
# println(sum(vec(weights_temp) - vec(weights[1,1,1,:,:,1])))

# Random.seed!(1234)
# baseline = rand(ComplexF32, 20, 20) ./ 20 # 1f0 / convert(real(T), sqrt(20 * 20)) # ./ convert(real(T), sqrt(20 * 20))

# scale = 1f0 / convert(real(T), sqrt(20 * 20))
# Random.seed!(1234)
# weights = scale*rand(Complex{T}, 1, 1, 1, 20, 20, 1)

# println(sum(vec(baseline) - vec(weights[1,1,1,:,:,1])))
# println(sum(vec(baseline) - vec(weights_temp)))

In [30]:
# Print the size of every gradient array in grads and the full contents of the
# first one only.
u = 1

for p in grads
    println(size(p))
    if u == 1
        print(p)
    end
    u += 1
end

(1, 1, 1, 4, 20)
[-0.16171706;;;; -0.20726466;;;; -0.25518337;;;; 0.2643387;;;;; 1.3198342;;;; -0.254401;;;; -0.02295451;;;; 0.08295583;;;;; -0.3161814;;;; -0.19592044;;;; 0.267937;;;; 0.09709002;;;;; 0.08489406;;;; -0.25650942;;;; -0.11920935;;;; 0.03064397;;;;; -0.18131526;;;; 0.1065499;;;; -0.24246559;;;; 0.14511268;;;;; -0.9177591;;;; 0.42056245;;;; 0.054471824;;;; 0.079119205;;;;; 1.2851052;;;; 0.1415495;;;; -0.32246733;;;; -0.08724655;;;;; 0.69611615;;;; -0.020051612;;;; -0.13853109;;;; -0.36852628;;;;; -0.9076334;;;; 0.06441738;;;; -0.11737078;;;; -0.0039142985;;;;; 1.5187979;;;; -0.15598628;;;; -0.09682914;;;; 0.19104315;;;;; -0.5640604;;;; 0.23401311;;;; -0.051067285;;;; 0.007611217;;;;; -0.2988591;;;; -0.19486529;;;; -0.005107416;;;; 0.20380133;;;;; -0.53433424;;;; 0.22049609;;;; -0.25762945;;;; 0.0068155895;;;;; -1.0707594;;;; -0.14015822;;;; 0.016079295;;;; 0.15934826;;;;; 1.0874187;;;; 0.07237384;;;; 0.185438;;;; -0.14645414;;;;; 1.3427666;;;; -0.30003825;;;; -0.44280475;;

In [31]:
# Print every key of grads_dfno and the gradient value of the third entry
# visited. "Third" follows the container's own iteration order.
o = 1
for (k, v) in grads_dfno
    println(k)
    if o == 3
        print(v)
    end
    o += 1
end

ParMatrix{Float32}(20, 20, UUID("d6b2595c-8a9e-442b-82d2-a3a87f751aa4"))
ParMatrix{Float32}(1, 128, UUID("fe9780f6-10c7-44c6-9362-7bb76d73bb36"))
ParMatrix{Float32}(20, 4, UUID("ff6d4d00-06df-404d-b891-ca1e9d1aaec3"))


Float32[-0.42632148 -0.1928737 0.21309604 0.045274157; -0.70842373 0.13291544 -0.17014682 0.35531783; -0.10550135 -0.09502187 0.16965935 0.031863436; -0.4176053 -0.09258261 0.32370213 0.005126861; 0.47769243 0.061752353 0.22284232 -0.21518424; 0.29890057 0.37176663 -0.6144696 -0.28437307; 0.1459418 0.038446013 -0.29879722 0.48521975; 0.5266604 -0.12852213 0.44297343 0.17632857; 0.7538901 -0.11182453 0.440801 -0.34265572; -0.35054535 0.16432957 -0.42745912 0.3498021; 0.5598861 -0.14096613 0.7443514 -0.35446283; -0.67265284 0.022514885 -0.2403444 0.31744096; 0.70876384 0.1366038 -0.107366905 -0.22658512; -0.9902921 -0.09487598 -0.2017013 0.20810148; -0.51238936 0.0068213604 -0.023764405 0.29441133; -0.3607113 -0.11047865 0.656597 -0.14510065; -0.21281554 -0.0065856706 0.51843774 -0.10859058; 0.91978604 0.25676775 -0.32844552 -0.10390376; 0.3094233 -0.05528579 0.176711 0.21467009; -0.16980201 0.29595965 -0.525085 -0.22241342]ParMatrix{Float32}(128, 20, UUID("5df37970-e9d3-4332-9ff9-179c40