# Debug and test flexOPT.jl

In [None]:
# Nobuaki Fuji @ipgp September 2025
using Pkg

cd(@__DIR__)
Pkg.activate("../..")
using BenchmarkTools
# below are the tools to debug the code
#using Revise # if we use Revise, include will be Revise.includet
using Profile, StatProfilerHTML

using Metal
include("../src/imageReader.jl") # read 2D images for models

include("../src/OPTwrappers.jl") 
include("../src/OPTnewEngines.jl")  # I do this to test


In [None]:
# true when the VSCODE_PID environment variable is set (used as a hint that we run inside VS Code)
in_vscode = haskey(ENV, "VSCODE_PID")

In [None]:
iExperiment = 1

# `iExperiment` can be fixed to 1, or set to `nothing` when the user does not need to run benchmark tests
# (which should be the case most of the time once the software has stabilised)

In [None]:
# Select the governing equations by name. `famousEquations` returns the expressions, the
# fields and the variables (plus their `ext*` counterparts — presumably for external sources,
# TODO confirm), the coordinates and the two differential operators.
famousEquationType="2DacousticTime"
exprs,fields,vars,extexprs,extfields,extvars,coordinates,∂,∂²=famousEquations(famousEquationType)

In [None]:
modelName = "marmousi"

# How the model is defined: "ToyModel", "2DimageFile" or "1DsphericalPlanet";
# `nothing` skips the model input altogether.
modelDefinitionMethod = "2DimageFile"
model = nothing

if !isnothing(modelDefinitionMethod)
    if modelDefinitionMethod == "ToyModel"
        # option i) the model domain is described by its window and its numbers of points
        DomainWindow = (DomainWindowT=1.0, DomainWindowX=1.0, DomainWindowY=1.0, DomainWindowZ=1.0)
        ModelSizeTXYZ = (ModelSizeT=101, ModelSizeX=101, ModelSizeY=101, ModelSizeZ=0)
    elseif modelDefinitionMethod == "2DimageFile"
        # option ii) read a 2D image file as the model
        # other images that were tried:
        #   "../data/model/random/colourful.jpg"
        #   "../data/model/artemis/IMG_6098.jpeg"
        #   "../data/model/random/tmp.png"
        imagefile = "../data/model/random/marmousi.png"
        colormap = "jet" # either an RGB vector or the name of a predefined colormap

        # Nwidth / Nheight keywords (e.g. Nwidth=10, Nheight=10) can be passed as well
        model = read2DimageModel(imagefile, colormap; showRecoveredImage=false)
    elseif modelDefinitionMethod == "1DsphericalPlanet"
        # option iii) 1D spherical planet models: the programmes developed during
        # Xmas 2023 (inputModels.jl) are not wired in yet
    end
end

In [None]:
# Number of points of the model along its two array dimensions (used as depth and
# horizontal axes in the plot below).
@show Nz,Nx=size(model)
# Grid increments as a two-element Float64 tuple: a comma in place of the decimal point
# would build a three-element tuple and silently turn Δx into an Int.
Δz,Δx=(1.0,1.0)

In [None]:
# I don't know why the backend becomes GLMakie without this ... need to check

using CairoMakie
CairoMakie.activate!()

In [None]:
# Plot the model: horizontal axis from 0, vertical axis from -(Nz-1)*Δz up to 0 (the surface).
# Both axes are built by scaling the integer grid indices with the grid increment, so that
# their lengths stay Nx and Nz whatever the values of Δx and Δz.
fig, ax, hm = heatmap(
    #topo.x,topo.y,topo.z';
    collect((0:1:(Nx-1)).*Δx), collect((-(Nz-1):1:0).*Δz), reverse(model',dims=2);
    colormap = :plasma,
    colorrange=(extrema(model)),
    axis = (xlabel = "horizontal", ylabel = "depth from the surface", title = "Marmousi model")
)
#ylims!(ax,-200,300)
#xlims!(ax,0,400)
Colorbar(fig[1,2], hm, label="Wave speed (not physical)")
fig

In [None]:
coordinates

In [None]:
# numerical grid increments, one per entry of `coordinates`
Δnum = (1.0,1.0,1.0) # this should be in the same order as coordinates 



# external sources are requested; the masked region itself is filled in a later cell
IneedExternalSources = true
maskedRegionForSourcesInSpace = nothing

#DrWatson configurations

# orders of the B-spline functions and numbers of points, in time and in space
orderBtime=1
orderBspace=1
pointsInSpace=2
pointsInTime=2

# presumably the B-spline orders of the test (weight) functions — TODO confirm
WorderBspace=0
WorderBtime=0
# extra order added to the expression orders when `orderU` is built
supplementaryOrder=2

In [None]:
# `models` is a Vector{Any} (not a tuple, so that a single member cannot be misinterpreted);
# its only entry is the image model rescaled linearly: value * 0.2 + 0.4
models = Any[model .* 0.2 .+ 0.4]

In [None]:
maximum(models[1])

In [None]:
# time marching is on when one of the coordinates is the time dimension
timeMarching = any(name -> name === timeDimensionString, string.(coordinates))

# number of time slices kept while marching (a single one otherwise)
fakeNt = timeMarching ? pointsInTime + 1 : 1

# Nx, Ny etc.; when time marching, Nt is appended and must be the last element!
modelPoints = timeMarching ? (size(model)..., fakeNt) : size(model)

In [None]:
# List of the spatial points where the external sources act.
maskedRegionForSourcesInSpace  = Array{CartesianIndex,1}(undef,0) # it is important to declare the type of this
# NOTE(review): `modelPoints[1:end-1]` always drops the last entry, which is the time entry only
# when timeMarching is true — confirm the behaviour wanted without time marching.
maskedRegionForSourcesInSpace = push!(maskedRegionForSourcesInSpace, CartesianIndex(modelPoints[1:end-1].÷2))# in Ndimension (or Ndimension  - 1 if timeMarching)
# in this example, I put a point source at the centre of the model space

# NOTE(review): `((1.0))` is a plain Float64 (the parentheses do not build a tuple) — confirm
# that the consumers of `forceModels` expect a scalar here.
forceModels =((1.0)) # if your model has no special material parameters, this is how it is written

# every setting above is gathered in one Dict keyed by the variable names
concreteModelParameters = @strdict famousEquationType Δnum orderBtime orderBspace WorderBtime WorderBspace supplementaryOrder pointsInSpace pointsInTime IneedExternalSources modelName models modelPoints forceModels maskedRegionForSourcesInSpace iExperiment

In [None]:
#safeget(A, inds...; default=0) = checkbounds(Bool, A, inds...) ? A[inds...] : default

In [None]:
# we need to see what's happening inside makeCompleteCostFunctions

In [None]:
    # Replay of the first steps of makeCompleteCostFunctions: read the settings back from the
    # Dict and rebuild the equations from their name.
    @unpack famousEquationType, Δnum, orderBtime, orderBspace, WorderBtime,WorderBspace,supplementaryOrder,pointsInSpace, pointsInTime, IneedExternalSources, modelName, models, modelPoints, forceModels,maskedRegionForSourcesInSpace, iExperiment = concreteModelParameters
    exprs,fields,vars,extexprs,extfields,extvars,coordinates,∂,∂² = famousEquations(famousEquationType)
    # kept from the function body this cell was copied from; these names are already
    # globals at notebook top level
    global ∂,∂²
    
    # here we construct semi symbolic operators (with numerical Δnum)
    operatorConfigurations = @strdict famousEquationType Δnum orderBtime orderBspace WorderBtime WorderBspace supplementaryOrder pointsInSpace pointsInTime IneedExternalSources iExperiment


In [None]:
#instead of doing this, I go step by step to see what's happening
#operators = myProduceOrLoad(OPTobj,operatorConfigurations,"semiSymbolics")

In [None]:
 # Step-by-step replay of what `myProduceOrLoad(OPTobj, operatorConfigurations, ...)` would
 # receive: unpack the operator settings and regroup them in two named tuples.
 @unpack famousEquationType, Δnum, orderBtime, orderBspace, WorderBtime,WorderBspace,supplementaryOrder,pointsInSpace, pointsInTime,IneedExternalSources, iExperiment= operatorConfigurations

exprs,fields,vars,extexprs,extfields,extvars,coordinates,∂,∂² = famousEquations(famousEquationType)

# options of the Taylor expansion, and characteristics of the trial functions
TaylorOptions=(WorderBtime=WorderBtime,WorderBspace=WorderBspace,supplementaryOrder=supplementaryOrder)
trialFunctionsCharacteristics=(orderBtime=orderBtime,orderBspace=orderBspace,pointsInSpace=pointsInSpace,pointsInTime=pointsInTime)

In [None]:
# instead of doing below
#@time operatorData=OPTobj(exprs,fields,vars; coordinates=coordinates,trialFunctionsCharacteristics=trialFunctionsCharacteristics,TaylorOptions=TaylorOptions,Δnum = Δnum,iExperiment=iExperiment)

In [None]:
# here is the internal contents of OPTobj(exprs,fields,vars; coordinates=coordinates,trialFunctionsCharacteristics=trialFunctionsCharacteristics,TaylorOptions=TaylorOptions,Δnum = Δnum,iExperiment=iExperiment)

In [None]:


    # time marching is on when one of the coordinates is the time dimension
    @show timeMarching = any(a -> a === timeDimensionString, string.(coordinates))


    @unpack orderBtime, orderBspace, pointsInSpace, pointsInTime = trialFunctionsCharacteristics
    @unpack WorderBtime, WorderBspace,supplementaryOrder = TaylorOptions

    NtypeofExpr=length(exprs)   # number of governing equations
    NtypeofMaterialVariables=length(vars) # number of material coefficients
    NtypeofFields=length(fields) # number of unknown fields
    
    Ndimension = length(coordinates) # we do not change this for the moment, especially for the time-marching scheme

    # number of points used along each coordinate; the last coordinate is time when time marching
    pointsUsed = fill(pointsInSpace+1, Ndimension)
    if timeMarching
        pointsUsed[end]=pointsInTime+1
    end

    # `CˡηSymbolicInversion` is only assigned in a later cell of this notebook: while it is
    # undefined, treat it as `false` so that a dimension mismatch is reported by the message
    # below instead of an UndefVarError.
    symbolicInversionRequested = @isdefined(CˡηSymbolicInversion) && CˡηSymbolicInversion
    if length(Δnum) != Ndimension && !symbolicInversionRequested
        @error "the numerical delta increment has not the same dimension!"
    end

In [None]:
 #region investigation of all the fields and vars dependencies in terms of x-y-z-t

    # dependency of every field on every coordinate (one column per field)
    eachFieldDependency = ones(Int, Ndimension, NtypeofFields)
    for iFields in 1:NtypeofFields
        eachFieldDependency[:, iFields] = findCartesianDependency(fields[iFields], Ndimension)
    end

    # dependency of every material variable on every coordinate (one column per variable)
    eachVariableDependency = ones(Int, Ndimension, NtypeofMaterialVariables)
    for iVars in 1:NtypeofMaterialVariables
        eachVariableDependency[:, iVars] = findCartesianDependency(vars[iVars], Ndimension)
    end

    # 1 - ∏(1 - dependency) over the columns: when the dependencies are 0/1 flags, this is 1
    # as soon as at least one field (resp. variable) depends on the coordinate
    fieldDependency = 1 .- vec(prod(1 .- eachFieldDependency; dims=2))
    variableDependency = 1 .- vec(prod(1 .- eachVariableDependency; dims=2))

    # a coordinate on which no field depends is ignored for the variables as well
    variableDependency = variableDependency .* fieldDependency

    #endregion

In [None]:
#region definition of points in time and space to be used

    # heaviside(x) = x > 0 ? 1 : x == 0 ? 0 : -1

    # the orders of B-spline functions, depending on fields 

    # B-spline orders per coordinate, set to 0 where the fields do not depend on the coordinate
    orderBspline = zeros(Int, Ndimension)
    WorderBspline = zeros(Int, Ndimension)
    orderBspline .= orderBspace .* fieldDependency
    WorderBspline .= WorderBspace .* fieldDependency
    if timeMarching
        # the last coordinate is time and has its own orders
        orderBspline[Ndimension] = orderBtime * fieldDependency[Ndimension]
        WorderBspline[Ndimension] = WorderBtime * fieldDependency[Ndimension]
    end

    # the maximum number of points used in the vicinity of the node, which is independent of
    # the order of B-spline functions (see our paper)
    pointsUsedForFields = (pointsUsed .- 1) .* fieldDependency .+ 1

    # the maximal orders of partials that we could expect in the expressions
    # (same array as pointsUsedForFields, not a copy)
    orderExpressions = pointsUsedForFields

    # the maximum orders for the fields that we will use for OPT coefficients' exploration
    orderU = (orderExpressions .- 1) .+ (supplementaryOrder .* fieldDependency) .+ 1
    # we restore this orderU since we need to control this 

    #endregion

In [None]:
#region analysis of expressions to obtain the α_{n'nji}

    # bigα[iField, iExpr] holds the distinct coefficients found by PDECoefFinder for that pair
    bigα = Array{Any,2}(missing, NtypeofFields, NtypeofExpr)
    varM = nothing
    for iExpr in eachindex(exprs), iField in eachindex(fields)
        # we assume that the pointsUsedForFields represent the highest order of partials
        nonZeroAlphas = PDECoefFinder(orderExpressions, coordinates, exprs[iExpr], fields[iField], vars)
        bigα[iField, iExpr] = unique(nonZeroAlphas)
    end
    varM = varMmaker(pointsUsedForFields, coordinates, vars)
    @show bigα,varM
    #endregion


In [None]:

    #region Preparation for Taylor expansion
    
    # Uninitialised arrays that are only used for their shapes: one entry per Taylor
    # multi-order (sizes orderU) and one per local point (sizes pointsUsedForFields).
    orderTaylors=Array{Any,Ndimension}(undef,Tuple(orderU))
    pointsInSpaceTime=Array{Any,Ndimension}(undef,Tuple(pointsUsedForFields))
    
    
    # Cartesian indices of all the Taylor multi-orders
    multiOrdersIndices=CartesianIndices(orderTaylors)

    # one entry per geometry configuration: the available points and the linear index of the centre point
    availablePointsConfigurations = Array{Array{Vector{Int64},Ndimension},1}()
    centrePointConfigurations=Array{Int64,1}()

    #endregion


In [None]:

    #region Cartesian indices that can be available to use (normally: iGeometry=1)

    multiPointsIndices=CartesianIndices(pointsInSpaceTime)
    # this is the whole local Cartesian grids (without any lacking points)
    
    # middle point of the local grid along every coordinate (integer division of the extent)
    tmpVecForMiddlePoint = ((car2vec(multiPointsIndices[end]).-1 ).÷2 ).+1 # only valid for testOnlyCentre
    midTimeCoord = nothing
    if timeMarching
        # along time, the centre is the last-but-one point instead of the middle one
        midTimeCoord=car2vec(multiPointsIndices[end])[end]-1
        tmpVecForMiddlePoint[end]=midTimeCoord
        #AjiννᶜU = Array{Num,2}(undef,length(multiPointsIndices)÷(midTimeCoord+1),NtypeofExpr)
    end
    #@show tmpVecForMiddlePoint 
    middleν=vec2car(tmpVecForMiddlePoint)

    # register this full grid as one configuration, with the linear index of its centre
    @show availablePointsConfigurations=push!(availablePointsConfigurations,car2vec.(multiPointsIndices))
    centrePointConfigurations=push!(centrePointConfigurations,LinearIndices(multiPointsIndices)[middleν])
    @show size(availablePointsConfigurations)
    #endregion

In [None]:
# false: the numerical increments Δnum are used for Δ instead of symbolic variables
CˡηSymbolicInversion=false
# presumably restricts the evaluation to the centre point — TODO confirm
testOnlyCentre=true
# no benchmark experiment from here on (overrides the value set at the top of the notebook)
iExperiment =nothing

In [None]:
 #region obtaining the semi-symbolic expression of cost function based on A given by eqns. 52 and 53.

    # before calling AuSymbolic we can manipulate pointsIndices for various boundary configurations


    # Δ is symbolic only on the symbolic-inversion path (this seems super cool but it takes
    # time); otherwise the numerical increments are used as they are
    #Cˡη,Δ,multiLCar = illposedTaylorCoefficientsInversion(coordinates,multiOrdersIndices,multiPointsIndices;testOnlyCentre=testOnlyCentre,timeMarching=timeMarching)
    Δ = CˡηSymbolicInversion ? Symbolics.variables(:Δ,1:Ndimension) : Δnum

    # untyped containers for the results of each geometry configuration
    AjiννᶜU = Any[]
    Ulocal = Any[]

    @show typeof(Δ)
    @show typeof(Δ) <: Tuple{Vararg{Float64}}
    @show typeof(coordinates) <:Tuple{Vararg{Num}}

In [None]:
# index of the geometry configuration to pick in availablePointsConfigurations / centrePointConfigurations
iConfigGeometry=1

In [None]:
       # points and linear index of the centre point for the selected geometry configuration
       @show pointsIndices=availablePointsConfigurations[iConfigGeometry]
        @show middleLinearν=centrePointConfigurations[iConfigGeometry]
        

        

        #tmpAjiννᶜU,tmpUlocal=ASemiSymbolic(coordinates,multiOrdersIndices,pointsIndices,multiPointsIndices,middleLinearν,Δ,varM,bigα,orderBspline,WorderBspline,NtypeofExpr,NtypeofFields)



        #varM is given above for the max number of points used 
        #tmpAjiννᶜU,tmpUlocal=AuSymbolic(coordinates,multiOrdersIndices,pointsIndices,multiPointsIndices,middleLinearν,Δ,varM,bigα,orderBspline,WorderBspline,NtypeofExpr,NtypeofFields)
        #AjiννᶜU=push!(AjiννᶜU,tmpAjiννᶜU)
        #Ulocal=push!(Ulocal,tmpUlocal)

In [None]:
# just to getIntegralWYYKK
# Debug cell: compute (or load) the 1D integral kernel for a single coordinate and print one entry.
iCoord=1
# `L_MINUS_N` and `integral1DWYYKK` otherwise exist only as locals of ASemiSymbolic: they
# are rebuilt here with the same definitions so that this cell runs on its own.
L_MINUS_N = multiOrdersIndices .- multiOrdersIndices[1]
integral1DWYYKK = Array{Any,1}(undef,length(coordinates))
integralParams = @strdict oB =orderBspline[iCoord] oWB = WorderBspline[iCoord] νCoord=pointsIndices[middleLinearν][iCoord] LCoord = multiPointsIndices[end][iCoord] ΔCoord=Δ[iCoord] l_n_max=L_MINUS_N[end][iCoord]
output = myProduceOrLoad(getIntegralWYYKK,integralParams,"intKernel")
integral1DWYYKK[iCoord] = output["intKernelforνLΔ"]
@show output["modμ"]
# entry for the first pair of points and the lowest orders
linearμᶜ=1;linearμ=1;l_n_variable=0;l_n_field=0
@show integral1DWYYKK[iCoord][pointsIndices[linearμᶜ][iCoord],pointsIndices[linearμ][iCoord],l_n_variable+1,l_n_field+1]

In [None]:
# don't know if this works

using KernelAbstractions, Adapt
# assume backend already detected and `stack_integrals` exists

# ---------- GPU kernel: one thread per (iExpr, iField)
# Kout is the output CoefU array [nExpr, nFields]
# GPU kernel (experimental): one thread per (iExpr, iField) entry of `Kout`.
#
# For its entry, the thread sums over the alpha entries of the pair, over all (μc, μ) index
# pairs, over the rows of `C_right` and `C_left`, and over all (l, lc) pairs, the product of
# the per-coordinate entries of `S` weighted by the two C coefficients, by `subVal` and by
# `Ulocal_arr`.
#
# `Pmat`, `alpha_n` and `alpha_nc` are part of the signature but are not read yet.
@kernel function compute_CoefU_kernel!(
    Kout,            # Float array (nExpr, nFields), output
    S,               # stacked integrals: (ncoord, nmu, nmu, nL, nL)
    Lmat, Pmat,      # mapping arrays: Lmat[linearl, ic] is the ic-th entry of the multi-index
    # C matrices packed:
    C_left,          # Float array: (n_mu_plus_eta, nL)  -- tmpCˡη (μ+η x linearl)
    C_right,         # Float array: (n_muc_plus_eta, nL) -- tmpCˡημᶜ (μc+ηc x linearlc)
    Ulocal_arr,      # Float array: (n_mu_plus_eta, nFields)
    # alpha indexing:
    alpha_flat,      # Int32 flattened alpha indices
    alpha_ptrs,      # Int32 start pointers per pair (len = nExpr*nFields+1) 1-based
    # alpha properties:
    alpha_n,         # Int32 array of shape (n_alpha, ncoord)  -- base n for each alpha
    alpha_nc,        # Int32 array (n_alpha, ncoord)
    # substituted values pre-evaluated:
    subVal,          # Float array: (n_alpha, n_muc_plus_eta) numeric
    # dims
    nExpr::Int32, nFields::Int32,
    ncoord::Int32, nmu::Int32, nL::Int32
    )
    # `NTuple` (without type parameters) is the index kind that returns the (i, j) tuple
    (iex, jfield) = @index(Global, NTuple)
    if iex <= nExpr && jfield <= nFields
        T = eltype(Kout)
        acc = zero(T)
        # flattened (1-based) index of the pair in alpha_ptrs
        pair_idx = (iex - 1) * nFields + jfield
        startp = alpha_ptrs[pair_idx]
        endp   = alpha_ptrs[pair_idx+1] - 1

        for aidx in startp:endp
            alpha_id = alpha_flat[aidx]            # actual alpha index (1-based)
            @inbounds for μc_idx in 1:nmu
                for μ_idx in 1:nmu
                    for μcpe in 1:size(C_right, 1)         # μc_plus_eta index
                        ssub = subVal[alpha_id, μcpe]
                        if iszero(ssub)
                            continue
                        end
                        for μpe in 1:size(C_left, 1)       # μ_plus_eta index
                            U_here = Ulocal_arr[μpe, jfield]
                            prod_sum = zero(T)
                            # full range of nL for l and lc (can be shrunk with precomputed lists)
                            for linearl in 1:nL
                                for linearlc in 1:nL
                                    # product of the 1D kernels across the coordinates
                                    kernp = one(T)
                                    for ic in 1:ncoord
                                        l_field = Lmat[linearl, ic]
                                        l_var   = Lmat[linearlc, ic]
                                        # S expects (ic, μc, μ, lvar+1, lfield+1)
                                        kernp *= S[ic, μc_idx, μ_idx, l_var+1, l_field+1]
                                    end
                                    # the C matrices are indexed element by element: a row
                                    # slice `C[i, :]` would allocate a copy inside the kernel
                                    prod_sum += C_left[μpe, linearl] * C_right[μcpe, linearlc] * kernp
                                end
                            end
                            # accumulate: multiply by substituted value and U
                            acc += prod_sum * ssub * U_here
                        end
                    end
                end
            end
        end
        Kout[iex, jfield] = acc
    end
end


In [None]:
using KernelAbstractions
using Adapt

# ---------------------------------------------------------------------
# Robust backend detection (CUDA → Metal → CPU)
# Pick the compute backend: CUDA when the CUDA module is loaded and reports a device,
# else Metal when the Metal module is loaded and lists at least one device, else the CPU.
# The choice is printed.
function detect_backend()
    if @isdefined(CUDA) && CUDA.has_cuda()
        println("→ Using CUDA backend")
        return CUDABackend()
    end

    if @isdefined(Metal)
        try
            metalDevices = Metal.devices()
            if !isempty(metalDevices)
                println("→ Using Metal backend (", length(metalDevices), " device(s))")
                return MetalBackend()
            end
        catch err
            # a failing device query is reported and we fall back to the CPU
            @warn "Metal available but cannot query devices: $err"
        end
    end

    println("→ Using CPU backend (no GPU detected)")
    return CPU()
end

const backend = detect_backend()
println("Selected backend type: ", typeof(backend))


In [None]:
backend

In [None]:
# GPU-enabled implementation of your kernelIntegral assembly
# Portable: tries CUDA, then Metal, then CPU fallback (KernelAbstractions.jl)
# Includes a small correctness test comparing CPU vs GPU results on random data.

using KernelAbstractions
using Adapt


println("Selected backend: ", typeof(backend))

# ---------------------------------------------------------------------
# Helper: build stacked 5D array from vector of per-coordinate 4D arrays
# integral1DWYYKK[iCoord] has shape (nmu, nmu, nL, nL)
# Stack the per-coordinate 4D arrays into one 5D array whose first index is the coordinate.
# The sizes and the element type are taken from the first array; all the arrays must have
# that same size.
function stack_integrals(integral1DWYYKK)
    firstBlock = integral1DWYYKK[1]
    nmu, _, nL, _ = size(firstBlock)
    stacked = zeros(eltype(firstBlock), length(integral1DWYYKK), nmu, nmu, nL, nL)
    for (ic, block) in enumerate(integral1DWYYKK)
        stacked[ic, :, :, :, :] .= block
    end
    return stacked
end

# Kernel: one thread per (i,j) entry of kernelIntegral
# Kernel: one thread per (i, j) entry of K.
# K[i, j] is the sum, over all pairs of points (μc, μ), of the product over the coordinates of
# S[ic, Pmat[μc, ic], Pmat[μ, ic], Lmat[j, ic] + 1, Lmat[i, ic] + 1].
# `nmu` and `nL` are accepted but not read.
@kernel function kernelIntegrals!(K, S, Lmat, Pmat, ncoord, nmu, nL, nPoints)
    (i, j) = @index(Global, NTuple)
    if i <= size(K,1) && j <= size(K,2)
        T = eltype(K)
        total = zero(T)
        for μc in 1:nPoints
            for μ in 1:nPoints
                term = one(T)
                @inbounds for ic in 1:ncoord
                    # S has dims (ncoord, nmu, nmu, nL, nL): row i of Lmat gives the field
                    # order, row j the variable order
                    term *= S[ic, Pmat[μc, ic], Pmat[μ, ic], Lmat[j, ic]+1, Lmat[i, ic]+1]
                end
                total += term
            end
        end
        K[i,j] = total
    end
end

          
# Top-level function to build and run GPU kernel
# Build the kernel-integral matrix on the selected backend.
#
# Arguments
# - integral1DWYYKK: per-coordinate 4D arrays, stacked with `stack_integrals`
# - L_MINUS_N: collection of multi-indices; entry i gives row i of `Lmat`
# - pointsTuples: collection of local points; entry p gives row p of `Pmat`
# - coordinates: not used here, kept for the callers
# - use_float32: compute in Float32 instead of Float64
#
# Returns a host matrix with one row and one column per entry of `L_MINUS_N`.
function build_kernelIntegral_gpu(integral1DWYYKK, L_MINUS_N, pointsTuples, coordinates; use_float32::Bool=false)
    # stack integrals to a 5D array: (ncoord, nmu, nmu, nL, nL)
    S = stack_integrals(integral1DWYYKK)
    ncoord, nmu, _, nL, _ = size(S)
    nPoints = length(pointsTuples)

    # The rows and columns of the result run over the multi-indices of L_MINUS_N, whose
    # number differs from the per-coordinate size nL of S as soon as there is more than one
    # coordinate.
    nLinear = length(L_MINUS_N)

    # Lmat: nLinear x ncoord, row i contains the multi-index tuple of L_MINUS_N[i]
    Lmat = Array{Int32}(undef, nLinear, ncoord)
    for i in 1:nLinear
        t = Tuple(L_MINUS_N[i])
        for ic in 1:ncoord
            Lmat[i, ic] = Int32(t[ic])
        end
    end

    # Pmat: nPoints x ncoord, row p contains the point tuple
    Pmat = Array{Int32}(undef, nPoints, ncoord)
    for p in 1:nPoints
        t = Tuple(pointsTuples[p])
        for ic in 1:ncoord
            Pmat[p, ic] = Int32(t[ic])
        end
    end

    # element type of the result (S is converted only on the Float32 path)
    if use_float32
        S = Float32.(S)
        kernel_eltype = Float32
    else
        kernel_eltype = Float64
    end

    K = zeros(kernel_eltype, nLinear, nLinear)

    # move the arrays to the backend
    S_d = Adapt.adapt(backend, S)
    Lmat_d = Adapt.adapt(backend, Lmat)
    Pmat_d = Adapt.adapt(backend, Pmat)
    K_d = Adapt.adapt(backend, K)

    # one thread per entry of K, in 16x16 workgroups
    workgroup = (16, 16)
    ndrange = (nLinear, nLinear)
    kernelIntegrals!(backend, workgroup)(K_d, S_d, Lmat_d, Pmat_d,Int32(ncoord), Int32(nmu), Int32(nL), Int32(nPoints);ndrange = ndrange)

    # wait for the kernel through the backend-independent API, then copy back to the host
    KernelAbstractions.synchronize(backend)
    return Array(K_d)
end


In [None]:
# inside ASemiSymbolic

# All the pairs (lᶜ, l) with lᶜ taken from `ensemblePoints2` and l from `ensemblePoints`;
# l is the slow index, lᶜ the fast one.
function precompute_valid_pairs(ensemblePoints2,ensemblePoints)
    pairList = Tuple{CartesianIndex,CartesianIndex}[]
    for slow in ensemblePoints, fast in ensemblePoints2
        # any extra validity condition on the pair would be tested here
        push!(pairList, (fast, slow))
    end
    return pairList
end

# All the pairs (lᶜ, l) of elements of `ensemblePoints`; l is the slow index, lᶜ the fast one.
function precompute_valid_pairs(ensemblePoints)
    pairList = Tuple{CartesianIndex,CartesianIndex}[]
    for slow in ensemblePoints, fast in ensemblePoints
        # any extra validity condition on the pair would be tested here
        push!(pairList, (fast, slow))
    end
    return pairList
end

# Work-in-progress variant of AuSymbolic that goes through the matrix-inversion path.
#
# Steps visible in the body:
#   1. shift the Taylor multi-orders so that they start at zero (L_MINUS_N);
#   2. for every coordinate, compute or load the 1D integral kernel with getIntegralWYYKK;
#   3. compute or load the Taylor coefficient inversion (TaylorCoefInversion);
#   4. loop over expressions, fields and points and evaluate eachElementAjiννᶜ.
# Returns (Ajiννᶜ, tmp); `tmp` is never filled and is returned empty.
#
# NOTE(review): the body reads names that are neither parameters nor locals (`exprs`, `fields`,
# `vars`, `tmpCˡημ`, `tmpCˡημᶜ`, `stackedS`, `Lmat`, `T_gpu`, `backend`,
# `gpu_sum_over_lpairs_mu_pairs!`); they have to exist as globals when the function is called.
function ASemiSymbolic(coordinates,multiOrdersIndices,pointsIndices,multiPointsIndices,middleLinearν,Δ,varM,bigα,orderBspline,WorderBspline,NtypeofExpr,NtypeofFields)
    # I write this function to be able to go through the matrix inversion path
    # the model function is AuSymbolic (below)


    # here we develop!!! 06/11/2025!!!


    # NOTE(review): the index 14 is hard-coded (presumably the centre of a 3x3x3 local grid);
    # `middleLinearν` is passed in and looks like the intended index — confirm.
    νIndices=[pointsIndices[14]] # here I just put this, since normally ν that defines L(ν) in relative coordinates is centered 


    tmp=[]
    #region preparation 

    # multi-orders shifted so that the first one is the zero multi-index
    L_MINUS_N = multiOrdersIndices
    @show L_MINUS_N = L_MINUS_N .-L_MINUS_N[1]
    
    

    #endregion

    #region we compute the integral for 1D domain(s)

    integral1DWYYKK = Array{Any,1}(undef,length(coordinates))
    modifiedμ=Array{Any,1}(undef,length(coordinates))
    for iCoord in eachindex(coordinates) # for each coordinate
        integralParams = @strdict oB =orderBspline[iCoord] oWB = WorderBspline[iCoord] νCoord=pointsIndices[middleLinearν][iCoord] LCoord = multiPointsIndices[end][iCoord] ΔCoord=Δ[iCoord] l_n_max=L_MINUS_N[end][iCoord]
        output = myProduceOrLoad(getIntegralWYYKK,integralParams,"intKernel")
        integral1DWYYKK[iCoord] = output["intKernelforνLΔ"]
        modifiedμ[iCoord] = output["modμ"] # this can be still 'nothing'
    end
    @show typeof(integral1DWYYKK[1]), typeof(modifiedμ[1])
    @show size(integral1DWYYKK[1])
    @show modifiedμ,integral1DWYYKK[1]
    #endregion


    coefInversionDict = @strdict coordinates multiOrdersIndices pointsIndices Δ WorderBspline modifiedμ

    output=myProduceOrLoad(TaylorCoefInversion,coefInversionDict,"taylorCoefInv")
    # NOTE(review): `Cˡη` is not read again in this function; the inner function uses
    # `tmpCˡημ` / `tmpCˡημᶜ`, which are not defined here.
    Cˡη=output["CˡηGlobal"]

    # the order is: (νᶜ,) ν, i, j  here

    Ajiννᶜ = Array{Num,4}(undef,length(pointsIndices),length(pointsIndices),NtypeofFields,NtypeofExpr)


    LI_points = LinearIndices(pointsIndices)
    #LI_multi  = LinearIndices(multiOrdersIndices)
    LI_L_MINUS_N = LinearIndices(L_MINUS_N)

    


    # prepare N-dimension WYYKK kernel integral over Ω

    #kernelIntegral=Array{Float64,2}(undef,length(L_MINUS_N),length(L_MINUS_N))

    #kernelIntegral=build_kernelIntegral_gpu(integral1DWYYKK, L_MINUS_N, pointsIndices, coordinates; use_float32=true)


    

    # One element of A for the points (νᶜ, ν), the field iField and the expression iExpr:
    # sums, over the α entries and over the points, the substituted node value times the
    # sum returned by the GPU helper. `ν` is not read in the body.
    function eachElementAjiννᶜ(νᶜ,ν,iField,iExpr)

        μ_plus_η = νᶜ # we do not subtract from ν since the coordinates are local and ν != (1,1,...)
        linearμ_plus_η=LI_L_MINUS_N[μ_plus_η]
        Atmp=0
        Areturn =0
        # NOTE(review): the cell that fills `bigα` stores it as bigα[iField,iExpr]; the two
        # indices are swapped here — confirm.
        α = bigα[iExpr, iField]
        μᶜ_μ_pairs=precompute_valid_pairs(vec2car.(pointsIndices))

        for eachα ∈ α 
            nᶜ = eachα.nᶜ
            n = eachα.n
            nodeValue = eachα.node

            # multi-indices that stay inside L_MINUS_N after the shift by n (resp. nᶜ)
            L_avail = (n .+ L_MINUS_N) ∩ L_MINUS_N
            Lᶜ_avail = (nᶜ .+ L_MINUS_N) ∩ L_MINUS_N

            lᶜ_l_pairs= precompute_valid_pairs(Lᶜ_avail,L_avail)

            localmapηᶜ = Dict{Any,Any}()

            for μᶜ_plus_ηᶜ ∈ pointsIndices
                linearμᶜ_plus_ηᶜ=LI_points[μᶜ_plus_ηᶜ...]
                
                # substitution map: variable index => value of that variable at this point
                for iVar in eachindex(vars)
                    localmapηᶜ[iVar] = varM[iVar, linearμᶜ_plus_ηᶜ][]
                end 
                @show nodeValue,localmapηᶜ
                @show substitutedValue = substitute(nodeValue, localmapηᶜ)

                # Precompute l_pairs as linear indices (1-based)
                l_pairs = Tuple{Int,Int}[]
                for (lᶜ,l) in lᶜ_l_pairs
                    li = Int(LI_L_MINUS_N[l])    # linearl
                    lc = Int(LI_L_MINUS_N[lᶜ])   # linearlc
                    push!(l_pairs, (li, lc))
                end

                # Precompute mu_pairs (list of (μᶜ, μ) index pairs in 1-based)
                mu_pairs = [(Int(μᶜ), Int(μ)) for (μᶜ, μ) in μᶜ_μ_pairs]

                # prepare tmpC vectors (for current linearμ_plus_η, linearμᶜ_plus_ηᶜ)
                tmpC_left  = T_gpu.(vec(tmpCˡημ[linearμ_plus_η, :]))         # length nL
                tmpC_right = T_gpu.(vec(tmpCˡημᶜ[linearμᶜ_plus_ηᶜ, :]))       # length nL

                # call GPU helper (returns Float32 scalar)
                gpu_sum = gpu_sum_over_lpairs_mu_pairs!(
                    stackedS, Lmat, backend, l_pairs, mu_pairs, tmpC_left, tmpC_right;
                    workgroup=(256,)
                )

                # the string literal below holds the disabled CPU version of the sum; it is
                # not executed
                """
                pureFloatKernel = 1.0

                for (lᶜ,l) ∈ lᶜ_l_pairs 
                    linearlᶜ=LI_L_MINUS_N[lᶜ]
                    linearl=LI_L_MINUS_N[l]
                    l_n_field = Tuple(l-n)
                    l_n_variable = Tuple(lᶜ-nᶜ)
                    for (μᶜ,μ) ∈ μᶜ_μ_pairs
                        for iCoord ∈ eachindex(coordinates)
                            pureFloatKernel*=integral1DWYYKK[iCoord][μᶜ[iCoord],μ[iCoord],l_n_variabl[iCoord]e+1,l_n_field[iCoord]+1]*tmpCˡημᶜ[linearμᶜ_plus_ηᶜ,linearlᶜ]*tmpCˡημ[linearμ_plus_η,linearl]
                        end
                    end
                end
                """
                Atmp += substitutedValue*gpu_sum
            end
            # NOTE(review): `Atmp` is not reset between α entries, so every earlier
            # contribution is added again here — confirm that this is intended.
            Areturn+=Atmp
        end
        return Areturn
    end


    for iExpr in eachindex(exprs) # j of Ajiννᶜ
        for iField in eachindex(fields) # i of Ajiννᶜ
            for ν in νIndices # ν in Ajiννᶜ (in the relative coordinates) 
                for νᶜ in pointsIndices # νᶜ in Ajiννᶜ (in the relative coordinates)
                    @show ν, νᶜ
                    # NOTE(review): this rebinds `Ajiννᶜ` to the last scalar instead of
                    # storing it in the array allocated above — confirm the target index.
                    Ajiννᶜ = eachElementAjiννᶜ(νᶜ,ν,iField,iExpr)
                end
            end
        end
    end
                  

                    




    return Ajiννᶜ,tmp
end

In [None]:
# Debug call of ASemiSymbolic for the geometry configuration selected above.
# NOTE(review): ASemiSymbolic reads `T_gpu` and calls `gpu_sum_over_lpairs_mu_pairs!`, which
# are defined in the next cell — that cell presumably has to be run first.
tmpAjiννᶜU,tmp=ASemiSymbolic(coordinates,multiOrdersIndices,pointsIndices,multiPointsIndices,middleLinearν,Δ,varM,bigα,orderBspline,WorderBspline,NtypeofExpr,NtypeofFields)
@show tmp

In [None]:
# Requires: KernelAbstractions, Adapt, and `backend` already set (Metal/CPU/CUDA)
# Use Float32 on GPU for speed (change T_gpu = Float64 if you need doubles)
const T_gpu = Float32

# Kernel: one thread computes contribution for a single (linearl, linearlc, μc, μ) combination
# Kernel: one thread computes the contribution of a single (linearl, linearlc, μc, μ)
# combination and writes it to its own slot of `out`:
#   out[tid] = tmpC_right[lc] * tmpC_left[li] * ∏_ic S[ic, μc, μ, Lmat[lc, ic]+1, Lmat[li, ic]+1]
# `nL` is accepted but not read.
@kernel function kernel_contrib_per_combo!(
    out,      # device array length = Ncombos, type T_gpu
    S,        # stacked integrals: (ncoord, nmu, nmu, nL, nL) of type T_gpu
    Lmat,     # Int32 array (nL, ncoord) where row r gives multi-index entries for linearl r
    linearl_arr,   # Int32 array length Ncombos of linearl indices (1-based)
    linearlc_arr,  # Int32 array length Ncombos of linearlc indices (1-based)
    μc_arr,        # Int32 array length Ncombos of μc indices (1-based)
    μ_arr,         # Int32 array length Ncombos of μ indices (1-based)
    tmpC_left,     # T_gpu vector length nL
    tmpC_right,    # T_gpu vector length nL
    ncoord::Int32,
    nL::Int32
)
    # scalar 1D index: the `NTuple` kind would return a tuple, which can neither be
    # compared with `length(out)` nor be used as an index of the 1D arrays
    tid = @index(Global, Linear)
    if tid <= length(out)
        li = linearl_arr[tid]
        lc = linearlc_arr[tid]
        μc = μc_arr[tid]
        μ  = μ_arr[tid]

        kernelProduct = one(eltype(out))
        @inbounds for ic in 1:ncoord
            l_field = Lmat[li, ic]      # stored as row=linearl, col=ic
            l_var   = Lmat[lc, ic]
            # S dims: (ncoord, nmu, nmu, nL, nL)
            kernelProduct *= S[ic, μc, μ, l_var+1, l_field+1]
        end
        # multiply by C factors
        out[tid] = tmpC_right[lc] * tmpC_left[li] * kernelProduct
    end
end

# Host helper to call kernel for given lists and sum the results
# Host-side driver: expands every (l_pair, mu_pair) combination into flat Int32 index
# tables, uploads all inputs to `backend`, runs `kernel_contrib_per_combo!` with one
# work-item per combination, and returns the host-side sum of the per-combination
# results as a `T_gpu` scalar.
#
#   stackedS, Lmat   host arrays; converted to T_gpu / Int32 before upload
#   backend          KernelAbstractions backend the kernel is launched on
#   l_pairs          (linearl, linearlc) pairs, 1-based
#   mu_pairs         (μc, μ) pairs, 1-based
#   tmpC_left/right  coefficient vectors indexed by linearl / linearlc
#   workgroup        workgroup size handed to the kernel constructor
#
# Returns zero(T_gpu) without touching the device when there is no combination.
function gpu_sum_over_lpairs_mu_pairs!(
        stackedS, Lmat, backend,
        l_pairs::Vector{Tuple{Int,Int}},
        mu_pairs::Vector{Tuple{Int,Int}},
        tmpC_left::Vector{T_gpu},
        tmpC_right::Vector{T_gpu};
        workgroup=(256,)
    )

    n_total = length(l_pairs) * length(mu_pairs)
    n_total == 0 && return zero(T_gpu)

    # Flat index tables, one entry per combination.
    li_host = Vector{Int32}(undef, n_total)
    lc_host = Vector{Int32}(undef, n_total)
    μc_host = Vector{Int32}(undef, n_total)
    μ_host  = Vector{Int32}(undef, n_total)

    # `product` varies its first iterator fastest, so mu_pairs is the inner loop and
    # l_pairs the outer one.
    for (k, ((μc, μ), (li, lc))) in enumerate(Iterators.product(mu_pairs, l_pairs))
        li_host[k] = Int32(li)
        lc_host[k] = Int32(lc)
        μc_host[k] = Int32(μc)
        μ_host[k]  = Int32(μ)
    end

    # Upload everything (if this is called many times, adapt outside and reuse).
    upload(x) = Adapt.adapt(backend, x)
    result_dev = upload(zeros(T_gpu, n_total))
    S_dev      = upload(convert(Array{T_gpu}, stackedS))
    Lmat_dev   = upload(Int32.(Lmat))
    li_dev     = upload(li_host)
    lc_dev     = upload(lc_host)
    μc_dev     = upload(μc_host)
    μ_dev      = upload(μ_host)
    Cleft_dev  = upload(tmpC_left)
    Cright_dev = upload(tmpC_right)

    # One work-item per combination.
    n_coord = Int32(size(stackedS, 1))
    n_rows  = Int32(size(Lmat, 1))
    launch! = kernel_contrib_per_combo!(backend, workgroup)
    launch!(
        result_dev, S_dev, Lmat_dev, li_dev, lc_dev, μc_dev, μ_dev,
        Cleft_dev, Cright_dev, n_coord, n_rows;
        ndrange = (n_total,)
    )

    # Wait for the kernel to finish before reading the results back.
    if @isdefined(CUDA) && backend isa CUDABackend
        CUDA.synchronize()
    elseif @isdefined(Metal) && backend isa MetalBackend
        Metal.synchronize()
    else
        KernelAbstractions.synchronize(backend)
    end

    # Copy back and reduce on the host.
    return sum(Array(result_dev))
end


In [None]:
# inside ASemiSymbolic

# Enumerate every (lᶜ, l) pair with lᶜ taken from `ensemblePoints2` and l from
# `ensemblePoints`. `ensemblePoints` is the outer (slow) loop and `ensemblePoints2`
# the inner (fast) one. No filtering is applied; any extra validity condition
# would go into the comprehension.
function precompute_valid_pairs(ensemblePoints2,ensemblePoints)
    PairT = Tuple{CartesianIndex,CartesianIndex}
    return PairT[(inner, outer) for outer in ensemblePoints for inner in ensemblePoints2]
end

# Enumerate every ordered pair (lᶜ, l) drawn from `ensemblePoints` with itself.
# The first tuple element runs fastest. No filtering is applied; any extra
# validity condition would go inside the `do` block.
function precompute_valid_pairs(ensemblePoints)
    collected = Vector{Tuple{CartesianIndex,CartesianIndex}}()
    # `product` varies its first iterator fastest, which reproduces a nested loop
    # whose inner variable is the first tuple element.
    foreach(Iterators.product(ensemblePoints, ensemblePoints)) do (inner, outer)
        push!(collected, (inner, outer))
    end
    return collected
end

# Work-in-progress GPU variant of the semi-symbolic operator assembly.
#
# Steps performed by the visible code:
#   1. shift `multiOrdersIndices` so that its first element becomes the origin (L_MINUS_N);
#   2. for every coordinate, load or compute the 1D kernel integrals through
#      `myProduceOrLoad(getIntegralWYYKK, ...)` and keep the "modμ" entry alongside;
#   3. load or compute the Taylor-coefficient inversion (`TaylorCoefInversion`);
#   4. build the N-dimensional kernel integral with `build_kernelIntegral_gpu`;
#   5. call `eachElementAjiννᶜ_gpu` for every (iExpr, iField, ν, νᶜ) combination.
#
# Returns `(Ajiννᶜ, tmp)`; `tmp` is created empty and nothing is pushed to it here.
#
# NOTE(review): the following names are read but are neither arguments nor assigned
# inside this function, so they must exist at notebook (global) scope for the call to
# work: `exprs`, `fields`, `vars`, `α`, `C_eta_left`, `C_eta_right`, `Ulocal`,
# `stackedS`, `Lmat`. In the scratch loops elsewhere in this notebook `α` is obtained
# as `bigα[iExpr,iField]` — presumably the same is intended here; confirm.
# NOTE(review): `Cˡη` and `kernelIntegral` are computed but not passed on to
# `eachElementAjiννᶜ_gpu` in the code below.
function ASemiSymbolic(coordinates,multiOrdersIndices,pointsIndices,multiPointsIndices,middleLinearν,Δ,varM,bigα,orderBspline,WorderBspline,NtypeofExpr,NtypeofFields)
    # I write this function to be able to go through the matrix inversion path
    # the model function is AuSymbolic (below)


    # here we develop!!! 06/11/2025!!!


    # NOTE(review): the index 14 is hard-coded although `middleLinearν` is passed in and
    # used further down — presumably 14 is the centre of a 3×3×3 stencil; confirm.
    νIndices=[pointsIndices[14]] # here I just put this, since normally ν that defines L(ν) in relative coordinates is centered 


    # auxiliary output returned to the caller; stays empty in this version
    tmp=[]
    #region preparation 

    # relative orders: every multi-order index minus the first one (also printed by @show)
    L_MINUS_N = multiOrdersIndices
    @show L_MINUS_N = L_MINUS_N .-L_MINUS_N[1]
    
    

    #endregion

    #region we compute the integral for 1D domain(s)

    # one entry per coordinate; element types are left open (Any) because they come
    # from the loaded/computed dictionaries
    integral1DWYYKK = Array{Any,1}(undef,length(coordinates))
    modifiedμ=Array{Any,1}(undef,length(coordinates))
    for iCoord in eachindex(coordinates) # for each coordinate
        # parameter dictionary for this coordinate, handed to myProduceOrLoad
        integralParams = @strdict oB =orderBspline[iCoord] oWB = WorderBspline[iCoord] νCoord=pointsIndices[middleLinearν][iCoord] LCoord = multiPointsIndices[end][iCoord] ΔCoord=Δ[iCoord] l_n_max=L_MINUS_N[end][iCoord]
        output = myProduceOrLoad(getIntegralWYYKK,integralParams,"intKernel")
        integral1DWYYKK[iCoord] = output["intKernelforνLΔ"]
        modifiedμ[iCoord] = output["modμ"] # this can be still 'nothing'
    end
    # debugging output for the first coordinate
    @show typeof(integral1DWYYKK[1]), typeof(modifiedμ[1])
    @show size(integral1DWYYKK[1])
    @show modifiedμ,integral1DWYYKK[1]
    #endregion


    # Taylor-coefficient inversion, keyed by the listed variables
    coefInversionDict = @strdict coordinates multiOrdersIndices pointsIndices Δ WorderBspline modifiedμ

    output=myProduceOrLoad(TaylorCoefInversion,coefInversionDict,"taylorCoefInv")
    Cˡη=output["CˡηGlobal"]

    # the order is: (νᶜ,) ν, i, j  here

    # NOTE(review): this preallocated 4D array is rebound (not filled) by the assignment
    # inside the loops below, so the function returns whatever the last
    # `eachElementAjiννᶜ_gpu` call returned.
    Ajiννᶜ = Array{Num,4}(undef,length(pointsIndices),length(pointsIndices),NtypeofFields,NtypeofExpr)


    # lookup tables from Cartesian to linear indices
    LI_points = LinearIndices(pointsIndices)
    #LI_multi  = LinearIndices(multiOrdersIndices)
    LI_L_MINUS_N = LinearIndices(L_MINUS_N)

    


    # prepare N-dimension WYYKK kernel integral over Ω

    #kernelIntegral=Array{Float64,2}(undef,length(L_MINUS_N),length(L_MINUS_N))

    kernelIntegral=build_kernelIntegral_gpu(integral1DWYYKK, L_MINUS_N, pointsIndices, coordinates; use_float32=true)


    


    for iExpr in eachindex(exprs) # j of Ajiννᶜ
        for iField in eachindex(fields) # i of Ajiννᶜ
            for ν in νIndices # ν in Ajiννᶜ (in the relative coordinates) 
                for νᶜ in pointsIndices # νᶜ in Ajiννᶜ (in the relative coordinates)
                    Ajiννᶜ = eachElementAjiννᶜ_gpu(νᶜ,ν,iField,iExpr,α;bigα, C_eta_left, C_eta_right, Ulocal,
        pointsIndices, L_MINUS_N, LI_points, LI_L_MINUS_N,
        stackedS, Lmat,
        vars, varM, coordinates,
        batch_mu_pairs = 10000,
        workgroup = (256,))
                end
            end
        end
    end
                  

                    




    return Ajiννᶜ,tmp
end

In [None]:


    # Scratch loop: CPU reference for filling `kernelIntegral`.
    # As written it only resets every (l-n, lᶜ-nᶜ) entry to 0.0: both the product update
    # and the accumulation are commented out, so `linearμᶜ`, `linearμ`, `l_n_field`,
    # `l_n_variable` and `tmpKernelProduct` are computed but have no effect on the array.
    # NOTE(review): `L_MINUS_N`, `LI_L_MINUS_N`, `LI_points`, `kernelIntegral` and
    # `integral1DWYYKK` are assigned inside ASemiSymbolic elsewhere in this notebook;
    # this cell needs them at notebook scope to run.
    for lᶜ_minus_nᶜ ∈ L_MINUS_N
        for l_minus_n ∈ L_MINUS_N
            kernelIntegral[LI_L_MINUS_N[l_minus_n],LI_L_MINUS_N[lᶜ_minus_nᶜ]]=0.0
            for μᶜ ∈ pointsIndices
                linearμᶜ=LI_points[μᶜ]
                for μ ∈ pointsIndices
                    linearμ=LI_points[μ]
                    tmpKernelProduct = 1.0
                    for iCoord in eachindex(coordinates)
                        # per-coordinate entries of the two relative multi-indices
                        l_n_field = Tuple(l_minus_n)[iCoord]
                        l_n_variable = Tuple(lᶜ_minus_nᶜ)[iCoord]
                        #tmpKernelProduct*=integral1DWYYKK[iCoord][μᶜ[iCoord],μ[iCoord],l_n_variable+1,l_n_field+1]
                    end
                    #kernelIntegral[LI_L_MINUS_N[l_minus_n],LI_L_MINUS_N[lᶜ_minus_nᶜ]]+=tmpKernelProduct
                end
            end
        end
    end




                    # Scratch fragment (kept for reference): skeleton of the per-element
                    # coefficient loops.
                    # NOTE(review): this fragment contains two more `end` than loop
                    # openers, so the cell does not parse as it stands; `α` and
                    # `valid_pairs` are not assigned in this cell.
                    # wow this seems to be super difficult

                    # slice of the Taylor coefficients for the current μᶜ
                    Cˡημᶜ = Cˡη[:, :, linearμᶜ]  # avoid @show!
                    for linearμ in eachindex(pointsIndices), linearμ_plus_η in eachindex(pointsIndices)
                        Coef4U_HERE = 0
                        for eachα in α
                            nᶜ = eachα.nᶜ; n = eachα.n
                            localmapηᶜ = Dict{Any,Any}()
                            for linearμᶜ_plus_ηᶜ in eachindex(pointsIndices)
                                #empty!(localmapηᶜ)
                                μᶜ_plus_ηᶜ_idx = LI_points[vec2car(pointsIndices[linearμᶜ_plus_ηᶜ])]
                                ####localmapηᶜ[vars[iVar]] = varM[iVar, linearμᶜ_plus_ηᶜ][]
                                # the map is keyed by the variable position iVar here
                                # (the commented line above keyed it by vars[iVar])
                                for iVar in eachindex(vars)
                                    localmapηᶜ[iVar] = varM[iVar, μᶜ_plus_ηᶜ_idx][]
                                end
                                # body still to be written
                                for (l, lᶜ) in valid_pairs
                                #    linearl  = LI_multi[l]
                                #    linearlᶜ = LI_multi[lᶜ]
                                                
                                end
                            end
                        end
                    end
                end
            end

# Hereafter I try to optimise equation 54.

In [None]:
# just a reservoir of for loops
# Reference (non-GPU) version of the nested loops for eq. 54, kept for copy-and-paste.
# NOTE(review): the outermost `for iExpr` is never closed in this cell (one `end` is
# missing), so the cell does not parse as it stands.
# NOTE(review): the final assignment indexes `Ajiννᶜ` with (linearμ_plus_η, iField, iExpr)
# only, while it sits inside the loops over linearμᶜ and linearμ — each (μᶜ, μ)
# iteration therefore overwrites the value stored by the previous one.

    for iExpr in eachindex(exprs) # j in eq. 54
        for iField in eachindex(fields) # i in eq. 54
            α = bigα[iExpr,iField]

            for linearμᶜ in eachindex(pointsIndices)
                    
                @show tmpCˡημᶜ=Cˡη[:,:,linearμᶜ] # C^{(l')}_{μ'+η';μ',ν}

                for linearμ in eachindex(pointsIndices)

                    tmpCˡημ=Cˡη[:,:,linearμ] # C^{(l)}_{μ+η;μ,ν}


                    for linearμ_plus_η in eachindex(pointsIndices) # relative position νᶜ-ν

                        #U_HERE = Ulocal[linearμ_plus_η,iField]
                        
                        # accumulator for the coefficient that multiplies U at this point
                        Coef4U_HERE = 0

                        for eachα in α
                            
                            nodeValue=eachα.node
                            nᶜ = eachα.nᶜ
                            n = eachα.n

                            for linearμᶜ_plus_ηᶜ in eachindex(pointsIndices)
                                
                                #linearμᶜ_plus_ηᶜ_in_the_whole = LinearIndices(multiOrdersIndices)[vec2car(pointsIndices[linearμᶜ_plus_ηᶜ])]
                                linearμᶜ_plus_ηᶜ_in_the_whole = LinearIndices(pointsIndices)[vec2car(pointsIndices[linearμᶜ_plus_ηᶜ])]
                                #NF 29/09/2025 I am not sure if it is ok to change like this ...

                                # substitution map: each variable -> its entry of varM at this point
                                localmapηᶜ=Dict()

                                for iVar in eachindex(vars)
                                    localmapηᶜ[vars[iVar]]=varM[iVar,linearμᶜ_plus_ηᶜ_in_the_whole][]
                                end
                                
                                # keep only the shifted orders that are themselves members of L_MINUS_N
                                for l ∈ n .+ L_MINUS_N
                                    if l ∈ L_MINUS_N
                                        linearl = LinearIndices(multiOrdersIndices)[l]
                                        for lᶜ ∈ nᶜ.+ L_MINUS_N
                                            if lᶜ ∈ L_MINUS_N
                                                linearlᶜ = LinearIndices(multiOrdersIndices)[lᶜ]
                                                # product over coordinates of the 1D kernel integrals
                                                kernelProducts = 1
                                                for iCoord in eachindex(coordinates)
                                                    l_n_field = Tuple(l-n)[iCoord]
                                                    l_n_variable = Tuple(lᶜ-nᶜ)[iCoord]
                                                    # here I take only the middle_value
                                                    #kernelProducts*=integralBsplineTaylorKernels1D(orderBspline[iCoord],Δ[iCoord],l_n_variable,l_n_field)[1]
                                                    kernelProducts*=integral1DWYYKK[iCoord][pointsIndices[linearμᶜ][iCoord],pointsIndices[linearμ][iCoord],l_n_variable+1,l_n_field+1]
                                                    #kernelProducts*=integralBsplineTaylorKernels1DWithWindow1D(orderBspline[iCoord],WorderBspline[iCoord],pointsIndices[linearμᶜ][iCoord],pointsIndices[linearμ][iCoord],pointsIndices[linearν][iCoord],multiPointsIndices[end][iCoord], Δ[iCoord],l_n_variable,l_n_field)
                                                end
                                                
                                                #nodeValue=Symbol(nodeValue)
                                                #@show localExpression=substitute(nodeValue,localmap)
                                                #@show typeof(nodeValue)
                                                #newExpr = mySimplify.(map((e) -> substitute(e, Dict(localmap)), nodeValue))
                                                
                                                substitutedValue = substitute(nodeValue, localmapηᶜ)

                                                #CoefU +=tmpCˡημᶜ[linearμᶜ_plus_ηᶜ,linearlᶜ]*tmpCˡημ[linearμ_plus_η,linearl]*kernelProducts*substitutedValue*U_HERE
                                                Coef4U_HERE +=tmpCˡημᶜ[linearμᶜ_plus_ηᶜ,linearlᶜ]*tmpCˡημ[linearμ_plus_η,linearl]*kernelProducts*substitutedValue
                                            end
                                        end
                                    end
                                end
                                
                            end
                            
                        end
              
                        Ajiννᶜ[linearμ_plus_η,iField,iExpr]=Coef4U_HERE

                    end
                end
            end
       

            #AjiννᶜU[iExpr] += CoefU
            
        end

In [None]:
tmpAjiννᶜU

# Test the types returned by the BsplineTaylorIntegral1D kernels

In [None]:
# Smallest possible parameter set for one windowed B-spline/Taylor kernel integral;
# the result dictionary is inspected in the following cells.
paramsBsplineTaylorIntegral1D=@strdict BsplineOrder = 1 WBsplineOrder =1 μᶜ = 1 μ = 1 ν = 1 L = 3 Δ = 1.0 l_n_variable = 0 l_n_field = 0
output = myProduceOrLoad(
    integralBsplineTaylorKernels1DWithWindow1D,
    paramsBsplineTaylorIntegral1D,
    "BsplineInt",
    "BsplineTaylorIntegral1D",
)
output["kernels"]
modμ = output["modμ"]

In [None]:
typeof(output["modμ"])

In [None]:
# inside ASemiSymbolic
# Scratch copy of the first half of ASemiSymbolic, run at notebook scope so that the
# intermediate results (`integral1DWYYKK`, `modifiedμ`) can be inspected in later cells.

 AjiννᶜU=0
    Ulocal=0

    #region preparation 

    # relative orders: every multi-order index minus the first one
    L_MINUS_N = multiOrdersIndices
    L_MINUS_N = L_MINUS_N .-L_MINUS_N[1]

    #endregion

     #region we compute the integral for 1D domain(s)

    # one entry per coordinate
    integral1DWYYKK = Array{Any,1}(undef,length(coordinates))
    modifiedμ=Array{Any,1}(undef,length(coordinates))
    #integralParams=nothing
    for iCoord in eachindex(coordinates) # for each coordinate
        integralParams = @strdict oB =orderBspline[iCoord] oWB = WorderBspline[iCoord] νCoord=pointsIndices[middleLinearν][iCoord] LCoord = multiPointsIndices[end][iCoord] ΔCoord=Δ[iCoord] l_n_max=L_MINUS_N[end][iCoord]
        output = myProduceOrLoad(getIntegralWYYKK,integralParams,"intKernel")
        # store and print the loaded/computed entries for this coordinate
        @show integral1DWYYKK[iCoord] = output["intKernelforνLΔ"]
        @show modifiedμ[iCoord] = output["modμ"] # this can be still 'nothing'
    end
    @show typeof(integral1DWYYKK[1]), typeof(modifiedμ[1])
    @show size(integral1DWYYKK[1])
    #endregion



In [None]:
typeof(integral1DWYYKK[1])

In [None]:
integral1DWYYKK[1]

In [None]:
@show (integral1DWYYKK[2])

In [None]:
#@profilehtml opt = myProduceOrLoad(makeCompleteCostFunctions,concreteModelParameters,"numOperators","quasiNum")


In [None]:
statprofilehtml()

In [None]:
using InteractiveUtils
# Open the profiling report in the default browser.
# The report location is resolved relative to the current working directory (the
# notebook does `cd(@__DIR__)` at start-up) instead of a user-specific absolute path.
# NOTE(review): assumes statprofilehtml() wrote to its default "statprof" folder — confirm.
statprofReport = abspath("statprof", "index.html")
# `open` only exists on macOS; pick the platform's opener so the cell runs elsewhere too.
statprofOpener = Sys.isapple() ? `open` : Sys.iswindows() ? `cmd /c start ""` : `xdg-open`
run(`$statprofOpener $statprofReport`)

In [None]:
# Deliberately failing workload used to check that the profiler still produces a report
# when the profiled code throws: it accumulates sin(i) and raises "boom!" at
# i == 5_000_000, so the `return` is never reached.
function buggy_function()
    # Float64 accumulator: starting from the integer 0 would change the variable's
    # type on the first `+=` (type instability).
    s = 0.0
    for i in 1:10^7
        s += sin(i)
        if i == 5_000_000
            error("boom!")
        end
    end
    return s
end

# Profile the deliberately failing function. The exception it raises is caught and
# logged as a warning (with the exception attached under the key `err`) so that the
# cell continues to the report generation below.
# NOTE(review): `@profilehtml` / `statprofilehtml` presumably come from
# StatProfilerHTML, loaded at the top of the notebook — confirm.
try
    @profilehtml buggy_function()
catch err
    @warn "Crashed" err
end

# Show the flamegraph in your browser
statprofilehtml()

In [None]:
using InteractiveUtils
# Open the profiling report in the default browser.
# The report location is resolved relative to the current working directory (the
# notebook does `cd(@__DIR__)` at start-up) instead of a user-specific absolute path.
# NOTE(review): assumes statprofilehtml() wrote to its default "statprof" folder — confirm.
statprofReport = abspath("statprof", "index.html")
# `open` only exists on macOS; pick the platform's opener so the cell runs elsewhere too.
statprofOpener = Sys.isapple() ? `open` : Sys.iswindows() ? `cmd /c start ""` : `xdg-open`
run(`$statprofOpener $statprofReport`)