## Section 3: Distributed Memory Assembly of Matrix and Right-Hand Side Vector 
- using [PartitionedMatrices](https://github.com/fverdugo/PartitionedArrays.jl); need to study the example described at [example](https://www.francescverdugo.com/PartitionedArrays.jl/dev/usage/).  

<div>
<img src="./figures/dag-fem-1d.jpg" width=400 /> 
<center> Figure 1: Directed acyclic graph representation of decomposed mesh of 4 elements on the interval. A finite element discretization is assumed. </center>   
</div>

[Au = f example here.](https://www.francescverdugo.com/PartitionedArrays.jl/dev/examples/#Distributed-sparse-linear-solve)

In [1]:
using PartitionedArrays
using IterativeSolvers
using LinearAlgebra

In [2]:
#..first generate the row partition
np = 3
N = 10
ranks = LinearIndices((np,))
row_partition = uniform_partition(ranks,N+1)

3-element Vector{PartitionedArrays.LocalIndicesWithConstantBlockSize{1}}:
 [1, 2, 3]
 [4, 5, 6, 7]
 [8, 9, 10, 11]

In [3]:
#..construct the mesh: see before 
h = 1/N; 
x = Vector(0:h:1); 

#..Mesh with points and edges 
#..point holds the coordinates of the left and right node of the element
#..edges holds the global indices of the left and right node of the element
points = collect( [x[i], x[i+1]] for i in 1:length(x)-1) 
edges = collect( [i, i+1] for i in 1:length(x)-1); 

#..Set the source function 
fsource(x) = x*(x-1); 

Something to compare with

In [4]:
#..Initialize global matrix and right-hand side value 
A = zeros(length(x), length(x)); 
f = zeros(length(x), 1); 

#..Perform loop over elements and assemble global matrix and vector 
for i=1:length(edges) 

  xl, xr = points[i,:][1]
  floc = (xr-xl) * [fsource(xl) fsource(xr)];
  Aloc = (1/(xr-xl))*[1 -1; -1 1]; 

  for j=1:2 
    f[edges[i][j]] += floc[j];
    for k =1:2 
      A[edges[i][j], edges[i][k]] += Aloc[j,k]; 
    end 
  end 

end

f_orig = copy(f)
A_orig = copy(A)
f

11×1 Matrix{Float64}:
  0.0
 -0.018000000000000002
 -0.032
 -0.041999999999999996
 -0.048
 -0.04999999999999999
 -0.04799999999999999
 -0.04200000000000002
 -0.032
 -0.01799999999999999
  0.0

Compute the rhs vector

In [5]:
j = 2
IV = map(row_partition) do row_indices
    I,V = Int[], Float64[]
    for global_row in local_to_global(row_indices) 
        # xl, xr = points[global_row,:][1]
        # floc = (xr-xl) * [fsource(xl) fsource(xr)]
        # f[edges[global_row][j]] += floc[j]
        # xlp1, xrp1 = points[global_row+1,:][1]
        # floc = (xrp1-xlp1) * [fsource(xlp1) fsource(xrp1)]
        # f[edges[global_row][2]] += floc[2]
        if global_row == 1
            xl, xr = points[global_row,:][1]
            floc = (xr-xl) * [fsource(xl) fsource(xr)]
            v = floc[1]
        elseif global_row == N+1
            xl, xr = points[global_row-1,:][1]
            floc = (xr-xl) * [fsource(xl) fsource(xr)]
            v = floc[2]
        else
            xll, xrl = points[global_row-1,:][1]
            flocl = (xrl-xll) * [fsource(xll) fsource(xrl)]
            xlr, xrr = points[global_row,:][1]
            flocr = (xrr-xlr) * [fsource(xlr) fsource(xrr)]
            v = flocr[1] + flocl[2]
        end
        push!(I,global_row)
        push!(V,v)
    end
    I,V
end
I,V = tuple_of_arrays(IV)
b = pvector!(I,V,row_partition) |> fetch
b

11-element PVector{Vector{Float64}} partitioned into 3 parts


In [6]:
V

3-element Vector{Vector{Float64}}:
 [-0.0, -0.018000000000000002, -0.032]
 [-0.041999999999999996, -0.048, -0.04999999999999999, -0.04799999999999999]
 [-0.04200000000000002, -0.032, -0.01799999999999999, 0.0]

Compute the system matrix

In [7]:
IJV = map(row_partition) do row_indices
    I,J,V = Int[], Int[], Float64[]
    for global_row in local_to_global(row_indices)
        if global_row in (1,n)
            push!(I,global_row)
            push!(J,global_row)
            push!(V,1.0)
        else
            push!(I,global_row)
            push!(J,global_row-1)
            push!(V,-1.0)
            push!(I,global_row)
            push!(J,global_row)
            push!(V,2.0)
            push!(I,global_row)
            push!(J,global_row+1)
            push!(V,-1.0)
        end
    end
    I,J,V
end
I,J,V = tuple_of_arrays(IJV)
col_partition = row_partition
A = psparse!(I,J,V,row_partition,col_partition) |> fetch

LoadError: UndefVarError: n not defined

Generate an initial guess that fulfills the boundary conditions. Solve and check the result

In [8]:
x = similar(b,axes(A,2))
x .= b
IterativeSolvers.cg!(x,A,b)
r = A*x - b
norm(r)

LoadError: MethodError: no method matching ndims(::Type{PartitionedArrays.PBroadcasted{Vector{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(identity), Tuple{SubArray{Float64, 1, Vector{Float64}, Tuple{UnitRange{Int32}}, true}}}}, Vector{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(identity), Tuple{SubArray{Float64, 1, Vector{Float64}, Tuple{UnitRange{Int32}}, true}}}}, Vector{PartitionedArrays.LocalIndicesWithConstantBlockSize{1}}}})
[0mClosest candidates are:
[0m  ndims([91m::Base.Broadcast.Broadcasted{<:Any, <:Tuple{Vararg{Any, N}}}[39m) where N at broadcast.jl:247
[0m  ndims([91m::Base.Iterators.ProductIterator[39m) at iterators.jl:1015
[0m  ndims([91m::CartesianIndices[39m) at multidimensional.jl:380
[0m  ...