# Parallel computing on multivectors

In [1]:
using Distributed
# Start 4 additional worker processes; the parallel setup printed further down
# lists them as workers 2-5.
addprocs(4);

In [2]:
@everywhere using JOLI
@everywhere using LinearAlgebra

#### Set up problem size, define operator, and input

In [3]:
# Problem dimensions: M is the operator size and the number of rows of the input,
# NVC is the number of columns of the input multivector.
M, NVC = 8, 15;

In [4]:
# Build the DFT operator for length-M input; `show` reports it as 8x8 with a
# Float64 domain (DDT) and Complex{Float64} range (RDT).
# NOTE(review): `planned=false` presumably disables FFT plan precomputation -- confirm in JOLI docs.
A=joDFT(M,planned=false);
show(A)

Type: joLinearFunction
Name: joDFT
Size: (8, 8)
 DDT: Float64
 RDT: Complex{Float64}


In [5]:
# Input multivector: an M x NVC matrix of standard-normal Float64 samples.
x=randn(M,NVC)

8×15 Array{Float64,2}:
 -0.233567   1.3582    0.573466   …   0.682527  -0.970062    0.679522
 -0.658011  -0.155475  0.896616      -0.923747  -1.70329    -0.992734
  0.127603  -0.253827  2.1606         0.471299  -0.272386    1.04764 
  0.618609  -0.474054  0.739256       0.192607   1.51645     1.17235 
  0.481497   0.995508  1.70796        0.666186   0.883242    0.795031
  0.677615  -0.595183  0.937017   …   1.19306   -0.166836    1.05681 
 -0.534713   0.628026  0.0629796      0.285508   0.24773     0.405323
  0.681263   0.561292  1.99202       -0.376353  -0.0886528  -1.17941 

## Using Shared Arrays

#### Define parallel setup

In [6]:
# Describe how an (M, NVC) array is partitioned over the workers. The printout
# shows chunks [1, 4]: rows are kept whole and the 15 columns are split into
# four ranges, one per worker.
ps=joPAsetup((M,NVC));
show(ps)

joPAsetup: joPAsetup
 DataType: Float64
 Dims    : (8, 15)
 Chunks  : [1, 4]
 Workers : [2, 3, 4, 5]
  Worker/ranges:   2 (1:8, 1:4)
  Worker/ranges:   3 (1:8, 5:8)
  Worker/ranges:   4 (1:8, 9:12)
  Worker/ranges:   5 (1:8, 13:15)


#### Define parallel operator

In [7]:
# Wrap the serial operator A together with the parallel setup ps; `show` reports
# the result as a joSAdistributedLinearOperator with NVC = 15.
pA=joSAdistributedLinOp(A,ps);
show(pA)

Type: joSAdistributedLinearOperator
Name: joSAdistributedLinearOperator(joDFT)
Size: (8, 8)
 NVC: 15
 DDT: Float64
 RDT: Complex{Float64}


#### Define shared array

In [8]:
# Allocate an array from the parallel setup `ps` (displayed below as an
# 8x15 SharedArray{Float64,2}).
px = salloc(ps)
# Fill it in place with the contents of x. Broadcasting assignment writes
# element by element into px and avoids the temporary copy that the
# right-hand-side slice `x[:,:]` would allocate.
px .= x
px

8×15 SharedArrays.SharedArray{Float64,2}:
 -0.233567   1.3582    0.573466   …   0.682527  -0.970062    0.679522
 -0.658011  -0.155475  0.896616      -0.923747  -1.70329    -0.992734
  0.127603  -0.253827  2.1606         0.471299  -0.272386    1.04764 
  0.618609  -0.474054  0.739256       0.192607   1.51645     1.17235 
  0.481497   0.995508  1.70796        0.666186   0.883242    0.795031
  0.677615  -0.595183  0.937017   …   1.19306   -0.166836    1.05681 
 -0.534713   0.628026  0.0629796      0.285508   0.24773     0.405323
  0.681263   0.561292  1.99202       -0.376353  -0.0886528  -1.17941 

### Just parallel

In [9]:
# Apply the distributed operator directly to the shared array; the result is
# again a SharedArray, now with Complex{Float64} elements.
py=pA*px

8×15 SharedArrays.SharedArray{Complex{Float64},2}:
  0.410226+0.0im        0.729905+0.0im       …   1.05519+0.0im      
 -0.571056+0.115406im   0.496993+0.460691im     -1.14116-0.302648im 
  0.231592+0.452643im    0.69986+0.296241im     0.007632-0.0251496im
   0.06543+0.583734im  -0.240534-0.162873im      1.05949+0.15154im  
 -0.522783+0.0im         1.19901+0.0im           1.01488+0.0im      
   0.06543-0.583734im  -0.240534+0.162873im  …   1.05949-0.15154im  
  0.231592-0.452643im    0.69986-0.296241im     0.007632+0.0251496im
 -0.571056-0.115406im   0.496993-0.460691im     -1.14116+0.302648im 

### Serial to serial via parallel

#### Define distributing and gathering operators

In [10]:
# Distribution operator derived from pA: applied to a regular Array it yields
# a SharedArray (see the `pD*x` cell below). Domain and range are both Float64.
pD=joSAdistribute(pA);
show(pD)

Type: joSAdistribute
Name: joSAdistributeMV:15
Size: (8, 8)
 NVC: 15
 DDT: Float64
 RDT: Float64


In [11]:
# Gathering operator derived from pA: applied to a SharedArray it yields a
# regular Array (see the `pG*py` cell below). Domain and range are both Complex{Float64}.
pG=joSAgather(pA);
show(pG)

Type: joSAgather
Name: joSAgatherMV:15
Size: (8, 8)
 NVC: 15
 DDT: Complex{Float64}
 RDT: Complex{Float64}


#### Distribute data, apply operator, gather data

In [12]:
# Step 1: distribute the serial input x into a SharedArray.
px=pD*x

8×15 SharedArrays.SharedArray{Float64,2}:
 -0.233567   1.3582    0.573466   …   0.682527  -0.970062    0.679522
 -0.658011  -0.155475  0.896616      -0.923747  -1.70329    -0.992734
  0.127603  -0.253827  2.1606         0.471299  -0.272386    1.04764 
  0.618609  -0.474054  0.739256       0.192607   1.51645     1.17235 
  0.481497   0.995508  1.70796        0.666186   0.883242    0.795031
  0.677615  -0.595183  0.937017   …   1.19306   -0.166836    1.05681 
 -0.534713   0.628026  0.0629796      0.285508   0.24773     0.405323
  0.681263   0.561292  1.99202       -0.376353  -0.0886528  -1.17941 

In [13]:
# Step 2: apply the distributed operator to the shared input.
py=pA*px

8×15 SharedArrays.SharedArray{Complex{Float64},2}:
  0.410226+0.0im        0.729905+0.0im       …   1.05519+0.0im      
 -0.571056+0.115406im   0.496993+0.460691im     -1.14116-0.302648im 
  0.231592+0.452643im    0.69986+0.296241im     0.007632-0.0251496im
   0.06543+0.583734im  -0.240534-0.162873im      1.05949+0.15154im  
 -0.522783+0.0im         1.19901+0.0im           1.01488+0.0im      
   0.06543-0.583734im  -0.240534+0.162873im  …   1.05949-0.15154im  
  0.231592-0.452643im    0.69986-0.296241im     0.007632+0.0251496im
 -0.571056-0.115406im   0.496993-0.460691im     -1.14116+0.302648im 

In [14]:
# Step 3: gather the shared result back into a regular Array.
y=pG*py

8×15 Array{Complex{Float64},2}:
  0.410226+0.0im        0.729905+0.0im       …   1.05519+0.0im      
 -0.571056+0.115406im   0.496993+0.460691im     -1.14116-0.302648im 
  0.231592+0.452643im    0.69986+0.296241im     0.007632-0.0251496im
   0.06543+0.583734im  -0.240534-0.162873im      1.05949+0.15154im  
 -0.522783+0.0im         1.19901+0.0im           1.01488+0.0im      
   0.06543-0.583734im  -0.240534+0.162873im  …   1.05949-0.15154im  
  0.231592-0.452643im    0.69986-0.296241im     0.007632+0.0251496im
 -0.571056-0.115406im   0.496993-0.460691im     -1.14116+0.302648im 

#### Or all together

In [15]:
# Compose gather * operator * distribute into a single operator; `show` reports
# it as a joLinearOperator with a Float64 domain and Complex{Float64} range.
ps2s=pG*pA*pD
show(ps2s)

Type: joLinearOperator
Name: ((joSAgatherMV:15*joSAdistributedLinearOperator(joDFT))*joSAdistributeMV:15)
Size: (8, 8)
 DDT: Float64
 RDT: Complex{Float64}


In [16]:
# One multiplication replaces the three-step distribute/apply/gather pipeline;
# the displayed values match the `y=pG*py` result above.
y=ps2s*x

8×15 Array{Complex{Float64},2}:
  0.410226+0.0im        0.729905+0.0im       …   1.05519+0.0im      
 -0.571056+0.115406im   0.496993+0.460691im     -1.14116-0.302648im 
  0.231592+0.452643im    0.69986+0.296241im     0.007632-0.0251496im
   0.06543+0.583734im  -0.240534-0.162873im      1.05949+0.15154im  
 -0.522783+0.0im         1.19901+0.0im           1.01488+0.0im      
   0.06543-0.583734im  -0.240534+0.162873im  …   1.05949-0.15154im  
  0.231592-0.452643im    0.69986-0.296241im     0.007632+0.0251496im
 -0.571056-0.115406im   0.496993-0.460691im     -1.14116+0.302648im 