In [25]:
using LinearAlgebraicRepresentation
Lar = LinearAlgebraicRepresentation
using IntervalTrees
using SparseArrays
using NearestNeighbors
using DataStructures
using OrderedCollections
using BenchmarkTools

## Funzione da ottimizzare

In [26]:
function spaceindex(model::Lar.LAR)::Array{Array{Int,1},1}
    V,CV = model[1:2]
    # se il modello è in 3d o 2d (guardo le righe di V, in 3d V è una 3xN, in 2d V è una 2xN)
    dim = size(V,1)
    cellpoints = [ V[:,CV[k]]::Lar.Points for k=1:length(CV) ]
    #----------------------------------------------------------
    bboxes = [hcat(boundingbox(cell)...) for cell in cellpoints]
    xboxdict = coordintervals(1,bboxes)
    yboxdict = coordintervals(2,bboxes)
    # xs,ys are IntervalTree type
    xs = IntervalTrees.IntervalMap{Float64, Array}()
    for (key,boxset) in xboxdict
        xs[tuple(key...)] = boxset
    end
    ys = IntervalTrees.IntervalMap{Float64, Array}()
    for (key,boxset) in yboxdict
        ys[tuple(key...)] = boxset
    end
    xcovers = boxcovering(bboxes, 1, xs)
    ycovers = boxcovering(bboxes, 2, ys)
    covers = [intersect(pair...) for pair in zip(xcovers,ycovers)]

    if dim == 3
        zboxdict = coordintervals(3,bboxes)
        zs = IntervalTrees.IntervalMap{Float64, Array}()
        for (key,boxset) in zboxdict
            zs[tuple(key...)] = boxset
        end
        zcovers = boxcovering(bboxes, 3, zs)
        covers = [intersect(pair...) for pair in zip(zcovers,covers)]
    end
    # remove each cell from its cover
    for k=1:length(covers)
        covers[k] = setdiff(covers[k],[k])
    end
    return covers
end

spaceindex (generic function with 1 method)

## Dipendenze della funzione

In [27]:
function boundingbox(vertices::Lar.Points)
   minimum = mapslices(x->min(x...), vertices, dims=2)
   maximum = mapslices(x->max(x...), vertices, dims=2)
   return minimum, maximum
end

function coordintervals(coord,bboxes)
    boxdict = OrderedDict{Array{Float64,1},Array{Int64,1}}()
    for (h,box) in enumerate(bboxes)
        key = box[coord,:]
        if haskey(boxdict,key) == false
            boxdict[key] = [h]
        else
            push!(boxdict[key], h)
        end
    end
    return boxdict
end

function boxcovering(bboxes, index, tree)
    covers = [[] for k=1:length(bboxes)]
    for (i,boundingbox) in enumerate(bboxes)
        extent = bboxes[i][index,:]
        iterator = IntervalTrees.intersect(tree, tuple(extent...))
        for x in iterator
            append!(covers[i],x.value)
        end
    end
    return covers
end

boxcovering (generic function with 1 method)

## Dati Input

In [28]:
V = [1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0 64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0 128.0 129.0;
     1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0 64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0 128.0 129.0]

EV = [[1,2],[3,4],[5,6],[7,8],[9,10],[11,12],[13,14],[15,16],[17,18],[19,20],[21,22],[23,24],[25,26],[27,28],[29,30],[31,32],[33,34],[35,36],[37,38],[39,40],[41,42],[43,44],[45,46],[47,48],[49,50],[51,52],[53,54],[55,56],[57,58],[59,60],[61,62],[63,64],[65,66],[67,68],[69,70],[71,72],[73,74],[75,76],[77,78],[79,80],[81,82],[83,84],[85,86],[87,88],[89,90],[91,92],[93,94],[95,96],[97,98],[99,100],[101,102],[103,104],[105,106],[107,108],[109,110],[111,112],[113,114],[115,116],[117,118],[119,120]]


60-element Array{Array{Int64,1},1}:
 [1, 2]
 [3, 4]
 [5, 6]
 [7, 8]
 [9, 10]
 [11, 12]
 [13, 14]
 [15, 16]
 [17, 18]
 [19, 20]
 [21, 22]
 [23, 24]
 [25, 26]
 ⋮
 [97, 98]
 [99, 100]
 [101, 102]
 [103, 104]
 [105, 106]
 [107, 108]
 [109, 110]
 [111, 112]
 [113, 114]
 [115, 116]
 [117, 118]
 [119, 120]

## 0) Benchmark vecchia funzione 

In [29]:
@btime spaceindex((V,EV))

  1.177 ms (10610 allocations: 543.20 KiB)


60-element Array{Array{Int64,1},1}:
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 ⋮
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []

## 1) Controllo se la funzione è type unstable

In [30]:
@code_warntype spaceindex((V,EV))

Variables
  #self#[36m::Core.Compiler.Const(spaceindex, false)[39m
  model[36m::Tuple{Array{Float64,2},Array{Array{Int64,1},1}}[39m
  @_3[36m::Int64[39m
  #80[36m::var"#80#84"{Array{Float64,2},Array{Array{Int64,1},1}}[39m
  #81[36m::var"#81#85"[39m
  #82[36m::var"#82#86"[39m
  #83[36m::var"#83#87"[39m
  V[36m::Array{Float64,2}[39m
  CV[36m::Array{Array{Int64,1},1}[39m
  dim[36m::Int64[39m
  cellpoints[36m::Array{Array{Float64,2},1}[39m
  bboxes[91m[1m::Array{_A,1} where _A[22m[39m
  xboxdict[36m::OrderedDict{Array{Float64,1},Array{Int64,1}}[39m
  yboxdict[36m::OrderedDict{Array{Float64,1},Array{Int64,1}}[39m
  xs[36m::IntervalTrees.IntervalBTree{Float64,IntervalValue{Float64,Array},64}[39m
  @_16[33m[1m::Union{Nothing, Tuple{Pair{Array{Float64,1},Array{Int64,1}},Int64}}[22m[39m
  ys[36m::IntervalTrees.IntervalBTree{Float64,IntervalValue{Float64,Array},64}[39m
  @_18[33m[1m::Union{Nothing, Tuple{Pair{Array{Float64,1},Array{Int64,1}},Int64}}[22m[

[90m8 ──[39m        (zboxdict = Main.coordintervals(3, bboxes))
[90m│   [39m %100 = IntervalTrees.IntervalMap[36m::Core.Compiler.Const(IntervalTrees.IntervalBTree{K,IntervalValue{K,V},64} where V where K, false)[39m
[90m│   [39m %101 = Core.apply_type(%100, Main.Float64, Main.Array)[36m::Core.Compiler.Const(IntervalTrees.IntervalBTree{Float64,IntervalValue{Float64,Array},64}, false)[39m
[90m│   [39m        (zs = (%101)())
[90m│   [39m %103 = zboxdict[36m::OrderedDict{Array{Float64,1},Array{Int64,1}}[39m
[90m│   [39m        (@_24 = Base.iterate(%103))
[90m│   [39m %105 = (@_24 === nothing)[36m::Bool[39m
[90m│   [39m %106 = Base.not_int(%105)[36m::Bool[39m
[90m└───[39m        goto #11 if not %106
[90m9 ┄─[39m %108 = @_24::Tuple{Pair{Array{Float64,1},Array{Int64,1}},Int64}[36m::Tuple{Pair{Array{Float64,1},Array{Int64,1}},Int64}[39m
[90m│   [39m %109 = Core.getfield(%108, 1)[36m::Pair{Array{Float64,1},Array{Int64,1}}[39m
[90m│   [39m %110 = Base.indexe

la funzione NON è type unstable in quanto ho nell'output la stringa:

    Body::Array{Array{Int64,1},1}
   

## 2) Ottimizzazione cicli con @simd e @inbounds

Abbiamo convertito alcune list comprehension in cicli del tipo for i=1:n .. in modo da poter utilizzare la macro @inbounds per disabilitare il boundchecking del compilatore e la macro @simd.  
  
L'inserimento esplicito della macro simd non ha comportato alcun beneficio, infatti come si apprende dal sito ufficiale Julia: "Note that in many cases, Julia can automatically vectorize code without the @simd macro" (https://docs.julialang.org/en/v1/manual/performance-tips/), quindi noi pensiamo che il codice già sia vettorizzato senza la nostra macro.  
  
La macro @inbounds invece ha ridotto leggermente il numero di allocazioni in memoria.

In [31]:
function spaceindex(model::Lar.LAR)::Array{Array{Int,1},1}
    V,CV = model[1:2]
    # se il modello è in 3d o 2d (guardo le righe di V, in 3d V è una 3xN, in 2d V è una 2xN)
    dim = size(V,1)
    #PARALLELIZZO LA CREAZIONE DEI CELLPOINTS
    n=length(CV)
    cellpoints = Array{Array{Float64,2}}(undef,n)
    @inbounds @simd for k=1:n
        cellpoints[k] = V[:,CV[k]]::Lar.Points
    end
    #PARALLELIZZO LA CREAZIONE DEI BOUNDING BOXES
    bboxes = Array{Array{Float64,2}}(undef,n)
    @inbounds @simd for k=1:n
        bboxes[k] = hcat(boundingbox(cellpoints[k])...)
    end
    xboxdict = coordintervals(1,bboxes)
    yboxdict = coordintervals(2,bboxes)
    # xs,ys are IntervalTree type
    xs = IntervalTrees.IntervalMap{Float64, Array}()
    @inbounds for (key,boxset) in xboxdict
        xs[tuple(key...)] = boxset
    end
    ys = IntervalTrees.IntervalMap{Float64, Array}()
    @inbounds for (key,boxset) in yboxdict
        ys[tuple(key...)] = boxset
    end
    xcovers = boxcovering(bboxes, 1, xs)
    ycovers = boxcovering(bboxes, 2, ys)
    covers = Array{Array{Any,1}}(undef,length(bboxes))
    #PARALLELIZZO LA CREAZIONE DEGLI SPIGOLI INCIDENTI
    @inbounds @simd for i=1:length(xcovers)
        covers[i] = intersect((xcovers[i],ycovers[i])...)
    end
    if dim == 3
        zboxdict = coordintervals(3,bboxes)
        zs = IntervalTrees.IntervalMap{Float64, Array}()
        for (key,boxset) in zboxdict
            zs[tuple(key...)] = boxset
        end
        zcovers = boxcovering(bboxes, 3, zs)
        covers = [intersect(pair...) for pair in zip(zcovers,covers)]
    end
    # remove each cell from its cover
    @inbounds for k=1:length(covers)
        covers[k] = setdiff(covers[k],[k])
    end
    return covers
end

@btime spaceindex((V,EV))

  1.180 ms (10609 allocations: 543.19 KiB)


60-element Array{Array{Int64,1},1}:
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 ⋮
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []

## 3) Parallelizzazione cicli usando i Threads

Per eseguire questa cella, aprire il proprio REPL Julia e aprire questo notebook con i comandi:

ENV["JULIA_NUM_THREADS"] = 4     (o un altro numero)  
using IJulia  
notebook()  
  
Noi studenti, avendo computer dual-core abbiamo fissato in numero di threads pari a 2.   Allocarne di più sarebbe stato inutile (in quanto i threads >2 avrebbero aspettato comunque ..) e anzi, mettendo un numero di threads pari a 4 su un dual-core abbiamo notato un grosso peggioramento delle performance dovuta probabilmente all'attesa.  
  
Il threading funziona bene solo su dati grossi. Su dati piccoli abbiamo visto che non migliorano le performance, anzi peggiorano.

In [32]:
using Base.Threads

function spaceindex(model::Lar.LAR)::Array{Array{Int,1},1}
    V,CV = model[1:2]
    # se il modello è in 3d o 2d (guardo le righe di V, in 3d V è una 3xN, in 2d V è una 2xN)
    dim = size(V,1)
    #PARALLELIZZO LA CREAZIONE DEI CELLPOINTS
    n=length(CV)
    cellpoints = Array{Array{Float64,2}}(undef,n)
    @inbounds @threads for k=1:n
        cellpoints[k] = V[:,CV[k]]::Lar.Points
    end
    #PARALLELIZZO LA CREAZIONE DEI BOUNDING BOXES
    bboxes = Array{Array{Float64,2}}(undef,n)
    @inbounds @threads for k=1:n
        bboxes[k] = hcat(boundingbox(cellpoints[k])...)
    end
    xboxdict = coordintervals(1,bboxes)
    yboxdict = coordintervals(2,bboxes)
    # xs,ys are IntervalTree type
    xs = IntervalTrees.IntervalMap{Float64, Array}()
    @inbounds for (key,boxset) in xboxdict
        xs[tuple(key...)] = boxset
    end
    ys = IntervalTrees.IntervalMap{Float64, Array}()
    @inbounds for (key,boxset) in yboxdict
        ys[tuple(key...)] = boxset
    end
    xcovers = boxcovering(bboxes, 1, xs)
    ycovers = boxcovering(bboxes, 2, ys)
    covers = Array{Array{Any,1}}(undef,length(bboxes))
    #PARALLELIZZO LA CREAZIONE DEGLI SPIGOLI INCIDENTI
    @inbounds @threads for i=1:n
        covers[i] = intersect((xcovers[i],ycovers[i])...)
    end
    if dim == 3
        zboxdict = coordintervals(3,bboxes)
        zs = IntervalTrees.IntervalMap{Float64, Array}()
        for (key,boxset) in zboxdict
            zs[tuple(key...)] = boxset
        end
        zcovers = boxcovering(bboxes, 3, zs)
        covers = [intersect(pair...) for pair in zip(zcovers,covers)]
    end
    # remove each cell from its cover
    @inbounds @simd for k=1:length(covers)
        covers[k] = setdiff(covers[k],[k])
    end
    return covers
end
print("-Numero di Threads allocati: ")
println(nthreads())
@btime spaceindex((V,EV))

-Numero di Threads allocati: 2
  801.753 μs (10645 allocations: 547.91 KiB)


60-element Array{Array{Int64,1},1}:
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 ⋮
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []

## 4) Refactoring del codice  
  
Nella funzione spaceindex risulta evidente come si facciano degli step uguali per 
l'assex, poi per l'assey, e poi per l'assez.  
  
Facendo un refactoring del codice è possibile ciclare sugli assi, applicando la macro 
@threads anche sulla costruzione degli insieme covers relativi ai singoli assi.

In [33]:
using Base.Threads

function spaceindex(model::Lar.LAR)::Array{Array{Int,1},1}
    V,CV = model[1:2]
    # se il modello è in 3d o 2d (guardo le righe di V, in 3d V è una 3xN, in 2d V è una 2xN)
    dim = size(V,1)
    #PARALLELIZZO LA CREAZIONE DEI CELLPOINTS
    n=length(CV)
    cellpoints = Array{Array{Float64,2}}(undef,n)
    @inbounds @threads for k=1:n
        cellpoints[k] = V[:,CV[k]]::Lar.Points
    end
    #PARALLELIZZO LA CREAZIONE DEI BOUNDING BOXES
    bboxes = Array{Array{Float64,2}}(undef,n)
    @inbounds @threads for k=1:n
        bboxes[k] = hcat(boundingbox(cellpoints[k])...)
    end
    coverXYZ= Array{Array{Array{Int64,1},1}}(undef,dim)
    #Per ogni asse x=1, y=2, z=3.....
    @threads for i=1:dim
        boxdict = coordintervals(i,bboxes)
        #Creo interval tree sull'asse i
        intTree = IntervalTrees.IntervalMap{Float64, Array}()
        @inbounds for (key,boxset) in boxdict
            intTree[tuple(key...)] = boxset
        end
        coverXYZ[i] = boxcovering(bboxes, i, intTree)     
    end
    spaceindex = Array{Array{Any,1}}(undef,length(bboxes))
    @inbounds @threads for i=1:n
        spaceindex[i] = intersect((coverXYZ[1][i],coverXYZ[2][i])...)
    end
    if(dim==3)
        @inbounds @threads for i=1:n
             spaceindex[i] = intersect((spaceindex[i],coverXYZ[3][i])...)
        end
    end
    @inbounds @simd for k=1:length(spaceindex)
        spaceindex[k] = setdiff(spaceindex[k],[k])
    end
    return spaceindex
end

print("Numero di threads allocati :")
println(nthreads())
@btime spaceindex((V,EV))

Numero di threads allocati :2
  683.780 μs (10596 allocations: 563.72 KiB)


60-element Array{Array{Int64,1},1}:
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 ⋮
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []

## Risultato finale, parallelizzando anche la funzione boundingbox (presente nelle dipendendeze di spaceindex)

In [35]:
function boundingbox(vertices::Lar.Points)
       d=size(vertices)[1]
       numPoints=size(vertices)[2]
       mins::Array{Float64,1}=[]
       maxs::Array{Float64,1}=[]
       @threads for axis=1:d
               row = vertices[d,:]
               min=vertices[d]
               max=vertices[d]
               @threads for points=2:numPoints
                       if(row[points]<min)
                               min = row[points]
                       end
                       if(row[points]>max)
                               max = row[points]
                       end
               end
               push!(mins,min)
               push!(maxs,max)
       end
  return mins, maxs
end
@btime spaceindex((V,EV))

  228.587 μs (5316 allocations: 311.53 KiB)


60-element Array{Array{Int64,1},1}:
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 ⋮
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []
 []