In [9]:
using LinearAlgebraicRepresentation
Lar = LinearAlgebraicRepresentation
using IntervalTrees
using SparseArrays
using NearestNeighbors
using DataStructures
using OrderedCollections
using BenchmarkTools

## Funzione da ottimizzare

In [10]:
function fragmentlines(model)
    V,EV = model
    # Creo indice spaziale
    Sigma = spaceindex(model)
    # calcolo parametri d'intersezione degli spigoli
    lineparams = linefragments(V,EV,Sigma)
    # initialization of local data structures
    vertdict = OrderedDict{Array{Float64,1},Array{Int,1}}()
    pairs = collect(zip(lineparams, [V[:,e] for e in EV]))
    vertdict = OrderedDict{Array{Float64,1},Int}()
    #Inizializzo nuovi V, EV per aggiungere i nuovi vertici/spigoli dello splitting
    W = Array[]
    EW = Array[]
    k = 0
    # Ricostruisco i nuovi punti generati dall'intersezione tra spigoli
    # tramite i parametri d'intersezione
    # Per ogni spigolo...
    for (params,linepoints) in pairs
        v1 = linepoints[:,1] #Isolo primo punto dello spigolo
        v2 = linepoints[:,2] #Isolo secondo punto dello spigolo
        # Calcolo un array contenente tutti i punti d'intersezione sullo spigolo (tanti quanti
        # sono i parametri d'intersez)			
        points = [ v1 + t*(v2 - v1) for t in params]   # !!!! loved !!
        #Creo un array che conterrà gli id dei punti d'intersezione trovati (verticispigolo)
        vs = zeros(Int64,1,length(points))
        PRECISION = 8
        # Per ogni punto d'intersezione trovato sullo spigolo....
        for (h,point) in enumerate(points)
            #Approssimo coordinate del punto(x,y) trovato di un epsilon 
            point = map(approxVal(PRECISION), point)
            #Se non ho mai visto prima il punto....
            if haskey(vertdict, point) == false
                k += 1 #Genero ID punto 
                vertdict[point] = k #Associo l'ID al punto
                push!(W, point) #Pusho il punto(x,y) nell'array W
            end
            vs[h] = vertdict[point] #Assegno l'id del punto trovato nell'array dei punti d'intersezione
        end
        [push!(EW, [vs[k], vs[k+1]]) for k=1:length(vs)-1]
    end
    #se ho N punti d'intersezione trovati, genero N-1 spigoli 
    #ESEMPIO: se vs=[34,35,36,37] vs[h=1]=34, vs[h=2]=35, vs[h=3]=36, vs[h=4]=37
    # allora andrò a creare le coppie [34,35],[35,36],[36,37] come 3 spigoli. Queste coppie le pusho in EW
    W,EW = hcat(W...),convert(Array{Array{Int64,1},1},EW)
    V,EV = congruence((W,EW))
    return V,EV
end

fragmentlines (generic function with 1 method)

## Dipendenze della funzione

In [11]:
function spaceindex(model::Lar.LAR)::Array{Array{Int,1},1}
    V,CV = model[1:2]
    # se il modello è in 3d o 2d (guardo le righe di V, in 3d V è una 3xN, in 2d V è una 2xN)
    dim = size(V,1)
    cellpoints = [ V[:,CV[k]]::Lar.Points for k=1:length(CV) ]
    #----------------------------------------------------------
    bboxes = [hcat(boundingbox(cell)...) for cell in cellpoints]
    xboxdict = coordintervals(1,bboxes)
    yboxdict = coordintervals(2,bboxes)
    # xs,ys are IntervalTree type
    xs = IntervalTrees.IntervalMap{Float64, Array}()
    for (key,boxset) in xboxdict
        xs[tuple(key...)] = boxset
    end
    ys = IntervalTrees.IntervalMap{Float64, Array}()
    for (key,boxset) in yboxdict
        ys[tuple(key...)] = boxset
    end
    xcovers = boxcovering(bboxes, 1, xs)
    ycovers = boxcovering(bboxes, 2, ys)
    covers = [intersect(pair...) for pair in zip(xcovers,ycovers)]

    if dim == 3
        zboxdict = coordintervals(3,bboxes)
        zs = IntervalTrees.IntervalMap{Float64, Array}()
        for (key,boxset) in zboxdict
            zs[tuple(key...)] = boxset
        end
        zcovers = boxcovering(bboxes, 3, zs)
        covers = [intersect(pair...) for pair in zip(zcovers,covers)]
    end
    # remove each cell from its cover
    for k=1:length(covers)
        covers[k] = setdiff(covers[k],[k])
    end
    return covers
end

function boundingbox(vertices::Lar.Points)
   minimum = mapslices(x->min(x...), vertices, dims=2)
   maximum = mapslices(x->max(x...), vertices, dims=2)
   return minimum, maximum
end

function coordintervals(coord,bboxes)
    boxdict = OrderedDict{Array{Float64,1},Array{Int64,1}}()
    for (h,box) in enumerate(bboxes)
        key = box[coord,:]
        if haskey(boxdict,key) == false
            boxdict[key] = [h]
        else
            push!(boxdict[key], h)
        end
    end
    return boxdict
end

function boxcovering(bboxes, index, tree)
    covers = [[] for k=1:length(bboxes)]
    for (i,boundingbox) in enumerate(bboxes)
        extent = bboxes[i][index,:]
        iterator = IntervalTrees.intersect(tree, tuple(extent...))
        for x in iterator
            append!(covers[i],x.value)
        end
    end
    return covers
end

function linefragments(V,EV,Sigma)
    m = length(Sigma) 
    sigma = map(sort,Sigma) 
    reducedsigma = sigma 
    params = Array{Float64,1}[[] for i=1:m]
    for h=1:m
        if sigma[h] ≠ []
            line1 = V[:,EV[h]]
            for k in sigma[h]
                line2 = V[:,EV[k]]
                out = intersection(line1,line2) 
                if out ≠ ()
                    α,β = out
                    if 0<=α<=1 && 0<=β<=1
                        push!(params[h], α)
                        push!(params[k], β)
                    end
                end
            end
        end
    end
    fragparams = []
    for line in params
        push!(line, 0.0, 1.0)
        line = sort(collect(Set(line)))
        push!(fragparams, line)
    end
    return fragparams
end

function intersection(line1,line2)
    x1,y1,x2,y2 = vcat(line1...)
    x3,y3,x4,y4 = vcat(line2...)

    det = (x4-x3)*(y1-y2)-(x1-x2)*(y4-y3)
    if det != 0.0
        a = 1/det
        b = [y1-y2 x2-x1; y3-y4 x4-x3]  # x1-x2 => x2-x1 bug in the source link !!
        c = [x1-x3; y1-y3]
        (β,α) = a * b * c
    else
        if (y1==y2) == (y3==y4) || (x1==x2) == (x3==x4) # segments collinear
             return nothing
        else
             # segments parallel: no intersection
             return nothing
        end
    end
    return α,β
end

function congruence(model)
    W,EW = model
    balltree = NearestNeighbors.BallTree(W)
    r = 0.0000000001
    near = Array{Any}(undef, size(W,2))
    for k=1:size(W,2)
        near[k] = NearestNeighbors.inrange(balltree, W[:,k], r, true)
    end
    near = map(sort,near) 
    for k=1:size(W,2)
        W[:,k] = W[:,near[k][1]]
    end
    pointidx = [ near[k][1] for k=1:size(W,2) ] 
    invidx = OrderedDict(zip(1:length(pointidx), pointidx))
    V = [W[:,k] for k=1:length(pointidx)]
    EV = []
    for e in (EW)
        newedge = [invidx[e[1]],invidx[e[2]]]
        if newedge[1] !== newedge[2]
            push!(EV,newedge)
        end
    end
    EV = [EV[h] for h=1:length(EV) if length(EV[h])==2]
    EV = convert(Lar.Cells, EV)
    return hcat(V...),EV
end

function approxVal(PRECISION)
    function approxVal0(value)
    out = round(value, digits=PRECISION)
    if out == -0.0
        out = 0.0
    end
    return out
    end
    return approxVal0
end

approxVal (generic function with 1 method)

##

In [12]:
V = [1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0 64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0 128.0 129.0;
     1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0 64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0 128.0 129.0]

EV = [[1,2],[3,4],[5,6],[7,8],[9,10],[11,12],[13,14],[15,16],[17,18],[19,20],[21,22],[23,24],[25,26],[27,28],[29,30],[31,32],[33,34],[35,36],[37,38],[39,40],[41,42],[43,44],[45,46],[47,48],[49,50],[51,52],[53,54],[55,56],[57,58],[59,60],[61,62],[63,64],[65,66],[67,68],[69,70],[71,72],[73,74],[75,76],[77,78],[79,80],[81,82],[83,84],[85,86],[87,88],[89,90],[91,92],[93,94],[95,96],[97,98],[99,100],[101,102],[103,104],[105,106],[107,108],[109,110],[111,112],[113,114],[115,116],[117,118],[119,120]]


60-element Array{Array{Int64,1},1}:
 [1, 2]
 [3, 4]
 [5, 6]
 [7, 8]
 [9, 10]
 [11, 12]
 [13, 14]
 [15, 16]
 [17, 18]
 [19, 20]
 [21, 22]
 [23, 24]
 [25, 26]
 ⋮
 [97, 98]
 [99, 100]
 [101, 102]
 [103, 104]
 [105, 106]
 [107, 108]
 [109, 110]
 [111, 112]
 [113, 114]
 [115, 116]
 [117, 118]
 [119, 120]

## 0) Benchmark vecchia funzione 

In [13]:
@btime fragmentlines((V,EV))

  2.326 ms (15428 allocations: 853.45 KiB)


([1.0 2.0 … 119.0 120.0; 1.0 2.0 … 119.0 120.0], [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [17, 18], [19, 20]  …  [101, 102], [103, 104], [105, 106], [107, 108], [109, 110], [111, 112], [113, 114], [115, 116], [117, 118], [119, 120]])

## 1) Controllo se la funzione è type unstable


In [14]:
@code_warntype fragmentlines((V,EV))

Variables
  #self#[36m::Core.Compiler.Const(fragmentlines, false)[39m
  model[36m::Tuple{Array{Float64,2},Array{Array{Int64,1},1}}[39m
  @_3[36m::Int64[39m
  #54[36m::var"#54#57"[39m
  @_5[36m::Int64[39m
  Sigma[36m::Array{Array{Int64,1},1}[39m
  lineparams[36m::Array{Any,1}[39m
  pairs[91m[1m::Any[22m[39m
  vertdict[91m[1m::Union{OrderedDict{Array{Float64,1},Array{Int64,1}}, OrderedDict{Array{Float64,1},Int64}}[22m[39m
  k[36m::Int64[39m
  @_11[91m[1m::Any[22m[39m
  W[91m[1m::Any[22m[39m
  EW@_13[91m[1m::Core.Box[22m[39m
  V@_14[91m[1m::Core.Box[22m[39m
  EV[91m[1m::Any[22m[39m
  params[91m[1m::Any[22m[39m
  linepoints[91m[1m::Any[22m[39m
  @_18[91m[1m::Any[22m[39m
  #55[91m[1m::var"#55#58"{_A,_B} where _B where _A[22m[39m
  #56[36m::var"#56#59"{Array{Int64,2}}[39m
  v1[91m[1m::Any[22m[39m
  v2[91m[1m::Any[22m[39m
  points[91m[1m::Any[22m[39m
  vs[36m::Array{Int64,2}[39m
  PRECISION[36m::Int64[39m
  @_26[33

[90m│   [39m %119 = Core.isdefined(EW@_13, :contents)[36m::Bool[39m
[90m└───[39m        goto #14 if not %119
[90m13 ─[39m        goto #15
[90m14 ─[39m        Core.NewvarNode(:(EW@_31))
[90m└───[39m        EW@_31
[90m15 ┄[39m %124 = Core.getfield(EW@_13, :contents)[91m[1m::Any[22m[39m
[90m│   [39m %125 = Main.convert(%118, %124)[91m[1m::Any[22m[39m
[90m│   [39m        (W = %116)
[90m│   [39m        Core.setfield!(EW@_13, :contents, %125)
[90m│   [39m %128 = W[91m[1m::Any[22m[39m
[90m│   [39m %129 = Core.isdefined(EW@_13, :contents)[36m::Bool[39m
[90m└───[39m        goto #17 if not %129
[90m16 ─[39m        goto #18
[90m17 ─[39m        Core.NewvarNode(:(EW@_32))
[90m└───[39m        EW@_32
[90m18 ┄[39m %134 = Core.getfield(EW@_13, :contents)[91m[1m::Any[22m[39m
[90m│   [39m %135 = Core.tuple(%128, %134)[91m[1m::Tuple{Any,Any}[22m[39m
[90m│   [39m %136 = Main.congruence(%135)[91m[1m::Tuple{Any,Any}[22m[39m
[90m│   [39m %137 =

la funzione NON è type unstable in quanto ho nell'output la stringa:
    
    Body::Tuple{Any,Any}

## 2) Ottimizzazione cicli con @simd e @inbounds

Abbiamo convertito alcune list comprehension in cicli del tipo for i=1:n .. in modo da poter utilizzare la macro @inbounds per disabilitare il boundchecking del compilatore e la macro @simd.  
  
L'inserimento esplicito della macro simd non ha comportato alcun beneficio, infatti come si apprende dal sito ufficiale Julia: "Note that in many cases, Julia can automatically vectorize code without the @simd macro" (https://docs.julialang.org/en/v1/manual/performance-tips/), quindi noi pensiamo che il codice già sia vettorizzato senza la nostra macro.  
  
La macro @inbounds invece ha ridotto leggermente il numero di allocazioni in memoria.

In [15]:
function fragmentlines2(model)
    V,EV = model
    Sigma = spaceindex(model)
    lineparams = linefragments(V,EV,Sigma)
    vertdict = OrderedDict{Array{Float64,1},Array{Int,1}}()
    pairs = collect(zip(lineparams, [V[:,e] for e in EV]))
    vertdict = OrderedDict{Array{Float64,1},Int}()
    W = Array[]
    EW = Array[]
    k = 0
    l = length(pairs)
    @inbounds @simd for i = 1:l
        params = pairs[i][1]
        linepoints = pairs[i][2]
        v1 = linepoints[:,1] #Isolo primo punto dello spigolo
        v2 = linepoints[:,2] #Isolo secondo punto dello spigolo
        points = [ v1 + t*(v2 - v1) for t in params]   # !!!! loved !!
        vs = zeros(Int64,1,length(points))
        PRECISION = 8
        numpoint = length(points)
        @inbounds @simd for h = 1:numpoint
            points[h] = map(approxVal(PRECISION), points[h])
            if !haskey(vertdict, points[h])
                k += 1 #Genero ID punto 
                vertdict[points[h]] = k #Associo l'ID al punto
                push!(W, points[h]) #Pusho il punto(x,y) nell'array W
            end
            vs[h] = vertdict[points[h]] 
        end
        m = length(vs) - 1
        @inbounds @simd for k=1:m
            push!(EW, [vs[k], vs[k+1]])
        end
    end
    W,EW = hcat(W...),convert(Array{Array{Int64,1},1},EW)
    V,EV = congruence((W,EW))
    return V,EV
end

@btime fragmentlines2((V, EV))

  2.378 ms (14709 allocations: 825.38 KiB)


([1.0 2.0 … 119.0 120.0; 1.0 2.0 … 119.0 120.0], [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [17, 18], [19, 20]  …  [101, 102], [103, 104], [105, 106], [107, 108], [109, 110], [111, 112], [113, 114], [115, 116], [117, 118], [119, 120]])

## 3) Parallelizzazione cicli usando i Threads

Purtroppo non è stato possibile utilizzare i threads per il ciclo principale della funzione, in quanto gli orderedDict fanno sorgere delle eccezioni nei task parallelizzati. Inoltre non è stata aggiunta l'annotazione @threads per l'ultimo ciclo in quanto si è notato che faceva peggiorare le performance

## 4) Refactoring del codice  
  
Nell'if è stata cambiata la condizione a !haskey(vertdict, points[h]) per evitare un confronto con ==.

## 5) Risultato finale, parallelizzando anche le funzioni presenti nelle dipendenze di fragmentlines (spaceindex,linefragments,intersection,boundingbox,congruence)

In [16]:
using Base.Threads

function boundingbox(vertices::Lar.Points)
    d=size(vertices)[1]
    numPoints=size(vertices)[2]
    #inizializzo gli array da ritornare [xMin, yMin, zMin] e [xMax, yMax, zMax]
    mins = zeros(d,1)
    maxs = zeros(d,1)
    for i=1:d
        mins[i]=vertices[i]
        maxs[i]=vertices[i]
    end
    @threads for i=2:numPoints
        @threads for j=1:d
            if(vertices[j+d*(i-1)] > maxs[j])
                maxs[j] = vertices[j+d*(i-1)]
            end
            if(vertices[j+d*(i-1)] < mins[j])
                mins[j] = vertices[j+d*(i-1)]
            end
        end
    end

    return (mins,maxs)
end

function spaceindex(model::Lar.LAR)::Array{Array{Int,1},1}
    V,CV = model[1:2]
    # se il modello è in 3d o 2d (guardo le righe di V, in 3d V è una 3xN, in 2d V è una 2xN)
    dim = size(V,1)
    #PARALLELIZZO LA CREAZIONE DEI CELLPOINTS
    n=length(CV)
    cellpoints = Array{Array{Float64,2}}(undef,n)
    @inbounds @threads for k=1:n
        cellpoints[k] = V[:,CV[k]]::Lar.Points
    end
    #PARALLELIZZO LA CREAZIONE DEI BOUNDING BOXES
    bboxes = Array{Array{Float64,2}}(undef,n)
    @inbounds @threads for k=1:n
        bboxes[k] = hcat(boundingbox(cellpoints[k])...)
    end
    coverXYZ= Array{Array{Array{Int64,1},1}}(undef,dim)
    #Per ogni asse x=1, y=2, z=3.....
    @threads for i=1:dim
        boxdict = coordintervals(i,bboxes)
        #Creo interval tree sull'asse i
        intTree = IntervalTrees.IntervalMap{Float64, Array}()
        @inbounds for (key,boxset) in boxdict
            intTree[tuple(key...)] = boxset
        end
        coverXYZ[i] = boxcovering(bboxes, i, intTree)     
    end
    spaceindex = Array{Array{Any,1}}(undef,length(bboxes))
    @inbounds @threads for i=1:n
        spaceindex[i] = intersect((coverXYZ[1][i],coverXYZ[2][i])...)
    end
    if(dim==3)
        @inbounds @threads for i=1:n
             spaceindex[i] = intersect((spaceindex[i],coverXYZ[3][i])...)
        end
    end
    @inbounds @simd for k=1:length(spaceindex)
        spaceindex[k] = setdiff(spaceindex[k],[k])
    end
    return spaceindex
end

function linefragments(V,EV,sigma)
    m = length(sigma) 
    sigma = map(sort,sigma) 
    params = Array{Array{Float64,1}}(undef,m)
    @threads for i=1:m
        params[i] = []
    end
    line1=[0.0 0.0; 0.0 0.0]
    line2=[0.0 0.0; 0.0 0.0]
    @threads for h=1:m
        if sigma[h] ≠ []
            line1 = V[:,EV[h]]
            @threads for k in sigma[h]
            line2 = V[:,EV[k]]
                out = intersection(line1,line2) 
                if out ≠ ()
                    if 0<=out[1]<=1 && 0<=out[2]<=1
                        push!(params[h], out[1])
                        push!(params[k], out[2])
                    end
                end
            end
        end
        end
    len = length(params)
    @threads for i=1:len
        push!(params[i], 0.0, 1.0)
        params[i] = sort(collect(Set(params[i])))
    end
    return params
end

function congruence(model)
    W,EW = model
    n = size(W,2)
    balltree = NearestNeighbors.BallTree(W)
    r = 0.0000000001
    near = Array{Any}(undef, n)
    @inbounds @threads for k=1:n
        near[k] = NearestNeighbors.inrange(balltree, W[:,k], r, true)
    end
    near = map(sort,near) 
    @inbounds @threads for k=1:n
        W[:,k] = W[:,near[k][1]]
    end
    pointidx = Array{Int64}(undef, n)
    @inbounds @threads for k=1:n
         pointidx[k] = near[k][1] 
    end
    l = length(pointidx)
    invidx = OrderedDict(zip(1:l, pointidx))
    V = Array{Array{Float64,1}}(undef, l)
    @inbounds @threads for k=1:l
        V[k] = W[:,k] 
    end
    
    EV = []
    m = length(EW)
    @inbounds for i = 1:m
        newedge = [invidx[EW[i][1]],invidx[EW[i][2]]]
        if newedge[1] !== newedge[2]
            push!(EV,newedge)
        end
    end
    filter!(x ->  length(x)==2, EV)
    EV = convert(Lar.Cells, EV)
    return hcat(V...),EV
end

@btime fragmentlines2((V, EV))

  1.126 ms (8217 allocations: 564.92 KiB)


([1.0 2.0 … 119.0 120.0; 1.0 2.0 … 119.0 120.0], [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [17, 18], [19, 20]  …  [101, 102], [103, 104], [105, 106], [107, 108], [109, 110], [111, 112], [113, 114], [115, 116], [117, 118], [119, 120]])