In [9]:
using LinearAlgebraicRepresentation
Lar = LinearAlgebraicRepresentation
using IntervalTrees
using SparseArrays
using NearestNeighbors
using DataStructures
using OrderedCollections
using BenchmarkTools
using Base.Threads
using LinearAlgebra

## Funzione da ottimizzare

In [10]:
function frag_edge(V, EV::Lar.ChainOp, edge_idx::Int, bigPI)
    alphas = Dict{Float64, Int}()
    edge = EV[edge_idx, :]
    verts = V[edge.nzind, :]
    for i in bigPI[edge_idx]
        if i != edge_idx
            intersection = intersect_edges(V, edge, EV[i, :])
            for (point, alpha) in intersection
                verts = [verts; point]
                alphas[alpha] = size(verts, 1)
            end
        end
    end
    alphas[0.0], alphas[1.0] = [1, 2]
    alphas_keys = sort(collect(keys(alphas)))
    edge_num = length(alphas_keys)-1
    verts_num = size(verts, 1)
    ev = SparseArrays.spzeros(Int8, edge_num, verts_num)
    for i in 1:edge_num
        ev[i, alphas[alphas_keys[i]]] = 1
        ev[i, alphas[alphas_keys[i+1]]] = 1
    end
    return verts, ev
end

frag_edge (generic function with 1 method)

## Dipendenze della funzione

In [11]:
function intersect_edges(V::Lar.Points, edge1::Lar.Cell, edge2::Lar.Cell)
    err = 10e-8

    x1, y1, x2, y2 = vcat(map(c->V[c, :], edge1.nzind)...)
    x3, y3, x4, y4 = vcat(map(c->V[c, :], edge2.nzind)...)
    ret = Array{Tuple{Lar.Points, Float64}, 1}()

    v1 = [x2-x1, y2-y1];
    v2 = [x4-x3, y4-y3];
    v3 = [x3-x1, y3-y1];
    ang1 = dot(normalize(v1), normalize(v2))
    ang2 = dot(normalize(v1), normalize(v3))
    parallel = 1-err < abs(ang1) < 1+err
    colinear = parallel && (1-err < abs(ang2) < 1+err || -err < norm(v3) < err)
    if colinear
        o = [x1 y1]
        v = [x2 y2] - o
        alpha = 1/dot(v,v')
        ps = [x3 y3; x4 y4]
        for i in 1:2
            a = alpha*dot(v',(reshape(ps[i, :], 1, 2)-o))
            if 0 < a < 1
                push!(ret, (ps[i:i, :], a))
            end
        end
    elseif !parallel
        denom = (v2[2])*(v1[1]) - (v2[1])*(v1[2])
        a = ((v2[1])*(-v3[2]) - (v2[2])*(-v3[1])) / denom
        b = ((v1[1])*(-v3[2]) - (v1[2])*(-v3[1])) / denom

        if -err < a < 1+err && -err <= b <= 1+err
            p = [(x1 + a*(x2-x1))  (y1 + a*(y2-y1))]
            push!(ret, (p, a))
        end
    end
    return ret
end

intersect_edges (generic function with 1 method)

## Dati di input

In [12]:
b=[[2.0, 250.0, 2.0, 250.0],[2.0, 2.0, 0.0, 0.0]]
EV=[[1,2],[1,3],[3,4],[2,4]]

for i=1:60
           push!(b[1],(1.0 + i*4.0))
           push!(b[2],(1.0))
           push!(b[1],(4.0 + i*4.0))
           push!(b[2],(1.0))
           push!(b[1],(1.0 + i*4.0))
           push!(b[2],(4.0))
           push!(b[1],(4.0 + i*4.0))
           push!(b[2],(4.0))
           
           push!(EV,[1+4*(i-1),2+4*(i-1)])
           push!(EV,[1+4*(i-1),3+4*(i-1)])
           push!(EV,[2+4*(i-1),4+4*(i-1)])
           push!(EV,[3+4*(i-1),4+4*(i-1)])
end

V = permutedims(reshape(hcat(b...), (length(b[1]), length(b))))
filter!(e->e!=[1,1],EV)

W = convert(Lar.Points, V')
cop_EV = Lar.coboundary_0(EV::Lar.Cells)
cop_EW = convert(Lar.ChainOp, cop_EV)
bigPI = Lar.spaceindex((V, EV))

244-element Array{Array{Int64,1},1}:
 [2, 6, 5, 10, 11, 14, 15, 18, 19, 22  …  230, 231, 234, 235, 238, 239, 242, 243, 4, 7]
 [6, 1, 3, 5, 8]
 [2, 6, 8, 4, 7]
 [1, 3, 5, 8, 7]
 [2, 6, 1, 10, 11, 14, 15, 18, 19, 22  …  230, 231, 234, 235, 238, 239, 242, 243, 4, 7]
 [2, 1, 3, 5, 8]
 [1, 3, 5, 8, 4]
 [2, 6, 3, 4, 7]
 [10, 11]
 [1, 5, 9, 12]
 [1, 5, 9, 12]
 [10, 11]
 [14, 15]
 ⋮
 [234, 235]
 [1, 5, 233, 236]
 [1, 5, 233, 236]
 [234, 235]
 [238, 239]
 [1, 5, 237, 240]
 [1, 5, 237, 240]
 [238, 239]
 [242, 243]
 [1, 5, 241, 244]
 [1, 5, 241, 244]
 [242, 243]

## Benchmark vecchia funzione

In [13]:
@btime frag_edge(W, cop_EV, 1, bigPI)

  2.808 ms (10227 allocations: 522.28 KiB)


([2.0 2.0; 250.0 2.0; … ; 250.0 2.0; 250.0 2.0], 
  [1  ,   1]  =  1
  [119,   2]  =  1
  [1  ,   5]  =  1
  [2  ,   5]  =  1
  [2  ,   6]  =  1
  [3  ,   6]  =  1
  [3  ,   7]  =  1
  [4  ,   7]  =  1
  [4  ,   8]  =  1
  [5  ,   8]  =  1
  [5  ,   9]  =  1
  [6  ,   9]  =  1
  ⋮
  [113, 116]  =  1
  [113, 117]  =  1
  [114, 117]  =  1
  [114, 118]  =  1
  [115, 118]  =  1
  [115, 119]  =  1
  [116, 119]  =  1
  [116, 120]  =  1
  [117, 120]  =  1
  [117, 121]  =  1
  [118, 121]  =  1
  [118, 122]  =  1
  [119, 122]  =  1)

## Controllo se la funzione è type unstable

In [14]:
@code_warntype frag_edge(W, cop_EV, 1, bigPI)

Variables
  #self#[36m::Core.Compiler.Const(frag_edge, false)[39m
  V[36m::Array{Float64,2}[39m
  EV[36m::SparseMatrixCSC{Int8,Int64}[39m
  edge_idx[36m::Int64[39m
  bigPI[36m::Array{Array{Int64,1},1}[39m
  @_6[36m::Int64[39m
  alphas[36m::Dict{Float64,Int64}[39m
  edge[36m::SparseVector{Int8,Int64}[39m
  verts[91m[1m::Any[22m[39m
  @_10[33m[1m::Union{Nothing, Tuple{Int64,Int64}}[22m[39m
  alphas_keys[36m::Array{Float64,1}[39m
  edge_num[36m::Int64[39m
  verts_num[91m[1m::Any[22m[39m
  ev[36m::SparseMatrixCSC{Int8,Int64}[39m
  @_15[33m[1m::Union{Nothing, Tuple{Int64,Int64}}[22m[39m
  i@_16[36m::Int64[39m
  intersection[91m[1m::Array{_A,1} where _A[22m[39m
  @_18[33m[1m::Union{Nothing, Tuple{Any,Int64}}[22m[39m
  point[91m[1m::Any[22m[39m
  alpha[91m[1m::Any[22m[39m
  @_21[91m[1m::Any[22m[39m
  i@_22[36m::Int64[39m

Body[91m[1m::Tuple{Any,SparseMatrixCSC{Int8,Int64}}[22m[39m
[90m1 ──[39m       Core.NewvarNode(:(@_6))
[

La funzione è type stable in quanto ritorna la stringa:  
    
    Body::Tuple{Any,SparseMatrixCSC{Int8,Int64}}


## Parallelizzazione cicli con i threads

Per riuscire a parallelizzare questa funzione è stato fatto un refactoring massiccio del codice. La costruzione dei vertici adesso è affidata a threads che singolarmente calcolano una parte dell'insieme che poi successivamente sara fuso con gli altri. La costruzione di ev invece, dipendendo da un dizionario, è stata più difficile in quanto l'etichettatura dei nodi dipendeva dall'iterazione sull'indice spaziale. Siccome le iterazioni sono splittate tra thread non si conosceva più il progressivo id del vertice.  
  
Aggiungendo una porzione di codice che etichetta i nodi iterando su gli alpha trovati ad ogni iterazione siamo riusciti a risolvere il problema migliorando le prestazioni.

In [18]:
function frag_edge2(V, EV::Lar.ChainOp, edge_idx::Int, bigPI)
    #Thread safety data structures    
    nth = nthreads()
    lbp = length(bigPI[edge_idx])
    alphaT=Array{Array{Float64}}(undef, lbp)
    vertsT = Array{Array{Float64,2}}(undef, nth)
    for i=1:nth
         vertsT[i] = Array{Float64,2}(undef,0,2)
    end
    edge = EV[edge_idx, :]
    @threads for it=1:lbp
        alphaT[it] = Array{Float64}(undef,0)
        tid = threadid() #Thread associato all'iterazione corrente it
        i=bigPI[edge_idx][it] #Edge da intersecare
        if i != edge_idx
            intersection = intersect_edges(V, edge, EV[i, :])
            for (point, alpha) in intersection
                vertsT[tid] = [vertsT[tid]; point]
                push!(alphaT[it],alpha) 
            end
        end
    end
    #Inizializzo strutture da ritornare
    verts = V[edge.nzind, :]
    for i=1:nth
        verts = [verts; vertsT[i]]
    end
    alphas = Dict{Float64, Int}()
    n=3
    for it=1:length(alphaT)
        for alpha in alphaT[it]
            alphas[alpha] = n
            n=n+1
        end
    end
    alphas[0.0], alphas[1.0] = [1, 2]
    alphas_keys = sort(collect(keys(alphas)))
    edge_num = length(alphas_keys)-1
    verts_num = size(verts, 1)
    ev = SparseArrays.spzeros(Int8, edge_num, verts_num)
    for i in 1:edge_num
        ev[i, alphas[alphas_keys[i]]] = 1
        ev[i, alphas[alphas_keys[i+1]]] = 1
    end
    return verts, ev
end

@btime frag_edge2(W, cop_EV, 1, bigPI)

  1.527 ms (10610 allocations: 480.70 KiB)


([2.0 2.0; 250.0 2.0; … ; 250.0 2.0; 250.0 2.0], 
  [1  ,   1]  =  1
  [119,   2]  =  1
  [1  ,   5]  =  1
  [2  ,   5]  =  1
  [2  ,   6]  =  1
  [3  ,   6]  =  1
  [3  ,   7]  =  1
  [4  ,   7]  =  1
  [4  ,   8]  =  1
  [5  ,   8]  =  1
  [5  ,   9]  =  1
  [6  ,   9]  =  1
  ⋮
  [113, 116]  =  1
  [113, 117]  =  1
  [114, 117]  =  1
  [114, 118]  =  1
  [115, 118]  =  1
  [115, 119]  =  1
  [116, 119]  =  1
  [116, 120]  =  1
  [117, 120]  =  1
  [117, 121]  =  1
  [118, 121]  =  1
  [118, 122]  =  1
  [119, 122]  =  1)