In [1]:
using LinearAlgebraicRepresentation
Lar = LinearAlgebraicRepresentation
using IntervalTrees
using SparseArrays
using NearestNeighbors
using DataStructures
using OrderedCollections
using BenchmarkTools

## Funzione da ottimizzare

In [2]:
function merge_vertices!(V::Lar.Points, EV::Lar.ChainOp, edge_map, err=1e-4)
    vertsnum = size(V, 1)
    edgenum = size(EV, 1)
    newverts = zeros(Int, vertsnum)
    # KDTree constructor needs an explicit array of Float64
    V = Array{Float64,2}(V)
    kdtree = KDTree(permutedims(V))

    # merge congruent vertices
    todelete = []
    i = 1
    for vi in 1:vertsnum
        if !(vi in todelete)
            nearvs = Lar.inrange(kdtree, V[vi, :], err)
            newverts[nearvs] .= i
            nearvs = setdiff(nearvs, vi)
            todelete = union(todelete, nearvs)
            i = i + 1
        end
    end
    nV = V[setdiff(collect(1:vertsnum), todelete), :]

    # merge congruent edges
    edges = Array{Tuple{Int, Int}, 1}(undef, edgenum)
    oedges = Array{Tuple{Int, Int}, 1}(undef, edgenum)
    for ei in 1:edgenum
        v1, v2 = EV[ei, :].nzind
        edges[ei] = Tuple{Int, Int}(sort([newverts[v1], newverts[v2]]))
        oedges[ei] = Tuple{Int, Int}(sort([v1, v2]))
    end
    nedges = union(edges)
    nedges = filter(t->t[1]!=t[2], nedges)
    nedgenum = length(nedges)
    nEV = spzeros(Int8, nedgenum, size(nV, 1))
    # maps pairs of vertex indices to edge index
    etuple2idx = Dict{Tuple{Int, Int}, Int}()
    # builds `edge_map`
    for ei in 1:nedgenum
        nEV[ei, collect(nedges[ei])] .= 1
        etuple2idx[nedges[ei]] = ei
    end
    for i in 1:length(edge_map)
        row = edge_map[i]
        row = map(x->edges[x], row)
        row = filter(t->t[1]!=t[2], row)
        row = map(x->etuple2idx[x], row)
        edge_map[i] = row
    end
    # return new vertices and new edges
    return Lar.Points(nV), nEV
end

merge_vertices! (generic function with 2 methods)

## Dati in input

In [3]:
b=[[2.0, 250.0, 2.0, 250.0],[2.0, 2.0, 0.0, 0.0]]
EV=[[1,2],[1,3],[3,4],[2,4]]

for i=1:60
           push!(b[1],(1.0 + i*4.0))
           push!(b[2],(1.0))
           push!(b[1],(4.0 + i*4.0))
           push!(b[2],(1.0))
           push!(b[1],(1.0 + i*4.0))
           push!(b[2],(4.0))
           push!(b[1],(4.0 + i*4.0))
           push!(b[2],(4.0))
           
           push!(EV,[1+4*(i-1),2+4*(i-1)])
           push!(EV,[1+4*(i-1),3+4*(i-1)])
           push!(EV,[2+4*(i-1),4+4*(i-1)])
           push!(EV,[3+4*(i-1),4+4*(i-1)])
end

V = permutedims(reshape(hcat(b...), (length(b[1]), length(b))))
filter!(e->e!=[1,1],EV)

W = convert(Lar.Points, V')
copEV = Lar.coboundary_0(EV::Lar.Cells)
bigPI = Lar.spaceindex((V, EV))
edgenum = size(copEV, 1)
edge_map = Array{Array{Int, 1}, 1}(undef,edgenum)
rV = Lar.Points(zeros(0, 2))
rEV = SparseArrays.spzeros(Int8, 0, 0)
finalcells_num = 0
    V=W
# sequential (iterative) processing of edge fragmentation
for i in 1:edgenum
    v, ev = Lar.Arrangement.frag_edge(V, copEV, i, bigPI)
    newedges_nums = map(x->x+finalcells_num, collect(1:size(ev, 1)))
    edge_map[i] = newedges_nums
    finalcells_num += size(ev, 1)
    rV = convert(Lar.Points, rV)
    rV, rEV = Lar.skel_merge(rV, rEV, v, ev)
end
V, copEV = rV, rEV

([2.0 2.0; 250.0 2.0; … ; 237.0 4.0; 240.0 4.0], 
  [1  ,    1]  =  1
  [119,    2]  =  1
  [1  ,    5]  =  1
  [2  ,    5]  =  1
  [2  ,    6]  =  1
  [3  ,    6]  =  1
  [3  ,    7]  =  1
  [4  ,    7]  =  1
  [4  ,    8]  =  1
  [5  ,    8]  =  1
  [5  ,    9]  =  1
  [6  ,    9]  =  1
  ⋮
  [653, 1446]  =  1
  [654, 1449]  =  1
  [655, 1449]  =  1
  [656, 1450]  =  1
  [655, 1452]  =  1
  [656, 1452]  =  1
  [654, 1453]  =  1
  [657, 1455]  =  1
  [658, 1456]  =  1
  [657, 1458]  =  1
  [658, 1458]  =  1
  [659, 1461]  =  1
  [659, 1462]  =  1)

## 0) Benchmark vecchia funzione

In [4]:
V1 = V
copEV1=copEV
edge_map1 = edge_map
@btime V2, copEV2 = merge_vertices!(V1, copEV1,edge_map1)

  86.115 ms (1423599 allocations: 37.09 MiB)


([2.0 2.0; 250.0 2.0; … ; 237.0 4.0; 240.0 4.0], 
  [1  ,   1]  =  1
  [120,   1]  =  1
  [119,   2]  =  1
  [122,   2]  =  1
  [1  ,   3]  =  1
  [2  ,   3]  =  1
  [124,   3]  =  1
  [125,   3]  =  1
  [2  ,   4]  =  1
  [3  ,   4]  =  1
  [126,   4]  =  1
  [127,   4]  =  1
  ⋮
  [468, 352]  =  1
  [467, 353]  =  1
  [470, 353]  =  1
  [469, 354]  =  1
  [470, 354]  =  1
  [471, 355]  =  1
  [472, 355]  =  1
  [471, 356]  =  1
  [474, 356]  =  1
  [473, 357]  =  1
  [476, 357]  =  1
  [475, 358]  =  1
  [476, 358]  =  1)

## 1) Controllo se la funzione è type unstable

In [5]:
V2 = V
copEV2=copEV
edge_map2 = edge_map
@code_warntype  merge_vertices!(V2, copEV2,edge_map2)

Variables
  #self#[36m::Core.Compiler.Const(merge_vertices!, false)[39m
  V[36m::Array{Float64,2}[39m
  EV[36m::SparseMatrixCSC{Int8,Int64}[39m
  edge_map[36m::Array{Array{Int64,1},1}[39m

Body[91m[1m::Tuple{Any,SparseMatrixCSC{Int8,Int64}}[22m[39m
[90m1 ─[39m %1 = (#self#)(V, EV, edge_map, 0.0001)[91m[1m::Tuple{Any,SparseMatrixCSC{Int8,Int64}}[22m[39m
[90m└──[39m      return %1


La funzione è type stable in quanto ritorna:
    
    Body::Tuple{Any,SparseMatrixCSC{Int8,Int64}}

## 2) Parallelizzazione cicli usando i threads  
  
Per eseguire questa cella, aprire il proprio REPL Julia e aprire questo notebook con i comandi:

ENV["JULIA_NUM_THREADS"] = 4 (o un altro numero)
using IJulia
notebook()

Noi studenti, avendo computer dual-core abbiamo fissato in numero di threads pari a 2. Allocarne di più sarebbe stato inutile (in quanto i threads >2 avrebbero aspettato comunque ..) e anzi, mettendo un numero di threads pari a 4 su un dual-core abbiamo notato un grosso peggioramento delle performance dovuta probabilmente all'attesa.  
  
  
In questa funzione sono presenti 4 cicli, 2 dei quali parallelizzabili ( -1) quello che crea gli arrays edges e oedges, -2) quello che crea l'edge_map) e 2 non parallelizzabili ( -1) Quello che fa la ricerca sul kdTree, 2) quello che crea il dict etuple2idx).  
  
Lavorando ai cicli parallelizzabili abbiamo apportato migliorie al codice:  
  
1) Abbiamo tolto i Sort, sostituendoli con espressioni "v1<v2 ? [v1, v2] :  [v2, v1]" in quanto gli edge sono composti da solo 2 elementi (i sort sono molto onerosi!)  
  
2) Abbiamo trasformato i map in for per renderli accessibili ai threads.



In [87]:
using Base.Threads
function merge_vertices2!(V::Lar.Points, EV::Lar.ChainOp, edge_map, err=1e-4)
    vertsnum = size(V, 1)
    edgenum = size(EV, 1)
    newverts = zeros(Int, vertsnum)
    # KDTree constructor needs an explicit array of Float64
    V = Array{Float64,2}(V)
    kdtree = KDTree(permutedims(V))

    # merge congruent vertices
    todelete = []
    i = 1
    for vi in 1:vertsnum
        if !(vi in todelete)
            nearvs = Lar.inrange(kdtree, V[vi, :], err)
            newverts[nearvs] .= i
            nearvs = setdiff(nearvs, vi)
            todelete = union(todelete, nearvs)
            i = i + 1
        end
    end
    nV = V[setdiff(collect(1:vertsnum), todelete), :]

    # merge congruent edges
    edges = Array{Tuple{Int, Int}, 1}(undef, edgenum)
    oedges = Array{Tuple{Int, Int}, 1}(undef, edgenum)
    @threads for ei=1:edgenum
        v1, v2 = EV[ei, :].nzind
        edges[ei]  = Tuple{Int, Int}(newverts[v1]<newverts[v2] ? [newverts[v1], newverts[v2]] : [newverts[v2], newverts[v1]])
        oedges[ei] = Tuple{Int, Int}(v1<v2 ? [v1, v2] :  [v2, v1])
    end
    nedges = union(edges)
    nedges = filter(t->t[1]!=t[2], nedges)
    nedgenum = length(nedges)
    nEV = spzeros(Int8, nedgenum, size(nV, 1))
    # maps pairs of vertex indices to edge index
    etuple2idx = Dict{Tuple{Int, Int}, Int}()
    # builds `edge_map`
    for ei in 1:nedgenum
        nEV[ei, collect(nedges[ei])] .= 1
        etuple2idx[nedges[ei]] = ei
    end
    @threads for i=1:length(edge_map)
        rowT=Array{Tuple{Int64,Int64}}(undef,length(edge_map[i]))
        len = length(edge_map[i])
        for j=1:len
            rowT[j]=edges[edge_map[i][j]]
        end
        filter!(t->t[1]!=t[2], rowT)
        edge_map[i]=Array{Int64}(undef,length(rowT))
        len2 = length(rowT)
        for j=1:len2
            edge_map[i][j]=etuple2idx[rowT[j]]
        end        
    end
    # return new vertices and new edges
    return Lar.Points(nV), nEV
end

V3 = V
copEV3=copEV
edge_map3 = edge_map
nt=nthreads()
println("Numero threads allocati : $nt")
@btime merge_vertices2!(V3, copEV3,edge_map3)


Numero threads allocati : 2
  68.867 ms (1422060 allocations: 36.95 MiB)


([2.0 2.0; 250.0 2.0; … ; 237.0 4.0; 240.0 4.0], 
  [1  ,   1]  =  1
  [120,   1]  =  1
  [119,   2]  =  1
  [122,   2]  =  1
  [1  ,   3]  =  1
  [2  ,   3]  =  1
  [124,   3]  =  1
  [125,   3]  =  1
  [2  ,   4]  =  1
  [3  ,   4]  =  1
  [126,   4]  =  1
  [127,   4]  =  1
  ⋮
  [468, 352]  =  1
  [467, 353]  =  1
  [470, 353]  =  1
  [469, 354]  =  1
  [470, 354]  =  1
  [471, 355]  =  1
  [472, 355]  =  1
  [471, 356]  =  1
  [474, 356]  =  1
  [473, 357]  =  1
  [476, 357]  =  1
  [475, 358]  =  1
  [476, 358]  =  1)