In [1]:
using Statistics, OrderedCollections, Dates, Parameters

In [3]:
include("aggreg.jl");

In [4]:
# using EnergySystemModeling: SeriesInstance,
#     ClustInstance,
#     AggregInstance,
#     DistUpdate,
#     load_series_instance,
#     load_clust_instance,
#     aggreg1D,
#     cdad,
#     search_min_dist,
#     compute_dist,
#     update_marker,
#     replace_lines,
#     update_clust!,
#     update_k!

In [5]:
# Define a new copy method to copy _ClustInstance and _SeriesInstance
Base.copy(x::T) where T = T([getfield(x, k) for k ∈ fieldnames(T)]...);

In [6]:
function test_paral(series::VecOrMat{Float64},
    block_size::Int,
    stopping_k::Int,
    current_k::Int,
    dm::Symbol,
    rep_value::Symbol,
    lseries::Int,
    nseries::Int,
    series_dc::VecOrMat{Float64},
    ord_dc::VecOrMat{Int},
    k_cent::VecOrMat{Float64},
    weights::Vector{Int},
    series_clust::Vector{Int},
    nclusters::Int,
    search_range::UnitRange,
    dc_mode::Bool,
    _SeriesInstance::SeriesInstance,
    _ClustInstance::ClustInstance,
    _DistUpdate::Dict{Vector{Bool},DistUpdate},
    _SeriesUpdate::Dict{String,SeriesInstance},
    _ClustUpdate::Dict{String,ClustInstance})

    # @info "Clustering the next step"
    k = lseries
    @time while k >= stopping_k + block_size - 1
        
        # Vector with distances (to be updated as it goes)
        dist = Vector{Float64}(undef,length(search_range))

        # Sets
        N = 1:nseries
        K = copy(search_range)

        # Compute the distance for each aggregation (i.e., changing the merging_clust)
        # TODO: implement parallelisation such as '@async Threads.@threads @inbounds for k in K'
        @inbounds for k_search in K
            # Merging to be tested (neighbouring hypothesis)
            # TODO: implement non-neighbouring hypothesis
            merging_clust = k_search:k_search+block_size-1
            # Create a temporary marker to merge the clusters tested
            marker_temp = [sc in merging_clust for sc in series_clust]

            # Separation needed for duration curves analysis
            if dc_mode
                # Create a temp marker for the elements in between clustered [min,max] order
                marker_temp_dc = minimum(ord_dc[marker_temp,:]):maximum(ord_dc[marker_temp,:])
                # Using duration curves chunks (in between the min and max values of marker_temp)
                series_comp = sort(series, dims=1,rev=true)[marker_temp_dc,:]
                # Forming the centroids
                k_cent_comp = copy(series)
                (k_cent_comp[marker_temp,:],) = aggreg1D(series[marker_temp,:], rep_value)
                # Ordering chunk with clustering instance in a decrescent order
                k_cent_comp = sort(k_cent_comp, dims=1, rev=true)[marker_temp_dc,:]
            else
                # Part of series compared
                series_comp = series[marker_temp,:]
                # Centroids of the temporarily formed cluster (TODO: implement another method for aggreg1D receiving the clusters with respective weights)
                (k_cent_comp,) = aggreg1D(series_comp, rep_value)
            end
            
            # Distance computation
            dist[k_search] = compute_dist(N, dm, series_comp, k_cent_comp)
        end

        # Find whenever the min_dist occurs first (i.e., using findmin()[2])
        ## TODO: implement the multiple merges (e.g., using findall())
        min_dist = findmin(dist)[2] |> Int        
        merging_clust = min_dist:min_dist+block_size-1

        # Create a flag to the positions in the series that will be aggregated
        marker = update_marker(_SeriesInstance, _ClustInstance, min_dist)    

        # Update _DistUpdate dictionary with the minimal distance found and the new marker
        _DistUpdate = merge(+, _DistUpdate, Dict(marker => DistUpdate(min_dist,merging_clust)))

        # Update clusters and series_clust
        update_clust!(_ClustInstance, _SeriesInstance, min_dist)

        # Update clustering values
        series_clust = _ClustInstance.series_clust
        weights = _ClustInstance.weights

        nclusters = _ClustInstance.nclusters
        k_cent = _ClustInstance.k_cent
        search_range =_ClustInstance.search_range
        
        # Update number of clusters k
        new_current_k = _ClustInstance.nclusters
        update_k!(_SeriesInstance, new_current_k)
        k = new_current_k

        # Store series_clust and k_cent
        _ClustUpdate = merge(+,_ClustUpdate,Dict("$k" => copy(_ClustInstance)))
        _SeriesUpdate = merge(+,_SeriesUpdate,Dict("$k" => copy(_SeriesInstance)))

    end
end

test_paral (generic function with 1 method)

In [15]:
function execute_inst(lseries::Int,nseries::Int,dc_mode::Bool)
    
    # Declare series
    series = rand(lseries,nseries);
    
    # Forming SeriesInstance
    block_size = 2
    stopping_k = 1
    current_k = lseries
    rep_value = :mean
    series_dc = sort(series, dims=1, rev=true)
    ord_dc = reduce(hcat,sortperm.(collect(eachslice(series,dims=2)),rev=true))
    
    # Forming ClustInstance
    k_cent = copy(series)
    weights = ones(lseries) |> Vector{Int64}
    series_clust = collect(1:lseries) |> Vector{Int64}
    nclusters = lseries
    search_range = 1:size(series,1)-block_size+1
    dm = :ward

    # Declaring instances
    _SeriesInstance = load_series_instance(series,block_size,current_k,stopping_k,dm,rep_value,lseries,nseries,series_dc,ord_dc);
    _ClustInstance = load_clust_instance(k_cent,series_clust,weights,search_range,dc_mode)

    # Dictionary to keep the min distances and respective markers/min_dist found in each iteration
    _DistUpdate = Dict{Vector{Bool}, DistUpdate}()

    # Dictionaries to store series_clust and k_cent
    _SeriesUpdate = Dict{String,SeriesInstance}()
    _ClustUpdate = Dict{String,ClustInstance}();

    
    test_paral(series,block_size,
    stopping_k,
    current_k,
    dm,
    rep_value,
    lseries,
    nseries,
    series_dc,
    ord_dc,
    k_cent,
    weights,
    series_clust,
    nclusters,
    search_range,
    dc_mode,
    _SeriesInstance,
    _ClustInstance,
    _DistUpdate,
    _SeriesUpdate,
    _ClustUpdate)
end

execute_inst (generic function with 1 method)

## Test 1: 100 lines random series

In [18]:
execute_inst(100,4,true)

  0.277453 seconds (329.49 k allocations: 153.613 MiB, 4.30% gc time)


In [19]:
execute_inst(100,4,false)

  0.071736 seconds (210.69 k allocations: 25.153 MiB, 7.14% gc time)


## Test 2: 200 lines random series

In [20]:
execute_inst(200,4,true)

  2.120620 seconds (1.30 M allocations: 1.089 GiB, 4.18% gc time)


In [21]:
execute_inst(200,4,false)

  0.322298 seconds (821.50 k allocations: 122.399 MiB, 10.74% gc time)


## Test 3: 500 lines random series

In [22]:
execute_inst(500,4,true)

 37.707016 seconds (8.17 M allocations: 16.596 GiB, 2.76% gc time)


In [23]:
execute_inst(500,4,false)

  2.415778 seconds (5.18 M allocations: 1.650 GiB, 6.71% gc time)
