In [124]:
using JLD
using Statistics
using Distributions
using HypothesisTests
using FuzzyClusteringSimilarity

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling RCall [6f49c342-dc21-5d91-9882-a32aef131414]


In [79]:
res = load("ErrorAnalysis.jld")
results = res["results"]

# Layout of `results`:
# Dimension 1 -> Fit, Sym, Flat
# Dimension 2 -> Run number, varying parameter values. See DataGeneration script
# Dimension 3 -> Computations of the same pair of clusterings.

# Drop every run (row) whose repeated computations (columns) are all negative.
# The number of computations is taken from the data itself instead of assuming 100.
dropnegativeruns(scores) = scores[vec(.!all(<(0), scores; dims=2)), :]

# Absolute deviation of each computation from the mean over the computations of
# the same run (row-wise mean).
absoluteerrors(scores) = abs.(scores .- mean(scores; dims=2))

# Remove runs (rows) with all negatives
fitNoNegCols = dropnegativeruns(results[1, :, :])
symNoNegCols = dropnegativeruns(results[2, :, :])
flatNoNegCols = dropnegativeruns(results[3, :, :])

# Calculate Absolute Errors
fitae = absoluteerrors(fitNoNegCols)
symae = absoluteerrors(symNoNegCols)
flatae = absoluteerrors(flatNoNegCols)

135×100 Matrix{Float64}:
 8.75858e-6   3.8832e-5    0.000114843  …  0.000853314  8.43737e-5
 7.06577e-5   0.000152804  0.000513902     0.000398821  3.51612e-5
 0.00080299   0.000172414  5.97475e-5      0.000303503  2.08038e-5
 0.000229889  7.39754e-5   0.000990611     0.000249623  0.000368271
 0.000709494  0.000775577  8.30588e-5      0.000610036  0.000156073
 4.50008e-5   0.000380675  0.000563521  …  7.45643e-5   0.000797942
 0.000161843  0.000282659  0.000634726     0.000263511  0.000820961
 0.000517992  0.000827338  0.000652818     0.000386253  0.000395717
 0.000567389  0.000561509  0.000245669     0.000102307  0.00015699
 0.000149214  0.000109124  7.42929e-5      0.000402913  0.00032814
 0.000219371  0.000361839  0.000391971  …  0.000248356  1.6493e-5
 0.000163777  0.00111924   0.000213838     9.62494e-5   0.000931808
 0.000302748  1.5837e-5    0.00103882      0.000177533  0.000151862
 ⋮                                      ⋱               
 0.00105296   0.000332806  0.000186151   

## Maximum Absolute Error

In [80]:
# Largest absolute error observed for each measure. Every entry of the `*ae`
# matrices is the absolute deviation of one computation from the mean over the
# computations of the same clusterings.
maxAbsoluteErrorFit, maxAbsoluteErrorSym, maxAbsoluteErrorFlat =
    maximum.((fitae, symae, flatae))

println("Fit Max Absolute Error: ", maxAbsoluteErrorFit)
println("Sym Max Absolute Error: ", maxAbsoluteErrorSym)
println("Flat Max Absolute Error: ", maxAbsoluteErrorFlat)

Fit Max Absolute Error: 0.01969712861722592
Sym Max Absolute Error: 0.018762679522152326
Flat Max Absolute Error: 0.002433261435505013


## Proportion of Absolute Errors < 0.01

In [81]:
# Fraction (in [0, 1]) of the entries of `errors` that are strictly below `tol`.
# The denominator is the actual number of entries, so the result stays correct
# when the number of computations per run is not 100.
proportionbelow(errors, tol) = count(<(tol), errors) / length(errors)

# NOTE: despite the `percentile*` names these are proportions, not percentages.
percentileFit = proportionbelow(fitae, 0.01)
percentileSym = proportionbelow(symae, 0.01)
percentileFlat = proportionbelow(flatae, 0.01)

println("The proportion of Fit computations with absolute error less than 0.01: ", percentileFit)
println("The proportion of Sym computations with absolute error less than 0.01: ", percentileSym)
println("The proportion of Flat computations with absolute error less than 0.01: ", percentileFlat)

The proportion of Fit computations with absolute error less than 0.01: 0.9975603864734299
The proportion of Sym computations with absolute error less than 0.01: 0.995112540192926
The proportion of Flat computations with absolute error less than 0.01: 1.0
