In [None]:
using Revise
using Plots
import Random
using Test

In [None]:
using Cluster

### Generate a Cluster Problem
- Create 10 distinct cluster centers.
- Add random noise around each center (100 points per cluster).

In [None]:
# Build a synthetic 2-D clustering problem: ten groups of 100 points each.
# Every group is a 2×100 matrix (one column per point); the groups are then
# concatenated column-wise into a single 2×1000 matrix `M`.
# The RNG is seeded first so the generated data is reproducible.
Random.seed!(1)

# Groups 1-6: uniform [0, 1) noise shifted by a (scaled) centre vector.
M1 = rand(2, 100) .+ [-1, -2]
M2 = rand(2, 100) .+ 3.0 .* [1, 2]
M3 = rand(2, 100) .+ 6.0 .* [2, 1]
M4 = rand(2, 100) .+ 9.0 .* [1, 1]
M5 = rand(2, 100) .+ 12.0 .* [-1, 1]
M6 = rand(2, 100) .+ 15.0 .* [0.5, 3.0]

# NOTE(review): groups 7 and 8 combine `.+` / `.*` differently from the rows
# above (offset added to the scalar; noise scaled per coordinate) — confirm
# this is intentional and not a typo for `18.0 .* [...]` / `21.0 .* [...]`.
M7 = rand(2, 100) .+ (18.0 .+ [-2.4, 1.0])
M8 = rand(2, 100) .* [0.3, -0.3] .+ 21.0

# Groups 9-10: noise shifted by the same scalar in both coordinates.
M9 = rand(2, 100) .+ 24.0
M10 = rand(2, 100) .+ 27.0

M = [M1 M2 M3 M4 M5 M6 M7 M8 M9 M10]

### Find the best Clusters
- Find the best clustering over a range of cluster counts.
    - `ds` : The total variation for the clustering.                     Int -> Float
    - `mp` : Map from the index of a point to the index of its centroid. Int -> (Int -> Int)
    - `xc` : Map from centroid indices to centroids.                     Int -> (2xn)Matrix{Float}
    - `sd` : The list of unused centroid indices.                        Int -> Vector{Int}
    - Note: the call below unpacks `kbest, mp, xc, ds`; it does not capture `sd`.

In [None]:
using Cluster

# Run the cluster search on the synthetic data for cluster counts 2 through 15
# and unpack the first four values of the result.
kbest, mp, xc, ds = find_best_cluster(
    M,
    2:15;
    verbose=true,
    num_trials=300,
    N=1000,
    threshold=1.0e-2,
)

In [None]:
# Show `xc` (unpacked from the `find_best_cluster` result) as the cell output.
xc

In [None]:
# Show `ds` (unpacked from the `find_best_cluster` result) as the cell output.
ds

### Plot the Result

In [None]:
# Split `xc` into its two rows; they are plotted below as the centroid
# x and y coordinates respectively.
x = xc[1, :]
y = xc[2, :]

In [None]:
# Row 1 of `M` holds the x coordinates of the data points, row 2 the y
# coordinates. The trailing `;` suppresses the cell output.
xp, yp = M[1, :], M[2, :];

In [None]:
# The axis limits must cover both the data points and the centroids, so the
# extremes are taken over the concatenation of the two coordinate sets.
xs = vcat(xp, x)
xmin, xmax = extrema(xs)

ys = vcat(yp, y)
ymin, ymax = extrema(ys)

# Data points in blue on axes padded by 1.0 on each side, then the centroids
# overlaid in yellow on the same figure.
scatter(
    xp,
    yp;
    color="blue",
    legend=:none,
    xlims=(xmin - 1.0, xmax + 1.0),
    ylims=(ymin - 1.0, ymax + 1.0),
)
g = scatter!(x, y; color="yellow")

In [None]:
# Write the combined scatter plot `g` to "file.svg" (relative path).
savefig(g,"file.svg")

## Apply find_best_cluster to Iris DataSet

In [None]:
import RDatasets

In [None]:
# Load the "iris" table from the "datasets" collection shipped with RDatasets.
iris = RDatasets.dataset("datasets", "iris")

In [None]:
# Build the feature matrix with one column per observation:
# row 1 = SepalWidth, row 2 = SepalLength.
MI = iris[:, [:SepalWidth, :SepalLength]] |> Matrix |> permutedims


In [None]:
# Cluster search on the iris features for cluster counts 2 through 7,
# passing `L2` as the distance metric.
kbest, mp, xc, ds = find_best_cluster(
    MI,
    2:7;
    dmetric=L2,
    verbose=true,
    num_trials=300,
    N=1000,
    threshold=1.0e-2,
)

In [None]:
# Rows 1 and 2 of `xc` are plotted below as the centroid x and y coordinates.
# The trailing `;` suppresses the cell output.
x, y = xc[1, :], xc[2, :];

In [None]:
# Data coordinates: row 1 of `MI` is plotted as x, row 2 as y.
xp, yp = MI[1, :], MI[2, :];

# Integer colour code per observation:
# "virginica" -> 1, "versicolor" -> 2, any other species -> 3.
sp = map(
    spec -> spec == "virginica" ? 1 : (spec == "versicolor" ? 2 : 3),
    iris[:, :Species],
);

In [None]:
# Axis limits span both the iris observations and the centroids.
xs = vcat(xp, x)
xmin, xmax = extrema(xs)

ys = vcat(yp, y)
ymin, ymax = extrema(ys)

# Observations coloured by the species code in `sp`, on axes padded by 1.0;
# centroids are then overlaid in yellow on the same figure.
scatter(
    xp,
    yp;
    color=sp,
    legend=:none,
    xlims=(xmin - 1.0, xmax + 1.0),
    ylims=(ymin - 1.0, ymax + 1.0),
)
g = scatter!(x, y; color="yellow")

In [None]:
# Write the iris scatter plot `g` to "iris.svg" (relative path).
savefig(g,"iris.svg")

In [None]:
# Repeat the iris cluster search with a metric that forwards to `LP`,
# fixing its third positional argument to 1 and passing keywords through.
kbest, mp, xc, ds = find_best_cluster(
    MI,
    2:7;
    dmetric=(a, b; kw...) -> LP(a, b, 1; kw...),
    verbose=true,
    num_trials=300,
    N=1000,
    threshold=1.0e-2,
)

In [None]:
# Rows 1 and 2 of the newly assigned `xc`; plotted below as the centroid
# x and y coordinates.
x = xc[1, :]
y = xc[2, :]

In [None]:
# Data coordinates: row 1 of `MI` is plotted as x, row 2 as y.
xp, yp = MI[1, :], MI[2, :];

# Integer colour code per observation:
# "virginica" -> 1, "versicolor" -> 2, any other species -> 3.
sp = map(
    spec -> spec == "virginica" ? 1 : (spec == "versicolor" ? 2 : 3),
    iris[:, :Species],
);

In [None]:
# Axis limits span both the iris observations and the centroids.
xs = vcat(xp, x)
xmin, xmax = extrema(xs)

ys = vcat(yp, y)
ymin, ymax = extrema(ys)

# Observations coloured by the species code in `sp`, on axes padded by 1.0;
# centroids are then overlaid in yellow on the same figure.
scatter(
    xp,
    yp;
    color=sp,
    legend=:none,
    xlims=(xmin - 1.0, xmax + 1.0),
    ylims=(ymin - 1.0, ymax + 1.0),
)
g = scatter!(x, y; color="yellow")

In [None]:
# Repeat the iris cluster search, passing `KL` as the distance metric.
kbest, mp, xc, ds = find_best_cluster(
    MI,
    2:7;
    dmetric=KL,
    verbose=true,
    num_trials=300,
    N=1000,
    threshold=1.0e-2,
)

In [None]:
# Rows 1 and 2 of the newly assigned `xc`; plotted below as the centroid
# x and y coordinates.
x = xc[1, :]
y = xc[2, :]

In [None]:
# Data coordinates: row 1 of `MI` is plotted as x, row 2 as y.
xp, yp = MI[1, :], MI[2, :];

# Integer colour code per observation:
# "virginica" -> 1, "versicolor" -> 2, any other species -> 3.
sp = map(
    spec -> spec == "virginica" ? 1 : (spec == "versicolor" ? 2 : 3),
    iris[:, :Species],
);

In [None]:
# Axis limits span both the iris observations and the centroids.
xs = vcat(xp, x)
xmin, xmax = extrema(xs)

ys = vcat(yp, y)
ymin, ymax = extrema(ys)

# Observations coloured by the species code in `sp`, on axes padded by 1.0;
# centroids are then overlaid in yellow on the same figure.
scatter(
    xp,
    yp;
    color=sp,
    legend=:none,
    xlims=(xmin - 1.0, xmax + 1.0),
    ylims=(ymin - 1.0, ymax + 1.0),
)
g = scatter!(x, y; color="yellow")

In [None]:
# Show `ds` as last assigned from a `find_best_cluster` result.
ds