In [None]:
# Install StatsBase into the active environment; it is loaded further down with
# `using StatsBase`. The other packages loaded below (JuMP, Ipopt, CSV, DataFrames)
# are not installed by this cell.
import Pkg
Pkg.add("StatsBase")

In [None]:
using JuMP
using Ipopt
using Statistics
using StatsBase
using LinearAlgebra
using CSV
using DataFrames
using Random

In [None]:
"""
    optimiser(X, Y; gamma=1)

Fit non-negative diagonal weights `a` around the mean of `X` so that rows of `X`
score low and rows of `Y` score high under the weighted squared distance
`sum(a[k] * (z[k] - c[k])^2 for k in 1:d)`.

The solved problem is

    minimise   (n / m) * sum(u) + sum(v)
    subject to u[i] >= sum(a[k] * (X[i, k] - c[k])^2)        for every row i of X
               v[j] >= 2 - sum(a[k] * (Y[j, k] - c[k])^2)    for every row j of Y
               a, u, v >= 0

where `m` and `n` are the row counts of `X` and `Y`, and `c` is the column-wise
mean of `X`.

# Arguments
- `X`: `m x d` matrix of samples of the target class.
- `Y`: `n x d` matrix of samples of all other classes.

# Keywords
- `gamma`: accepted for backward compatibility but currently has no effect. The
  only term that used it, `- gamma * (n + m) * sum(a)`, is disabled in the
  objective.

# Returns
A tuple `(a, c)`: the length-`d` vector of optimised diagonal entries and the
`1 x d` centre (row matrix as returned by `mean(X, dims=1)`).

# Throws
- `DimensionMismatch` if `X` and `Y` have a different number of columns.
- `ArgumentError` if `X` has no rows (the centre and `n / m` would be undefined).
- `ErrorException` if the solver finishes without providing any solution values.
"""
function optimiser(X, Y; gamma=1)
    # X and Y are matrices of size m x d and n x d respectively
    m = size(X, 1)
    n = size(Y, 1)
    d = size(Y, 2)

    # Validate up front: a column mismatch would otherwise either be silently
    # ignored (extra columns in X) or surface as a BoundsError inside the
    # constraint macro.
    size(X, 2) == d || throw(DimensionMismatch(
        "X has $(size(X, 2)) columns but Y has $(d); both must have the same number of columns",
    ))
    m > 0 || throw(ArgumentError("X must contain at least one row"))

    # The centerpoint is the arithmetic mean of the vectors in X
    c = mean(X, dims=1)

    # Weight on the X slack so both classes contribute comparably to the objective
    omega = n / m

    # Choose the solver and silence its iteration log
    model = Model(Ipopt.Optimizer)
    set_optimizer_attribute(model, "print_level", 0)

    # Declare variables
    @variable(model, a[i in 1:d] >= 0)      # Elements on the diagonal of the characteristic matrix
    @variable(model, u[i in 1:m] >= 0)      # Relaxing variables for X
    @variable(model, v[i in 1:n] >= 0)      # Relaxing variables for Y

    # Objective: weighted total slack.
    # NOTE: the regularisation term `- gamma * (n + m) * sum(a)` is intentionally
    # left out, as in the original implementation. Per the original author's note,
    # `a` holds inverses related to the semi-axis lengths, not the lengths themselves.
    @objective(model, Min, omega * sum(u[i] for i in 1:m) + sum(v[j] for j in 1:n))

    # Declare the constraints
    @constraint(model, [i in 1:m], u[i] - sum(a[k] * (X[i, k] - c[k]) ^ 2 for k in 1:d) >= 0)
    @constraint(model, [j in 1:n], v[j] + sum(a[k] * (Y[j, k] - c[k]) ^ 2 for k in 1:d) - 2 >= 0)

    # Solve the problem
    optimize!(model)

    # Do not hand back values from a failed solve without saying so
    status = termination_status(model)
    if !has_values(model)
        error("optimiser: solver returned no solution (termination status: $(status))")
    end
    if !(status in (MOI.OPTIMAL, MOI.LOCALLY_SOLVED))
        @warn "optimiser: solver did not report an optimal solution" status
    end

    # Print results
    println("Average u ", mean(value.(u)))
    println("Average v ", mean(value.(v)))

    # Return the found diagonal and centerpoint
    return (value.(a), c)
end

In [None]:
# One-vs-rest training: for every label, fit a diagonal/centre pair with `optimiser`
# using the rows of that label as X and a random subset of all other rows as Y.

# Path to training data. Note all of the data will be used
path = ""

# Read the CSV file
df = CSV.read(path, DataFrame)
d = ncol(df) - 1  # The label column is not used

labels = df.label
unique_labels = unique(labels)

num_labels = size(unique_labels)[1]

# One row per label: the centre and the optimised diagonal
centers = zeros(num_labels, d)
diags = zeros(num_labels, d)

# Go over all labels and optimize for them
for i in 1:num_labels
    label = unique_labels[i]
    println("\nOptimizing for: ", label)

    # Split into target rows (X) and all remaining rows (Y), dropping the label column
    df_X = df[df.label .== label, Not(:label)]
    df_Y = df[df.label .!= label, Not(:label)]

    # Subsample the negatives to at most 5x the positives. Sampling without
    # replacement throws if more rows are requested than exist, so cap the request.
    wanted = 5 * nrow(df_X)
    n_neg = min(wanted, nrow(df_Y))
    if n_neg < wanted
        @warn "Fewer negative rows than requested; using all of them" label wanted available = n_neg
    end
    df_Y = df_Y[sample(axes(df_Y, 1), n_neg; replace = false, ordered = true), :]

    X = Matrix(df_X)
    Y = Matrix(df_Y)

    # `a_diag` rather than `diag`, to avoid shadowing LinearAlgebra.diag
    (a_diag, center) = optimiser(X, Y)

    # `center` is a 1 x d row matrix; flatten it before storing
    centers[i, :] .= vec(center)
    diags[i, :] .= a_diag
end

# Generate DataFrames
df_diags = DataFrame(diags, :auto)
df_centers = DataFrame(centers, :auto)
df_diags[!, "label"] = unique_labels
df_centers[!, "label"] = unique_labels

# Save as CSV  (PRIO)
# NOTE(review): both output paths are empty placeholders here; they must be set to
# two distinct file names, otherwise the second write replaces the first.
CSV.write("",  df_centers)
CSV.write("",  df_diags)

In [None]:
# Leave-n-labels-out training: for n = 0..max_left_out, pick a random subset of
# `num_labels - n` known labels and fit one diagonal/centre pair per known label,
# using only rows of the other known labels as negatives.

# Path to training data. Note all of the data will be used
path = ""

# Read the CSV file
df = CSV.read(path, DataFrame)
d = ncol(df) - 1  # The label column is not used

labels = df.label
unique_labels = unique(labels)

num_labels = size(unique_labels)[1]

# Max number of labels left out
max_left_out = 4

# Go over the n left out cases
for n in 0:max_left_out
    println("\nNumber of labels left out: $(n)\n")
    # One row per known label: the centre and the optimised diagonal
    centers = zeros(num_labels - n, d)
    diags = zeros(num_labels - n, d)

    known_labels = sample(unique_labels, num_labels - n, replace = false, ordered = true)

    for i in 1:(num_labels - n)
        label = known_labels[i]
        println("\nOptimizing for: ", label)

        # Negatives come only from the other *known* labels; left-out labels are unseen
        other_labels = known_labels[begin:end .!= i]
        df_X = df[df.label .== label, Not(:label)]
        df_Y = df[in(other_labels).(df.label), Not(:label)]

        # Subsample the negatives to at most 10x the positives. Leaving labels out
        # shrinks the negative pool, and sampling without replacement throws if
        # more rows are requested than exist, so cap the request.
        wanted = 10 * nrow(df_X)
        n_neg = min(wanted, nrow(df_Y))
        if n_neg < wanted
            @warn "Fewer negative rows than requested; using all of them" label wanted available = n_neg
        end
        df_Y = df_Y[sample(axes(df_Y, 1), n_neg; replace = false, ordered = true), :]

        X = Matrix(df_X)
        Y = Matrix(df_Y)

        # `a_diag` rather than `diag`, to avoid shadowing LinearAlgebra.diag
        (a_diag, center) = optimiser(X, Y)

        # `center` is a 1 x d row matrix; flatten it before storing
        centers[i, :] .= vec(center)
        diags[i, :] .= a_diag
    end
    
    # Generate DataFrames
    df_diags = DataFrame(diags, :auto)
    df_centers = DataFrame(centers, :auto)
    df_diags[!, "label"] = known_labels
    df_centers[!, "label"] = known_labels

    # Save as CSV
    # NOTE(review): both output paths are empty placeholders here; they must be set
    # to distinct file names (and presumably vary with `n`), otherwise each write
    # replaces the previous one.
    CSV.write("",  df_centers)
    CSV.write("",  df_diags)
end

In [None]:
# Repeat the one-vs-rest training five times. Each run re-reads the data, draws a
# fresh random subset of negatives per label, and writes its own results.
# NOTE(review): `k` is not used in the visible code; presumably the input/output
# paths were meant to depend on it — confirm when filling in the paths.
for k in 1:5
    # Path to training data. Note all of the data will be used
    path = ""

    # Read the CSV file
    df = CSV.read(path, DataFrame)
    d = ncol(df) - 1  # The label column is not used

    labels = df.label
    unique_labels = unique(labels)

    num_labels = size(unique_labels)[1]

    # One row per label: the centre and the optimised diagonal
    centers = zeros(num_labels, d)
    diags = zeros(num_labels, d)

    # Go over all labels and optimize for them
    for i in 1:num_labels
        label = unique_labels[i]
        println("\nOptimizing for: ", label)

        # Split into target rows (X) and all remaining rows (Y), dropping the label column
        df_X = df[df.label .== label, Not(:label)]
        df_Y = df[df.label .!= label, Not(:label)]

        # Subsample the negatives to at most 5x the positives. Sampling without
        # replacement throws if more rows are requested than exist, so cap the request.
        wanted = 5 * nrow(df_X)
        n_neg = min(wanted, nrow(df_Y))
        if n_neg < wanted
            @warn "Fewer negative rows than requested; using all of them" label wanted available = n_neg
        end
        df_Y = df_Y[sample(axes(df_Y, 1), n_neg; replace = false, ordered = true), :]

        X = Matrix(df_X)
        Y = Matrix(df_Y)

        # `a_diag` rather than `diag`, to avoid shadowing LinearAlgebra.diag
        (a_diag, center) = optimiser(X, Y)

        # `center` is a 1 x d row matrix; flatten it before storing
        centers[i, :] .= vec(center)
        diags[i, :] .= a_diag
    end

    # Generate DataFrames
    df_diags = DataFrame(diags, :auto)
    df_centers = DataFrame(centers, :auto)
    df_diags[!, "label"] = unique_labels
    df_centers[!, "label"] = unique_labels

    # Save as CSV
    # NOTE(review): both output paths are empty placeholders here; they must be set
    # to distinct file names, otherwise each write replaces the previous one.
    CSV.write("",  df_centers)
    CSV.write("",  df_diags)
end