# In this notebook we implement the 'Diagonal Thresholding' algorithm for sparse PCA, introduced in <https://www.tandfonline.com/doi/pdf/10.1198/jasa.2009.0121>.
(TODO: possibly also look at <http://www.mit.edu/~yash/PAPERS/sparse.pdf>.)

In [478]:
using Plots, LinearAlgebra, Random, Distributions, Statistics, MultivariateStats, StatsBase

In [None]:
"""
    spiked(θ, n)

Draw `n` samples from the Gaussian spiked covariance model with spike direction `θ`.

`θ` is expected to be a row vector (`1 × d`, e.g. the adjoint of a `Vector`). Each
sample is `g * θ + z`, where `g` is a standard normal scalar and `z` is a
standard normal noise row. The samples are returned as the rows of an `n × d`
matrix.
"""
function spiked(θ, n)
    # Draw order is kept fixed (signal coefficients first, then the noise matrix)
    # so results under a seeded RNG do not change.
    signal = θ .* rand(Normal(), n)
    _, dim = size(θ)
    noise = rand(Normal(), n, dim)
    return signal + noise
end

"""
    embed(I, v)

Scatter the rows of `v` into a zero matrix that has one row per entry of the mask `I`.

The `j`-th `true` entry of `I` (in linear-index order) receives row `j` of `v`;
rows at `false` positions stay zero. The result is a `Float64` matrix with
`length(I)` rows and as many columns as `v`.
"""
function embed(I, v)
    nrows = length(I)
    _, ncols = size(v)
    out = zeros(nrows, ncols)
    nextrow = 0                      # rows of v consumed so far
    for pos in 1:nrows
        I[pos] || continue           # unselected position: leave the row at zero
        nextrow += 1
        out[pos, :] = v[nextrow, :]
    end
    return out
end

"""
    normalize_columns(X)

Rescale every nonzero column of the matrix `X` to unit Euclidean norm.

`X` is modified in place and is also returned. Columns whose norm is exactly
zero are left untouched.
"""
function normalize_columns(X)
    _, ncols = size(X)
    for j in 1:ncols
        col = X[:, j]                # copy of the current column
        len = norm(col)
        iszero(len) && continue      # all-zero column: nothing to scale
        X[:, j] = col .* len^(-1)
    end
    return X
end

# Problem size: n samples in d dimensions, with an s-sparse principal eigenvector
n = 1000
d = 1000
s = 10

# Principal eigenvector as a 1×d row vector: s equal entries of 1/√s (so it has
# unit norm), followed by d - s zeros
θ = vcat(fill(sqrt(s)^(-1), s), zeros(d - s))'
# Sample matrix drawn from the spiked covariance ensemble
X = spiked(θ, n);

# Step 2.: flag the coordinates with the largest sample variances in the mask I
# NOTE(review): `I` shadows `LinearAlgebra.I` (the file does `using LinearAlgebra`);
# the name is kept because other cells may refer to it.
vars = var(X; dims=1)            # per-coordinate sample variances, as a single row
σ² = median(vars)                # median variance is used as the baseline level
α = 9
# Commented-out alternative (not used below):
# ρ² = max(sum(vars .- σ²), σ² √(n\d))
# τ = (√n * ρ²)\ √(σ² * (ρ² + σ²))
# `n \ log(n)` is left division, i.e. log(n) divided by n.
# NOTE(review): the paper's rule appears to use log(max(n, d)); identical here
# because n == d, but confirm before changing the dimensions.
cutoff = α * sqrt(n \ log(n))
I = @. vars > σ² * (1 + cutoff)
k = count(I)                     # number of selected coordinates

# Step 3.: ordinary PCA restricted to the coordinates selected in I
Y = X[:, vec(I)]                 # keep only the flagged columns of X
Σ = cov(Y)                       # sample covariance of the reduced data
# Disabled variant that would request only the k-th eigenpair:
# ef = eigen(Symmetric(Σ), k:k)
ef = eigen(Symmetric(Σ))
v = ef.vectors                   # eigenvectors are stored as the columns of v

# Step 4.: thresholding the resulting eigenvectors
# Each eigenvector is a *column* of v, so its noise level τ[j] is the MAD of the
# entries of column j, rescaled by 1/0.6745. (The previous code sliced v[column, :],
# i.e. a row, and then broadcast τ along rows, so the thresholds were applied per
# coordinate instead of per eigenvector.)
τ = [mad(v[:, column], normalize = false) for column = 1:size(v)[2]] .* (0.6745^(-1))
# τ' is a 1×k row, so threshold j = τ[j] * √(2 log k) is broadcast down column j.
# Entries whose magnitude does not exceed their column's threshold are set to zero.
v = v .* (abs.(v) .> (τ' .* √(2 * log(k))))
# Thresholding changes the column norms; rescale the surviving columns to unit norm
v = normalize_columns(v)

# Step 5.: returning to original dimension
# embed places row j of v at the position of the j-th selected entry of I and
# leaves every other row zero, so u has one row per entry of I.
u = embed(I, v);