In [116]:
#Include
using Plots, LightGraphs, SparseArrays, SimpleWeightedGraphs
using Statistics, BenchmarkTools, LinearAlgebra, ProgressMeter
using Distributions, Base.Threads, CSV, StatsBase
using Base.GC, JLD2, FileIO, Random
plotly();

In [307]:
function stochastic_itml(X,S,D,u,l,A₀,γ,iters,K)
    A = copy(A₀)
    _,n = size(X)
    λ = spzeros(n,n)
    ξ = l*ones(n,n)
    for (i,j) in S
        ξ[i,j] = u
    end
    
    #@showprogress for (i,j) in D
    #    ξ[i,j] = l
    #end
    
    s = length(S)
    d = length(D)
    e = 1e-30
    
    for k = 1:iters
        idx = rand(1:s)
        (i,j) = S[idx]
        
        p = (X[:,i]-X[:,j])'*A*(X[:,i]-X[:,j])
        δ = 1.0
        α = min(λ[i,j],δ/2*(1/(p+e)-γ/(ξ[i,j]+e)))
        β = δ*α/(1-δ*α*p+e)
        
        ξ[i,j] = γ*ξ[i,j]/(γ+δ*α*ξ[i,j]+e)
        
        λ[i,j] -= α
        
        A += β*A*(X[:,i]-X[:,j])*(X[:,i]-X[:,j])'*A
        
        idx = rand(1:d)
        (i,j) = D[idx]
        
        p = (X[:,i]-X[:,j])'*A*(X[:,i]-X[:,j])
        δ = -1.0
        α = min(λ[i,j],δ/2*(1/(p+e)-γ/(e+ξ[i,j])))
        β = δ*α/(1-δ*α*p)
        
        ξ[i,j] = γ*ξ[i,j]/(γ+δ*α*ξ[i,j]+e)
        
        λ[i,j] -= α
        
        A += β*A*(X[:,i]-X[:,j])*(X[:,i]-X[:,j])'*A
        
        if k%1e3 == 0
            I,J,_ = findnz(λ)
            for r = 1:length(I)
                i = I[r]
                j = J[r]
                p = (X[:,i]-X[:,j])'*A*(X[:,i]-X[:,j])
                if y[i] == y[j]
                    δ = 1.0
                else
                    δ = -1.0
                end
                α = min(λ[i,j],δ/2*(1/(p+e)-γ/(ξ[i,j]+e)))
                β = δ*α/(1-δ*α*p+e)
        
                ξ[i,j] = γ*ξ[i,j]/(γ+δ*α*ξ[i,j]+e)
        
                λ[i,j] -= α
        
                A += β*A*(X[:,i]-X[:,j])*(X[:,i]-X[:,j])'*A
            end
        end
        
        if k%1e5 == 0
            classify(A,K)
        end
            
       
    end
    classify(A,K)
    
    return A
end

stochastic_itml (generic function with 2 methods)

In [287]:
function itml(X,S,D,u,l,A₀,γ)
    A = copy(A₀)
    _,n = size(X)
    λ = spzeros(n,n)
    ξ = l*ones(n,n)
    
    @show(n)
    flush(stdout)
    
    for (i,j) in S
        ξ[i,j] = u
    end
    
    N = 20*num_classes^2
    Constraints = Dict()
    while(length(Constraints) < N)
        i = rand(1:n)
        j = rand(1:n)
        if i != j
            a = min(i,j)
            b = max(i,j)
            if y[b] == y[a]
                Constraints[(b,a)] = 1
            else
                Constraints[(b,a)] = 2 
            end
        end
    end
    
    @show(length(Constraints))
    flush(stdout)
    
    e = 1e-30
    for k = 1:1e2
        for (i,j) in keys(Constraints)
            if Constraints[(i,j)] == 1
                p = (X[:,i]-X[:,j])'*A*(X[:,i]-X[:,j])
                δ = 1.0
                α = min(λ[i,j],δ/2*(1/(p+e)-γ/(e+ξ[i,j])))
                β = δ*α/(1-δ*α*p+e)
            
                ξ[i,j] = γ*ξ[i,j]/(γ+δ*α*ξ[i,j]+e)
                ξ[j,i] = ξ[i,j]
        
                λ[i,j] -= α
                λ[j,i] = λ[i,j]
        
                A += β*A*(X[:,i]-X[:,j])*(X[:,i]-X[:,j])'*A
            else
                p = (X[:,i]-X[:,j])'*A*(X[:,i]-X[:,j])
                δ = -1.0
                α = min(λ[i,j],δ/2*(1/(e+p)-γ/(e+ξ[i,j])))
                β = δ*α/(1-δ*α*p+e)
        
                ξ[i,j] = γ*ξ[i,j]/(γ+δ*α*ξ[i,j]+e)
                ξ[j,i] = ξ[i,j]
        
                λ[i,j] -= α
                λ[j,i] = λ[i,j]
        
                A += β*A*(X[:,i]-X[:,j])*(X[:,i]-X[:,j])'*A
            end
        end
        
        if k %1e2 == 0
            classify(A,5)
        end
    end
    
    return A
end

itml (generic function with 1 method)

In [69]:
function read_data(filename)
    lines = readlines(open(filename))
    num_variables = 0
    for i = 1:length(lines)
        if lines[i][1] != '@'
            num_variables = i - 6
            break
        end
    end
    
    @show(num_variables)
    
    X = zeros(num_variables,length(lines)-num_variables-6)
    y = zeros(length(lines)-num_variables-6)
    
    s = num_variables + 6
    classes = Dict()
    num_classes = 0
    
    for i = s:length(lines)-1
        line = split(lines[i],",")
        for j = 1:num_variables
            X[j,i-s+1] = parse(Float64,line[j])
        end
        if haskey(classes,line[end])
            y[i-s+1] = classes[line[end]]
        else
            classes[line[end]] = num_classes+1
            num_classes += 1
            y[i-s+1] = classes[line[end]]
        end
        #y[i-s+1] = parse(Float64,line[end])+1
    end
    
    return X,y,classes
end

function eig_proj(A)
    F = eigen((A'+A)/2, permute=false)
    p = F.values .> 0
    return F.vectors *Diagonal(p.*F.values)*F.vectors'
end

function classify(A,K)
    A = eig_proj(A)
    n = size(Xtst,2)
    ypred = zeros(n)
    for i = 1:n
        x = Xtst[:,i]
        dists = zeros(size(Xtrn,2))
        for j = 1:size(Xtrn,2)
            dists[j] = sqrt((Xtrn[:,j]-x)'*A*(Xtrn[:,j]-x))
        end
        p = sortperm(dists)
        ngbd = ytrn[p[1:K]]
        Count = zeros(num_classes)
        for i = 1:length(ngbd)
            Count[Int(ngbd[i])] += 1
        end
        ypred[i] = findmax(Count)[2]
    end
    
    @show(sum(ypred .== ytst)/length(ytst))
    flush(stdout)
    
    return sum(ypred .== ytst)/length(ytst)
end

classify (generic function with 1 method)

In [312]:
X,y,labels = read_data("./penbased.dat")

num_variables = 16


([47.0 0.0 … 64.0 0.0; 100.0 89.0 … 100.0 63.0; … ; 40.0 100.0 … 43.0 48.0; 98.0 6.0 … 0.0 18.0], [1.0, 2.0, 3.0, 4.0, 3.0, 5.0, 4.0, 6.0, 7.0, 8.0  …  2.0, 10.0, 2.0, 4.0, 4.0, 4.0, 9.0, 4.0, 4.0, 9.0], Dict{Any,Any}(" 3"=>10," 0"=>6," 9"=>8," 6"=>5," 5"=>7," 1"=>3," 8"=>1," 2"=>2," 7"=>9," 4"=>4…))

In [313]:
num_classes = length(Set(y))
n = size(X,2)
d = size(X,1)
t = Int(floor(0.8*n))
p = randperm(n)
Xtrn = X[:,p[1:t]]
Xtst = X[:,p[t+1:end]]
ytrn = y[p[1:t]]
ytst = y[p[t+1:end]];

In [314]:
num_classes

10

In [315]:
length(y)

10991

In [316]:
Sys.free_memory()/2^(30)

34.53087615966797

In [317]:
function make_SD(X,y)
    n = length(y)
    S = []
    D = []
    
    for i = 1:n
        for j = 1:i-1
            if y[i] == y[j]
                push!(S,(i,j))
            else
                push!(D,(i,j))
            end
        end
    end
    
    return S,D
end

make_SD (generic function with 1 method)

In [318]:
S,D = make_SD(Xtrn,ytrn)

(Any[(4, 2), (6, 1), (9, 2), (9, 4), (10, 3), (11, 1), (11, 6), (13, 1), (13, 6), (13, 11)  …  (8792, 8662), (8792, 8667), (8792, 8696), (8792, 8707), (8792, 8716), (8792, 8725), (8792, 8756), (8792, 8757), (8792, 8777), (8792, 8778)], Any[(2, 1), (3, 1), (3, 2), (4, 1), (4, 3), (5, 1), (5, 2), (5, 3), (5, 4), (6, 2)  …  (8792, 8782), (8792, 8783), (8792, 8784), (8792, 8785), (8792, 8786), (8792, 8787), (8792, 8788), (8792, 8789), (8792, 8790), (8792, 8791)])

In [319]:
Sys.free_memory()/2^(30)

33.27451705932617

In [320]:
#n = length(y)
A₀ = Matrix(I,d,d)
u = 1
l = 10
γ = 1

1

In [None]:
@time A₁ = stochastic_itml(Xtrn,S,D,u,l,Matrix(I,d,d),γ,1e8,5)

sum(ypred .== ytst) / length(ytst) = 0.9745338790359254
sum(ypred .== ytst) / length(ytst) = 0.9763528876762164
sum(ypred .== ytst) / length(ytst) = 0.9758981355161437
sum(ypred .== ytst) / length(ytst) = 0.9763528876762164
sum(ypred .== ytst) / length(ytst) = 0.9749886311959982
sum(ypred .== ytst) / length(ytst) = 0.975443383356071
sum(ypred .== ytst) / length(ytst) = 0.9758981355161437
sum(ypred .== ytst) / length(ytst) = 0.9768076398362893
sum(ypred .== ytst) / length(ytst) = 0.9768076398362893
sum(ypred .== ytst) / length(ytst) = 0.9763528876762164
sum(ypred .== ytst) / length(ytst) = 0.9768076398362893
sum(ypred .== ytst) / length(ytst) = 0.975443383356071
sum(ypred .== ytst) / length(ytst) = 0.9758981355161437
sum(ypred .== ytst) / length(ytst) = 0.9758981355161437
sum(ypred .== ytst) / length(ytst) = 0.9763528876762164
sum(ypred .== ytst) / length(ytst) = 0.9758981355161437
sum(ypred .== ytst) / length(ytst) = 0.977262391996362
sum(ypred .== ytst) / length(ytst) = 0.977262391996

In [None]:
length(ytst)

In [310]:
avg_acc = zeros(100);

In [311]:
for i=1:100
    A₂ = itml(Xtrn,S,D,u,l,Matrix(I,d,d),γ)
    avg_acc[i] = classify(A₂,5)
end

@show(mean(avg_acc))

n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8830188679245283
sum(ypred .== ytst) / length(ytst) = 0.8830188679245283
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8830188679245283
sum(ypred .== ytst) / length(ytst) = 0.8830188679245283
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.879245283018868
sum(ypred .== ytst) / length(ytst) = 0.879245283018868
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8773584905660378
sum(ypred .== ytst) / length(ytst) = 0.8773584905660378
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8839622641509434
sum(ypred .== ytst) / length(ytst) = 0.8839622641509434
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8820754716981132
sum(ypred .== ytst) / length(ytst) =

sum(ypred .== ytst) / length(ytst) = 0.8820754716981132
sum(ypred .== ytst) / length(ytst) = 0.8820754716981132
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8858490566037736
sum(ypred .== ytst) / length(ytst) = 0.8858490566037736
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8783018867924528
sum(ypred .== ytst) / length(ytst) = 0.8783018867924528
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8820754716981132
sum(ypred .== ytst) / length(ytst) = 0.8820754716981132
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
n = 4239
length(Constraints) = 80
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
sum(ypred .== ytst) / length(ytst) = 0.8811320754716981
n = 4239
len

0.8826792452830188

In [299]:
sum(avg_acc)/75

0.9285233333333334

In [None]:
A = sparse(rand(1:5,5))

In [None]:
nz(A)

In [None]:
size(Xtrn,2)

In [None]:
size(X)

In [34]:
?eigen

search: [0m[1me[22m[0m[1mi[22m[0m[1mg[22m[0m[1me[22m[0m[1mn[22m [0m[1me[22m[0m[1mi[22m[0m[1mg[22m[0m[1me[22m[0m[1mn[22m! [0m[1mE[22m[0m[1mi[22m[0m[1mg[22m[0m[1me[22m[0m[1mn[22m [0m[1me[22m[0m[1mi[22m[0m[1mg[22m[0m[1me[22m[0m[1mn[22mvector_centrality G[0m[1me[22mneral[0m[1mi[22mzedEi[0m[1mg[22m[0m[1me[22m[0m[1mn[22m



```
eigen(A; permute::Bool=true, scale::Bool=true) -> Eigen
```

Computes the eigenvalue decomposition of `A`, returning an `Eigen` factorization object `F` which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)

Iterating the decomposition produces the components `F.values` and `F.vectors`.

The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).

For general nonsymmetric matrices it is possible to specify how the matrix is balanced before the eigenvector calculation. The option `permute=true` permutes the matrix to become closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to make rows and columns more equal in norm. The default is `true` for both options.

# Examples

```jldoctest
julia> F = eigen([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0])
Eigen{Float64,Float64,Array{Float64,2},Array{Float64,1}}
eigenvalues:
3-element Array{Float64,1}:
  1.0
  3.0
 18.0
eigenvectors:
3×3 Array{Float64,2}:
 1.0  0.0  0.0
 0.0  1.0  0.0
 0.0  0.0  1.0

julia> F.values
3-element Array{Float64,1}:
  1.0
  3.0
 18.0

julia> F.vectors
3×3 Array{Float64,2}:
 1.0  0.0  0.0
 0.0  1.0  0.0
 0.0  0.0  1.0

julia> vals, vecs = F; # destructuring via iteration

julia> vals == F.values && vecs == F.vectors
true
```

---

```
eigen(A, B) -> GeneralizedEigen
```

Computes the generalized eigenvalue decomposition of `A` and `B`, returning a `GeneralizedEigen` factorization object `F` which contains the generalized eigenvalues in `F.values` and the generalized eigenvectors in the columns of the matrix `F.vectors`. (The `k`th generalized eigenvector can be obtained from the slice `F.vectors[:, k]`.)

Iterating the decomposition produces the components `F.values` and `F.vectors`.

# Examples

```jldoctest
julia> A = [1 0; 0 -1]
2×2 Array{Int64,2}:
 1   0
 0  -1

julia> B = [0 1; 1 0]
2×2 Array{Int64,2}:
 0  1
 1  0

julia> F = eigen(A, B);

julia> F.values
2-element Array{Complex{Float64},1}:
 0.0 + 1.0im
 0.0 - 1.0im

julia> F.vectors
2×2 Array{Complex{Float64},2}:
  0.0-1.0im   0.0+1.0im
 -1.0-0.0im  -1.0+0.0im

julia> vals, vecs = F; # destructuring via iteration

julia> vals == F.values && vecs == F.vectors
true
```

---

```
eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> Eigen
```

Computes the eigenvalue decomposition of `A`, returning an `Eigen` factorization object `F` which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)

Iterating the decomposition produces the components `F.values` and `F.vectors`.

The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).

The `UnitRange` `irange` specifies indices of the sorted eigenvalues to search for.

!!! note
    If `irange` is not `1:n`, where `n` is the dimension of `A`, then the returned factorization will be a *truncated* factorization.


---

```
eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> Eigen
```

Computes the eigenvalue decomposition of `A`, returning an `Eigen` factorization object `F` which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)

Iterating the decomposition produces the components `F.values` and `F.vectors`.

The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).

`vl` is the lower bound of the window of eigenvalues to search for, and `vu` is the upper bound.

!!! note
    If [`vl`, `vu`] does not contain all eigenvalues of `A`, then the returned factorization will be a *truncated* factorization.



In [290]:
false*1

0

In [291]:
1

1