In [4]:
push!(LOAD_PATH, joinpath(Pkg.dir("SpectralClustering"), "docs"));

# Multiview Embedding

## Co-Regularized Multiview Spectral Clustering
When the dataset has more than one representation, each of them is named view. In the context of spectral clustering, 
co-regularization techniques attempt to encourage the similarity of the examples in the new representation generated 
from the eigenvectors of each view.

Let $X^{(v)}=\{ x_1^{(v)}, x_2^{(v)},...,x_m^{(v)}\}$ be the samples for view $v$ and $L^{(v)}$ the Laplacian matrix 
created from $X$ for view $v$. $U^{(v)}$ is defined as the matrix formed by the first $k$ eigenvectors  of the Laplacian Matrix. A criterion was proposed in \cite{Kumar11} that measures the disagreement between two 
representations: 


  $$
  D(U^{(v)}, U^{(w)}) = \norm{ \frac{K_{U^{(v)}}} {\norm{K_{U^{(v)}}}_F} -\frac{K_{U^{(w)}}}
  {\norm{K_{U^{(w)}}}_F}}_F^2
  $$

 where $K_{U^{(v)}}$ is the similarity matrix generated from the patterns of the new representation $U^{(v)}$ 
and $\vert \vert  \cdot \vert \vert_F $ is the Frobenius norm. If the inner product among the vectors is used as similarity measure, 
$K_{U^{(v)}} = U^{(v)}{U^{(v)}}^T$ is obtained. Ignoring the constant additive and scaling terms, the previous equation 
can be formulated as follows: 


  $$
   D(U^{(v)}, U^{(w)}) = -\Tr{  U^{(v)}{U^{(v)}}^T  U^{(w)}{U^{(w)}}^T } 
  $$

The objective is to minimize the disagreement  among the representations obtained from each view. Therefore, if we have 
$m$ views, we obtain the following optimization problem that combines the invididual spectral clustering objectives and 
the objective that determines the disagreement among the representations: 

$$
\begin{equation*}
\begin{aligned}
& \underset{\begin{matrix} U^{(i)} \in R^{n \times k}, \\ 1 \leq i \leq m 
\end{matrix}}{\text{max}}
& & \sum\limits_{v=1}^m \Tr{{U^{(v)}}^T L^{(v)}  U^{(v)}} + \lambda \sum\limits_{ {\begin{matrix} 1 \leq v, w \leq m\\ v\neq w \end{matrix} } } 
\Tr{{U^{(w)}}^T L^{(w)} U^{(w)}} \\
& \text{subject to}
& & {U^{(v)}}^T U^{(v)} =  I \hspace{10pt} \forall 1 \leq v \leq m
\end{aligned}
\end{equation*}
$$

The $\lambda$ parameter balances the spectral clustering objective and the disagreement among the 
representations. The problem of 
joint optimization can be solved using alternating maximization. Given $U^{(w)}, 1 \leq w \leq m $, the following 
problem of optimization is obtained for $U^{(v)}, v\neq w$:
$$
\begin{equation*}
\begin{aligned}
& \underset{U^{(v)} \in R^{n \times k}}{\text{max}}
& & \Tr{ {U^{(v)}}^T \left(  LM^{(v)} \right) U^{(v)}} \\
& \text{subject to}
& & {U^{(v)}}^T U^{(v)} =   I
\end{aligned}
\end{equation*}
$$

resulting in a traditional clustering algorithm with the Laplacian matrix modified  
$$
LM^{(v)} = L^{(v)} + \lambda \sum\limits_{{\begin{matrix} 1 \leq w \leq m \\ v \neq w \end{matrix}}} U^{(w)}{U^{(w)}}^T 
$$


## Example
This example uses the same dataset as the authors of the method. "The UCI Handwritten digits data is taken from the handwritten digits (0-9) data from the UCI repository. The dataset consists of 2000 examples, with view-1
being the 76 Fourier coefficients, and view-2 being the 216 profile correlations of each example image."
It is used a Gausisan kernel for computing graph similarities and the $\sigma$ is the median of the pair-wise Euclidean distances between the data points. 
The Co-regularization parameter is $\lambda=0.01$.
The output of the example shows the normalized mutual information (NMI) as the clustering quality evaluation measure for a single-view case and the multi-view case.

- Dataset: https://archive.ics.uci.edu/ml/datasets/Multiple+Features


In [5]:
using SpectralClustering, Extras, Distances, Extras, Clustering, Latexify, LaTeXStrings

function weight(i::Integer, neigh, v, m, scale)
    return exp.(-Distances.colwise(SqEuclidean(),m,v)./(2*scale^2))
end
datasets  = digit_features()
graphs = []
for d in datasets
    scale = median(pairwise(Euclidean(),d))
    knnconfig   = KNNNeighborhood(d,10)
    weight_oracle = (i,neigh,v,m)->weight(i,neigh,v,m,copy(scale))
    push!(graphs,create(Float32,knnconfig,weight_oracle,d))
end

multiview = CoRegularizedMultiView(10.0,
                                     [ View(NgLaplacian(5), 0.01), 
                                       View(NgLaplacian(5), 0.01) ])

perf = []
U1 = embedding(multiview, graphs; disagreement = perf)
U1        = clusterize( KMeansClusterizer(10), U1)
U2        = clusterize(NgLaplacian(5), KMeansClusterizer(10), graphs[1])
labels    = vec(hcat([ones(Integer,200)*j for j=1:10]...))

NMI_1 = round(Extras.NMI(assignments(U1), labels),3);
NMI_2 = round(Extras.NMI(assignments(U2), labels),3);


[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/luciano/.julia/lib/v0.6/InfoZIP.ji for module InfoZIP.
[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/luciano/.julia/lib/v0.6/LegacyStrings.ji for module LegacyStrings.
[39m[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/luciano/.julia/lib/v0.6/DataFrames.ji for module DataFrames.
[39m[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/luciano/.julia/lib/v0.6/BinDeps.ji for module BinDeps.
[39m[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/luciano/.julia/lib/v0.6/Latexify.ji for module Latexify.
[39m

In [1]:
plt = Plots.plot(perf, ylim=(maximum(perf)/1.5, maximum(perf)), legend=false, 
                       seriestype=:scatter, xlabel="Iteration", ylabel="Disagreement")
plt_html = IOBuffer()
show(plt_html, "text/html", plt)
display("text/html", 
    """
    <style>
    #NMI h2{
    text-align:center  ;
    margin:1em;
    
    }
    #NMI td {
       padding:2em;
    }
    #NMI_table {
    text-align:center;
    margin-left:auto;
    margin-right:auto;
    }
    #NMI_table td {
         padding: 1em;
    }
    
    </style>
       <table id="NMI" >
    <tr >
      <td>
        <h2 >Performance of the algorithm</h2> 
       <table  id="NMI_table"> 
          <tr > 
            <th> Algorithm </th>     <th> NMI </th> 
    </tr> 
         <tr>
           <td> NMI Multi View </td> <td> $NMI_1 </td>
      </tr>
     <tr>
           <td> NMI Single View </td> <td> $NMI_2 </td>
        </tr>
       </table>
    </td>
    <td>
         <h2>Convergence</h2>
          $(String(plt_html))
    </td>
    </tr>
    </table>""")

LoadError: [91mUndefVarError: Plots not defined[39m

# Comparison

In [7]:
using SpectralClustering, Extras, Distances, Extras, Clustering, Latexify, LaTeXStrings, DataFrames

function weight(i::Integer, neigh, v, m, scale)
    return exp.(-Distances.colwise(SqEuclidean(),m,v)./(2*scale^2))
end
datasets  = digit_features()
graphs = []
for d in datasets
    scale = median(pairwise(Euclidean(),d))
    knnconfig   = KNNNeighborhood(d,10)
    weight_oracle = (i,neigh,v,m)->weight(i,neigh,v,m,copy(scale))
    push!(graphs,create(Float32,knnconfig,weight_oracle,d))
end


multiview   = CoRegularizedMultiView(10.0,
                                     [ View(NgLaplacian(5), 0.01), 
                                       View(NgLaplacian(5), 0.01) ])
multiview_c = embedding(multiview, graphs; disagreement = perf)
multiview_c        = clusterize( KMeansClusterizer(10), multiview_c)

kernel_pr   = KernelProduct(NgLaplacian(5))
kernel_pr_c = clusterize(kernel_pr, KMeansClusterizer(10), graphs)

kernel_ad   = KernelAddition(NgLaplacian(5))
kernel_ad_c = clusterize(kernel_ad, KMeansClusterizer(10), graphs)

labels    = vec(hcat([ones(Integer,200)*j for j=1:10]...))

NMI_1 = round(Extras.NMI(assignments(multiview_c), labels),3);
NMI_2 = round(Extras.NMI(assignments(kernel_pr_c), labels),3);
NMI_3 = round(Extras.NMI(assignments(kernel_ad_c), labels),3);
DataFrame(NMI=[NMI_1, NMI_2, NMI_3])



LoadError: [91mUndefVarError: weight_matrix not defined[39m

In [5]:
display("text/markdown","""
|    | Co-Regularized | Kernel Product | Kernel Addition | \n
|----|----------------|----------------|-----------------|
| NMI|  $NMI_1         | $NMI_2        | $NMI_3          | \n
""")



LoadError: [91mUndefVarError: NMI_1 not defined[39m

# References

In [4]:
using DocUtils
display("text/html",bibliography(["kumar2011co"]))
