## ECNMF

In [22]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.neighbors import kneighbors_graph
from sklearn.metrics.cluster import adjusted_rand_score as ARI
from sklearn.metrics.cluster import normalized_mutual_info_score as NMI
import torch.nn.functional as F
from sklearn import preprocessing as pre


import warnings
warnings.filterwarnings('ignore')
from util import eval, loadDataset, normalization, kmeansInitialization

eps = torch.tensor(10 ** -10)

datasets = ['3Sources.npy',
'BBCSport.npy',
'Caltech101.npy',
'Caltech_2.npy',
'Citeseer.npy',
'Coil100.npy',
'Cora.npy',
'EYaleB10.npy',
'Handwritten.npy',
'MNIST10.npy',
'UCIdigit.npy',
'Umist.npy',
'Yale32.npy',
'Yeast.npy',
'Cora2.npy',
'texas.npy',
'wisconsin.npy',
'washington.npy',
'cornell.npy',
'digit2.npy']


X0, Y, V, c = loadDataset(19)


r = 20
r2 = int(r/2)
k = 5



alpha = 1/V
mu = 0.1
lam = 0.0001
gam = 0.0001

o = torch.ones(V)/V
beta = o/torch.norm(o)



iter = 200

W = []
Wp = []
Hc = []
Hs = []
X = []


for v in range(V):

    x_scale = pre.MinMaxScaler().fit_transform(X0[v].T).T
    
    X.append(torch.tensor(x_scale).type(torch.float32))
    
    
    
    d, n = X[v].shape
    
    W0 = torch.rand(d, r)
    W.append(W0/torch.sum(W0, dim=0))
    

    Wp.append(torch.eye(r2))
    
    Hs.append(torch.rand(r2, n))
    Hc.append(torch.rand(r2, n))
Hstar = torch.rand(r2, n)
    


An = []
Dn = []

for v in range(V):
    
    An0 = kneighbors_graph(X[v].T, k, mode='connectivity', include_self=False).toarray()
    An0 = torch.tensor(An0).type(torch.float32)
    An0 = torch.maximum(An0, An0.T)

    An.append(An0)
    Dn.append(torch.diag(torch.sum(An0, dim = 1)))
    

for v in range(V):
    X[v] = X[v]/torch.maximum(torch.norm(X[v], dim=0), eps)


# Optimization
err = torch.zeros(iter)

for t in range(iter):
    
    # Updating Hstar
    H = torch.zeros(n, r2)
    for v in range(V):
        H += beta[v] * Hc[v].T @ Wp[v]
    U, Si, VT = torch.svd(H)
    Hstar = VT.T @ U.T


    for v in range(V):
    
        # Updating Ws
        H2 = torch.concat((Hs[v], Hc[v]), 0)
        Wn = X[v]      @ H2.T
        Wd = W[v] @ H2 @ H2.T
        W[v] = W[v] * (Wn / torch.maximum(Wd, eps))

        # Normalization on columns (Sum to one)
        W[v] = W[v]/torch.sum(W[v], dim=0)

    for v in range(V):
        
        Psi = Wp[v] @ Hstar
        Psip = (torch.abs(Psi) + Psi) / 2
        Psin = (torch.abs(Psi) - Psi) / 2

        
        # Updating Hs
        Hn = alpha * (W[v][:,:r2].T @ X[v])               + mu * (Hs[v] @ An[v]) 
        Hd = alpha * (W[v][:,:r2].T @ W[v][:,:r2] @ Hs[v]) + mu * (Hs[v] @ Dn[v]) + lam * (Hc[v])
        Hs[v] = Hs[v] * (Hn / torch.maximum(Hd, eps)) ** 0.5

        # Updating Hc
        Hn = W[v][:,r2:].T @ X[v]               + mu * (Hc[v] @ An[v]) + gam * (beta[v] * Psip)
        Hd = W[v][:,r2:].T @ W[v][:,r2:] @ Hc[v] + mu * (Hc[v] @ Dn[v]) + gam * (beta[v] * Psin)
        Hc[v] = Hc[v] * (Hn / torch.maximum(Hd, eps)) ** 0.5


    # Updating Wp
    for v in range(V):
        
        Q = beta[v] * (Hc[v] @ Hstar.T) 
        U, Si, VT = torch.svd(Q)
        Wp[v] = U @ VT 
        
    # Updating Beta
        
    f = torch.zeros(V)
    for v in range(V):
        f[v] = torch.trace(Hc[v].T @ Wp[v] @ Hstar)
    beta = f/torch.norm(f)


        # Calculating cost function
#     err[t] += torch.norm(X[v] - W[v] @ H[v]) ** 2 + gamma * torch.trace(H[v] @ (D[v] - A[v]) @ H[v].T) 
           
# plt.plot(err)


Hfinal = []
for v in range(V):
    Hfinal.append(Hs[v])
Hfinal.append(Hstar)

Hfinal = torch.concat(Hfinal, 0)


pred = KMeans(n_clusters=c).fit(Hfinal.T).labels_

nmi, acc, ari, f1mi, f1ma, f1we, pur = eval(Y, pred)
nmi, acc, ari, f1mi, f1ma, f1we, pur

(0.8968154763809111,
 0.8855,
 0.8404794044450947,
 0.8855,
 0.8781684349960402,
 0.8781684349960401,
 0.8855)

In [69]:
from scipy.io import loadmat

Xs = loadmat('../Datasets/Handwritten10_6_2k.mat')['data'][0]
y = loadmat('../Datasets/Handwritten10_6_2k.mat')['label'].flatten()

# X = torch.load('../Datasets/Handwritten.npy')

# Xs.append(y)


data = []
for x in Xs:
    data.append(x)

data.append(y)

data

torch.save(data,'../Datasets/digit2.npy')

In [18]:
x_norm = pre.MinMaxScaler().fit_transform(X0[0].T).T
np.sum(x_norm)

202874.46724675456

In [11]:
An[5].sum()

tensor(14356.)

In [15]:
torch.sum(W[0], dim=0)

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000])