In [1]:
import torch
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt

import sys
sys.path.insert(1, '../')
sys.path.append("./src")

import ssnmf
from multi_nmf import run_HNMF_unsupervised_single, run_HNMF_supervised_single

import torch.nn as nn
from torch.autograd import Variable

from nltk.corpus import stopwords
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
import re

from ssnmf import SSNMF
torch.set_default_tensor_type(torch.DoubleTensor)


## Load and Preprocessing for 20 Newsgroup Data Set

In [2]:
remove = ('headers','footers','quotes')
stopwords_list = stopwords.words('english')
stopwords_list.extend(['thanks','edu','also','would','one','could','please','really','many','anyone','good','right','get','even','want','must','something','well','much','still','said','stay','away','first','looking','things','try','take','look','make','may','include','thing','like','two','or','etc','phone','oh','email'])


categories = [
 'comp.graphics',

 'comp.sys.mac.hardware',
 'misc.forsale',
 'rec.motorcycles',
 'rec.sport.baseball',
 'sci.med',
 'sci.space',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.religion.misc'
 ]

newsgroups_train = fetch_20newsgroups(subset='train', categories=categories, remove=remove)

# remove numbers
data_cleaned = [re.sub(r'\d+','', file) for file in newsgroups_train.data]

vectorizer = TfidfVectorizer(stop_words=stopwords_list)
vectors = vectorizer.fit_transform(data_cleaned).transpose()
idx_to_word = np.array(vectorizer.get_feature_names())

X = vectors
d, n = np.shape(X)
Y = np.zeros((n))

labels = {0:0, 1:0, 2:1, 3:2, 4:2, 5:3, 6:3, 7:4, 8:4, 9:5}

for i in range(n-1):
    label = newsgroups_train.target[i]
    Y[i] = label

X = torch.from_numpy(X.todense())
Y = torch.from_numpy(Y).long()


m = X.shape[0]
k1 = 10
k2 = 6

sub = 100 #HOW MANY PER CLASS
count = np.zeros((k1))

X_new = torch.zeros((X.shape[0], sub*k1))
Y_new = torch.zeros((sub*k1)).long()
Y_10 = torch.zeros((sub*k1)).long()
j = 0
for i in range(Y.shape[0]):
    if(count[Y[i]] >= sub):
        continue
    count[Y[i]] += 1
    X_new[:,j] = X[:,i]
    Y_new[j] = labels[int(Y[i])]
    Y_10[j] = int(Y[i])
    j += 1

X = X_new
Y = Y_new

ind = np.argsort(Y_10)
X = X[:,ind]
Y = Y[ind]
Y_10 = Y_10[ind]

split = 0.75
L = np.zeros((6, Y.shape[0]))
for i in range(10):
    L[:,i*sub:i*sub+(int(split*sub))] = 1
    
L_10 = np.zeros((10, Y.shape[0]))
for i in range(10):
    L_10[:,i*sub:i*sub+(int(split*sub))] = 1

X = np.asarray(X)
Y = np.asarray(Y)
Y_10 = np.asarray(Y_10)

In [3]:
Y_hot = np.zeros((Y.shape[0], Y.max()+1))
Y_hot[np.arange(Y.shape[0]),Y] = 1
Y_hot = Y_hot.T

Y_hot_10 = np.zeros((Y_10.shape[0], Y_10.max()+1))
Y_hot_10[np.arange(Y_10.shape[0]),Y_10] = 1
Y_hot_10 = Y_hot_10.T

## Run HNMF and Multi-HNTF

In [4]:
results_all_layer_1 = []
results_all_layer_1_sup = []
results_all_layer_2_unsup = []
results_all_layer_2_sup = []

results_all_layer_2_Hunsup = []
results_all_layer_2_Hsup = []

lam = 1
N = 800
num_trials = 10

for i in range(num_trials):
    
    # Run First Layer of Unsupervised NMF
    # ------------------------------------
    
    np.random.seed(i)
    model_1 = SSNMF(X,10,modelNum=1)
    model_1.mult(numiters = N)

    results_layer_1 = {}
    results_layer_1['A'] = model_1.A
    results_layer_1['S'] = model_1.S
    results_all_layer_1.append(results_layer_1)
    
    # Run First Layer of Supervised NMF
    # ------------------------------------
    
    np.random.seed(i)
    model_1_sup = SSNMF(X,10,Y = Y_hot_10,L=L_10,lam=lam,modelNum=3)
    model_1_sup.mult(numiters = N)

    results_layer_1_sup = {}
    results_layer_1_sup['A'] = model_1_sup.A
    results_layer_1_sup['S'] = model_1_sup.S
    results_all_layer_1_sup.append(results_layer_1_sup)
    
    
    # Run Second Layer of Unsupervised NMF
    # ------------------------------------
    
    np.random.seed(i)
    model_2 = SSNMF(model_1.S,6,modelNum=1)
    model_2.mult(numiters = N)
    
    results_layer_2_unsup = {}
    results_layer_2_unsup['A_local'] = model_2.A
    results_layer_2_unsup['A'] = model_1.A @ model_2.A
    results_layer_2_unsup['S'] = model_2.S
    results_all_layer_2_unsup.append(results_layer_2_unsup)
    
    
    # Run Second Layer of Supervised NMF
    # ------------------------------------
    
    np.random.seed(i)
    model_3 = SSNMF(model_1_sup.S,6,Y = Y_hot,L=L,lam=lam, modelNum=3)
    model_3.mult(numiters = N)
    
    results_layer_2_sup = {}
    results_layer_2_sup['A_local'] = model_3.A
    results_layer_2_sup['A'] = model_1.A @ model_3.A
    results_layer_2_sup['S'] = model_3.S
    results_layer_2_sup['B'] = model_3.B
    results_all_layer_2_sup.append(results_layer_2_sup)
    
    # Run Second Layer of Unspervised HNMF
    # ------------------------------------
    np.random.seed(i)
    W = run_HNMF_unsupervised_single(X, model_1.A, model_1.S, 6, N=N)
    results_layer_2_Hunsup = {}
    results_layer_2_Hunsup['W'] = W
    results_layer_2_Hunsup['A'] = model_1.A @ W
    results_layer_2_Hunsup['S'] = W.T @ model_1.S
    results_all_layer_2_Hunsup.append(results_layer_2_Hunsup)
    
    # Run Second Layer of Supervised HNMF
    # ------------------------------------
    np.random.seed(i)
    W, B = run_HNMF_supervised_single(X, model_1_sup.A, model_1_sup.S, Y_hot * L, 6, N=N,lam=lam)
    results_layer_2_Hsup = {}
    results_layer_2_Hsup['W'] = W
    results_layer_2_Hsup['B'] = B
    results_layer_2_Hsup['A'] = model_1_sup.A @ W
    results_layer_2_Hsup['S'] = W.T @ model_1_sup.S
    results_all_layer_2_Hsup.append(results_layer_2_Hsup)
    
    print("Done with run...   ", i+1)

Done with run...    1
Done with run...    2
Done with run...    3
Done with run...    4
Done with run...    5
Done with run...    6
Done with run...    7
Done with run...    8
Done with run...    9
Done with run...    10


## Measure Reconstruction Loss and Accuracy

In [5]:
def get_acc(results, rank=6):

    
    if rank==6:
        B = np.multiply(Y_hot,L) @ np.linalg.pinv(results['S'])
        Y_pred = np.argmax(np.dot(B,results['S']), axis=0)
        acc = Y[L[0]==0][Y_pred[L[0]==0]==Y[L[0]==0]].shape[0] / Y[L[0]==0].shape[0]
    else:
        B = np.multiply(Y_hot_10,L_10) @ np.linalg.pinv(results['S'])
        Y_pred = np.argmax(np.dot(B,results['S']), axis=0)
        acc = Y_10[L[0]==0][Y_pred[L[0]==0]==Y_10[L[0]==0]].shape[0] / Y_10[L[0]==0].shape[0]

    return acc

def get_recon(results):
    return np.linalg.norm(X - results['A'] @ results['S'])

acc_all_layer_1 = []
acc_all_layer_1_sup = []
acc_all_layer_2_unsup = []
acc_all_layer_2_sup = []
acc_all_layer_2_Hunsup = []
acc_all_layer_2_Hsup = []


recon_all_layer_1 = []
recon_all_layer_1_sup = []
recon_all_layer_2_unsup = []
recon_all_layer_2_sup = []
recon_all_layer_2_Hunsup = []
recon_all_layer_2_Hsup = []

for i in range(num_trials):
    
    results_layer_1 = results_all_layer_1[i]
    results_layer_1_sup = results_all_layer_1_sup[i]
    results_layer_2_unsup = results_all_layer_2_unsup[i]
    results_layer_2_sup = results_all_layer_2_sup[i]
    results_layer_2_Hunsup = results_all_layer_2_Hunsup[i]
    results_layer_2_Hsup = results_all_layer_2_Hsup[i]
    
    
    acc_all_layer_1.append(get_acc(results_layer_1))
    acc_all_layer_1_sup.append(get_acc(results_layer_1_sup))
    acc_all_layer_2_unsup.append(get_acc(results_layer_2_unsup))
    acc_all_layer_2_sup.append(get_acc(results_layer_2_sup))
    acc_all_layer_2_Hunsup.append(get_acc(results_layer_2_Hunsup))
    acc_all_layer_2_Hsup.append(get_acc(results_layer_2_Hsup))
    
    recon_all_layer_1.append(get_recon(results_layer_1))
    recon_all_layer_1_sup.append(get_recon(results_layer_1_sup))
    recon_all_layer_2_unsup.append(get_recon(results_layer_2_unsup))
    recon_all_layer_2_sup.append(get_recon(results_layer_2_sup))
    recon_all_layer_2_Hunsup.append(get_recon(results_layer_2_Hunsup))
    recon_all_layer_2_Hsup.append(get_recon(results_layer_2_Hsup))

    print("Done with calculation...   ", i+1)

Done with calculation...    1
Done with calculation...    2
Done with calculation...    3
Done with calculation...    4
Done with calculation...    5
Done with calculation...    6
Done with calculation...    7
Done with calculation...    8
Done with calculation...    9
Done with calculation...    10


In [6]:
print("Layer 1 unsupervised accuracy...  ", sum(acc_all_layer_1) / num_trials)
print("Layer 1 supervised accuracy...  ", sum(acc_all_layer_1_sup) / num_trials)

print("Layer 2 unsupervised accuracy...  ", sum(acc_all_layer_2_unsup) / num_trials)
print("Layer 2 supervised accuracy...  ", sum(acc_all_layer_2_sup) / num_trials)
print()
print("Layer 2 H unsupervised accuracy...  ", sum(acc_all_layer_2_Hunsup) / num_trials)
print("Layer 2 H supervised accuracy...  ", sum(acc_all_layer_2_Hsup) / num_trials)

Layer 1 unsupervised accuracy...   0.5932000000000001
Layer 1 supervised accuracy...   0.7768
Layer 2 unsupervised accuracy...   0.5071999999999999
Layer 2 supervised accuracy...   0.6364

Layer 2 H unsupervised accuracy...   0.5159999999999999
Layer 2 H supervised accuracy...   0.7368


In [7]:
print("Layer 1 unsupervised recon loss...  ", sum(recon_all_layer_1) / num_trials)
print("Layer 1 supervised recon loss...  ", sum(recon_all_layer_1_sup) / num_trials)
print()
print("Layer 2 unsupervised recon loss...  ", sum(recon_all_layer_2_unsup) / num_trials)
print("Layer 2 supervised recon loss...  ", sum(recon_all_layer_2_sup) / num_trials)
print()
print("Layer 2 H unsupervised recon loss...  ", sum(recon_all_layer_2_Hunsup) / num_trials)
print("Layer 2 H supervised recon loss...  ", sum(recon_all_layer_2_Hsup) / num_trials)

Layer 1 unsupervised recon loss...   30.650902366052634
Layer 1 supervised recon loss...   30.77671656065876

Layer 2 unsupervised recon loss...   30.819847251127662
Layer 2 supervised recon loss...   31.446235226119178

Layer 2 H unsupervised recon loss...   30.81340608097356
Layer 2 H supervised recon loss...   30.9124314614065
