<a href="https://colab.research.google.com/github/raminass/deep-NMF/blob/master/comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [100]:
from utils import *
import numpy as np
from sklearn.decomposition import NMF
import nimfa as nm
import pandas as pd
import torch.optim as optim
from matplotlib import pyplot as plt
from my_layers import *

# Display the result of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


In [72]:
# Load the synthetic ground-truth tables (signatures, exposures, catalog) from CSV.
signatures_df = pd.read_csv("../data/simulated/ground.truth.syn.sigs.csv", sep=",")
exposures_df = pd.read_csv("../data/simulated/ground.truth.syn.exposures.csv", sep=",")
category_df = pd.read_csv("../data/simulated/ground.truth.syn.catalog.csv", sep=",")

# Keep only the numeric part of each table as a NumPy array. The leading
# columns that are dropped are presumably row labels — TODO confirm against the CSVs.
W = signatures_df.iloc[:, 2:].to_numpy()  # (f,k)
H = exposures_df.iloc[:, 1:].to_numpy()  # (k,n)
V = category_df.iloc[:, 2:].to_numpy()  # (f,n)

# Problem dimensions read off the loaded matrices.
features, samples = V.shape
n_components = len(H)

In [69]:
# Alternative data source: random simulated factors instead of the CSV data.
# Running this cell overwrites n_components, W, H and V, so it must also set
# `features` and `samples`; other cells read them (e.g. to size the train/test
# mask), and they would otherwise be undefined or left over from another data set.
n_components = 21
features, samples = 96, 1350
W = abs(np.random.randn(features, n_components))  # (f,k) absolute value of normal draws
H = abs(np.random.randn(n_components, samples))  # (k,n) absolute value of normal draws
# NOTE(review): the additive Gaussian noise is not clipped, so V is not strictly
# guaranteed to be non-negative — confirm this is acceptable for the NMF solvers used.
V = W.dot(H) + 0.1 * np.random.randn(features, samples)  # (f,n)


In [75]:
# Random hold-out split over samples (columns of V): every sample lands in the
# training set independently with probability TRAIN_SIZE, so the realised
# training fraction is only approximately TRAIN_SIZE.
TRAIN_SIZE = 0.8
mask = TRAIN_SIZE > np.random.rand(samples)  # True -> train column, False -> test column

# Starting exposures from the project helper using its "ones" scheme.
H_init = initialize_exposures(
    V,
    n_components,
    method="ones",
)  # (n,k) according to the original note — TODO confirm against utils

In [78]:
# Fit the NMF baselines on the training columns of V.
# V[:, mask] is a boolean-index selection, which NumPy materialises as a new
# array every time it is evaluated; build it once instead of on every use
# (the original rebuilt it twice per iteration of the manual loop below).
V_fit = V[:, mask]  # (f, n_train)
N_MU_ITER = 200  # iteration budget shared by nimfa and the manual MU loop (both were 200)

# scikit-learn is fit on the transposed matrix (samples in rows), so the array
# returned by fit_transform has one row per training sample, while
# `components_` has one row per component.
scikit_mu = NMF(n_components, solver="mu")
w_scikit_mu = scikit_mu.fit_transform(V_fit.T)
# scikit_mu.reconstruction_err_

scikit_cd = NMF(n_components)  # solver left at scikit-learn's default
w_scikit_cd = scikit_cd.fit_transform(V_fit.T)
# scikit_cd.reconstruction_err_

nimfa_model = nm.Nmf(V_fit, rank=n_components, max_iter=N_MU_ITER, track_error=True)
nimfa_fit = nimfa_model.factorize()
# np.sqrt(nimfa_model.tracker.get_error())[-1]

# Manual multiplicative updates, started from a fresh initialisation drawn
# through the nimfa model's seeding object.
w_mu, h_mu = nimfa_model.seed.initialize(V_fit, n_components, nimfa_model.options)
w_mu = np.asarray(w_mu)
h_mu = np.asarray(h_mu)
mu_training_cost = []  # cost after each update step
for i in range(N_MU_ITER):
    # The two trailing 0 arguments are presumably regularisation weights — verify in utils.
    w_mu, h_mu = mu_update(V_fit, w_mu, h_mu, 0, 0)
    mu_training_cost.append(cost_function(V_fit, w_mu, h_mu, 0, 0))
# mu_training_cost[-1]

In [79]:
# Training cost of every fitted model on the training columns V[:, mask].
# Each line is a bare expression so that it is echoed as its own output
# (this relies on InteractiveShell.ast_node_interactivity = "all").
# The scikit-learn models were fit on the transposed matrix, hence
# `components_.T` is passed as the (f,k) factor and the transposed
# fit_transform result as the (k,n) factor.
# Original author's note: the value equals
# np.power(frobenius_reconstruct_error(V, W, H), 2) * 0.5 for the matching
# V, W, H — TODO confirm against utils.cost_function.
# The two trailing 0 arguments are presumably regularisation weights — verify in utils.
cost_function(V[:, mask], scikit_mu.components_.T,w_scikit_mu.T,0,0)  # scikit-learn, solver="mu"
cost_function(V[:, mask], scikit_cd.components_.T,w_scikit_cd.T,0,0)  # scikit-learn, default solver
cost_function(V[:, mask], nimfa_model.W,nimfa_model.H,0,0)  # nimfa
cost_function(V[:, mask], w_mu,h_mu,0,0)  # manual mu_update loop

68076398.27987593

242349.3847509567

48719612.1061976

45016844.625070356

## Prediction

In [85]:
# Estimate exposures for the held-out columns of V using the factors learned
# on the training columns.
# V[:, ~mask] is a boolean-index selection that NumPy copies on every
# evaluation; build it once (the original rebuilt it twice per loop iteration).
V_holdout = V[:, ~mask]  # (f, n_test)
N_MU_PRED_ITER = 50  # number of H-only update steps

# scikit-learn models were fit with samples in rows, so transpose here as well.
scikit_mu_pred = scikit_mu.transform(V_holdout.T)
scikit_cd_pred = scikit_cd.transform(V_holdout.T)
# nimfa_model_pred = nimfa_model.predict(V[:, ~mask])

# Manual MU: draw a fresh H through the nimfa seeding object (its W is
# discarded) and refine it against the trained w_mu. `update_W=False`
# presumably keeps W fixed inside mu_update — verify in utils.
_, h_mu_pred = nimfa_model.seed.initialize(V_holdout, n_components, nimfa_model.options)
h_mu_pred = np.asarray(h_mu_pred)
mu_test_cost = []  # cost after each update step
for i in range(N_MU_PRED_ITER):
    _, h_mu_pred = mu_update(V_holdout, w_mu, h_mu_pred, 0, 0, update_W=False)
    mu_test_cost.append(cost_function(V_holdout, w_mu, h_mu_pred, 0, 0))

In [90]:
# Cost of every model on the held-out columns V[:, ~mask], using the trained
# feature-side factor and the exposures predicted for the held-out samples.
# Each line is a bare expression so that it is echoed as its own output
# (this relies on InteractiveShell.ast_node_interactivity = "all").
# Original author's note: the value equals
# np.power(frobenius_reconstruct_error(V, W, H), 2) * 0.5 for the matching
# V, W, H — TODO confirm against utils.cost_function.
# The two trailing 0 arguments are presumably regularisation weights — verify in utils.
cost_function(V[:, ~mask], scikit_mu.components_.T,scikit_mu_pred.T,0,0)  # scikit-learn, solver="mu"
cost_function(V[:, ~mask], scikit_cd.components_.T,scikit_cd_pred.T,0,0)  # scikit-learn, default solver
cost_function(V[:, ~mask], w_mu,h_mu_pred,0,0)  # manual mu_update loop

289966146.78805625

6085561.870241653

74685425.82434121

# Network

In [94]:
def _to_float_tensor(array):
    """Wrap a NumPy array in a torch tensor and cast it with ``.float()``."""
    return torch.from_numpy(array).float()


# All-ones starting exposures, one column per sample.
H_init = np.ones((n_components, samples))  # (k, n)

# Tensoring: every matrix is transposed where needed so samples are in rows.
# Observed data.
v_train = _to_float_tensor(V[:, mask].T)
v_test = _to_float_tensor(V[:, ~mask].T)

# Exposures produced by the scikit-learn `scikit_cd` model (fit on train,
# transform on test).
h_train = _to_float_tensor(w_scikit_cd)
h_test = _to_float_tensor(scikit_cd_pred)

# Initial exposures split with the same mask as the data.
h_0_train = _to_float_tensor(H_init[:, mask].T)
h_0_test = _to_float_tensor(H_init[:, ~mask].T)

# Component matrix learned by `scikit_cd`, one row per component.
W_tensor = _to_float_tensor(scikit_cd.components_)


In [105]:
# Re-import kept from the original cell; utils is already star-imported in the
# first cell of the notebook.
from utils import *

# Hyper-parameters.
lr = 9e-4
num_layers = 7
network_train_iteration = 400

# Loss: mean-reduced squared error.
criterion = nn.MSELoss(reduction="mean")

# Model with a lower bound of 0 applied to its weights through WeightClipper
# (presumably a clamp to non-negative values — verify in the project modules).
constraints = WeightClipper(lower=0)
deep_nmf = MultiFrDNMFNet(num_layers, n_components, features)
# Left as a bare expression on purpose: the module returned by apply() is
# echoed, which is what prints the network structure below this cell.
deep_nmf.apply(constraints)

optimizerADAM = optim.Adam(deep_nmf.parameters(), lr=lr)

# Train the Network
inputs = (h_0_train, v_train)


MultiFrDNMFNet(
  (deep_nmfs): ModuleList(
    (0): FrNMFLayer(
      (fc1): Linear(in_features=39, out_features=39, bias=False)
      (fc2): Linear(in_features=96, out_features=39, bias=False)
    )
    (1): FrNMFLayer(
      (fc1): Linear(in_features=39, out_features=39, bias=False)
      (fc2): Linear(in_features=96, out_features=39, bias=False)
    )
    (2): FrNMFLayer(
      (fc1): Linear(in_features=39, out_features=39, bias=False)
      (fc2): Linear(in_features=96, out_features=39, bias=False)
    )
    (3): FrNMFLayer(
      (fc1): Linear(in_features=39, out_features=39, bias=False)
      (fc2): Linear(in_features=96, out_features=39, bias=False)
    )
    (4): FrNMFLayer(
      (fc1): Linear(in_features=39, out_features=39, bias=False)
      (fc2): Linear(in_features=96, out_features=39, bias=False)
    )
    (5): FrNMFLayer(
      (fc1): Linear(in_features=39, out_features=39, bias=False)
      (fc2): Linear(in_features=96, out_features=39, bias=False)
    )
    (6): FrNMFL