# Create Data

In [2]:
import sys; sys.path.append("../../scripts/")
from generate_X import gen_trinmf_data
import matplotlib.pyplot as plt

# Build a small synthetic tri-factorization problem. The generator hands back
# a dict with the data matrix "X" and the factors "W", "S" and "H".
kwkh = (5, 3)      # passed as `kwkh=`; W comes back as (20, 5) and H as (3, 20)
shape = (20, 20)   # shape of the generated matrix X

# Collect the generator arguments in one place so they are easy to scan.
generator_kwargs = dict(
    shape=shape,
    kwkh=kwkh,
    factor_wh=(0.5, 1.0),  # NOTE(review): semantics are defined by gen_trinmf_data -- confirm there
    factor_S=1,            # NOTE(review): same as above
    random_state=10,       # presumably the RNG seed, kept fixed for repeatable data
)
data = gen_trinmf_data(**generator_kwargs)

# Show which arrays were returned.
data.keys()

dict_keys(['X', 'W', 'S', 'H'])

In [3]:
# Pull the data matrix and the ground-truth factors out of the generated dict.
X, Wtrue, Strue, Htrue = (data[key] for key in ("X", "W", "S", "H"))

# Display the four shapes in the order X, W, S, H.
tuple(arr.shape for arr in (X, Wtrue, Strue, Htrue))

((20, 20), (20, 5), (5, 3), (3, 20))

In [4]:
import numpy as np

# Pairwise correlation between the columns of the true W. np.corrcoef treats
# each ROW as a variable, hence the transpose.
Wcor = np.corrcoef(Wtrue.T)

# Strict upper triangle (k=1): every distinct column pair exactly once, with
# the diagonal (self-correlation == 1) excluded. Using k=2 here would also
# drop the first super-diagonal, i.e. the pairs (0,1), (1,2), ..., and could
# under-report the maximum.
pair_idx = np.triu_indices(Wcor.shape[0], k=1)
maxcor = np.max(np.abs(Wcor[pair_idx]))
print(f'maximum correlation = {maxcor:.2f}')

maximum correlation = 0.39


# Initialize the Model

In [7]:
# Settings for the NMFk stage. This dict is not used directly; it is nested
# inside `tri_nmfk_params` under the "nmfk_params" key.
nmfk_params = dict(
    n_perturbs=64,
    n_iters=2000,
    epsilon=0.01,
    n_jobs=-1,
    init="nnsvd",
    use_gpu=False,
    save_path="../../results/",
    verbose=True,
    sill_thresh=0.8,
    nmf_method="nmf_fro_mu",
    perturb_type="uniform",
    calculate_error=True,
    pruned=True,
    predict_k=True,
    predict_k_method="sill",
    transpose=False,
    mask=None,
    use_consensus_stopping=False,
    calculate_pac=True,
    consensus_mat=True,
    simple_plot=True,
    collect_output=True,
)

# Keyword arguments that are unpacked into the TriNMFk constructor.
# NOTE(review): `use_gpu` is True here but False in nmfk_params above --
# confirm the difference is intentional.
tri_nmfk_params = dict(
    experiment_name="TriNMFk",
    nmfk_params=nmfk_params,
    nmf_verbose=False,
    use_gpu=True,
    n_jobs=-1,
    mask=None,
    use_consensus_stopping=False,
    alpha=(0, 0),
    n_iters=100,
    n_inits=10,
    joblib_backend="multiprocessing",
    pruned=False,
    transpose=False,
    verbose=True,
)

In [8]:
# Build the TriNMFk model. Every constructor argument comes from the
# `tri_nmfk_params` dict, which also carries the nested NMFk settings.
from TELF.factorization import TriNMFk
model = TriNMFk(**tri_nmfk_params)

Performing NMF with  nmf_fro_mu




# Perform NMFk First

In [9]:
# Candidate ranks for the NMFk sweep: k = 1, 2, ..., 7.
Ks = range(1, 8)
# Free-text annotation handed to fit_nmfk together with the data and ranks.
note = "This is the NMFk portion of the TriNMFk method!"
results = model.fit_nmfk(X, Ks, note)

[Parallel(n_jobs=7)]: Using backend MultiprocessingBackend with 7 concurrent workers.
  c /= stddev[:, None]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
[Parallel(n_jobs=7)]: Done   2 out of   7 | elapsed:   10.1s remaining:   25.2s
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
[Parallel(n_jobs=7)]: Done   7 out of   7 | elapsed:   16.8s finished


In [10]:
# Show which entries the NMFk stage returned.
results.keys()

dict_keys(['time', 'k_predict'])

# Inspect the outputs and choose k1 and k2 manually

In [11]:
# List the files written by the NMFk stage. The directory lives under the
# `save_path` set in nmfk_params.
# NOTE(review): the folder name appears to be assembled from experiment_name,
# n_perturbs, n_iters, epsilon and init -- update this path if any of those
# settings change.
! ls ../../results/TriNMFk_64perts_2000iters_0.01eps_nnsvd-init

FINAL_k=1-7.png            k_1_7_cophenetic_coeff.png
WH_k=1.npz                 k_1_con_mat.png
WH_k=2.npz                 k_2_con_mat.png
WH_k=3.npz                 k_3_con_mat.png
WH_k=4.npz                 k_4_con_mat.png
WH_k=5.npz                 k_5_con_mat.png
WH_k=6.npz                 k_6_con_mat.png
WH_k=7.npz                 k_7_con_mat.png
experiment.log


# Perform TriNMFk with the selected k1 and k2

In [12]:
# (k1, k2) picked by hand after inspecting the NMFk outputs; with (5, 3) the
# returned factors have shapes W: (20, 5), S: (5, 3), H: (3, 20).
k1k2=(5,3)
tri_nmfk_results = model.fit_tri_nmfk(X, k1k2)

[Parallel(n_jobs=8)]: Using backend MultiprocessingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    7.7s remaining:    5.1s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    9.7s finished


In [13]:
# Show which entries the TriNMFk fit returned.
tri_nmfk_results.keys()

dict_keys(['W', 'S', 'H', 'errors'])

In [14]:
# Unpack the three estimated factors from the TriNMFk result dict.
W, S, H = (tri_nmfk_results[name] for name in ("W", "S", "H"))

In [15]:
# Shapes of the estimated factors, in the order W, S, H.
tuple(factor.shape for factor in (W, S, H))

((20, 5), (5, 3), (3, 20))