# Get Data

In [1]:
import os
import sys; sys.path.append(os.path.join("..", "..", "scripts"))
from generate_X import gen_data, gen_data_sparse

# Generate three example matrices in order, one gen_data call per (R, column
# count) pair; every matrix has 100 rows. Only the "X" entry of each result is kept.
X1, X2, X3 = (
    gen_data(R=r, shape=[100, n_cols])["X"]
    for r, n_cols in ((3, 110), (4, 120), (2, 130))
)



Store the datasets in a dictionary, giving each one a unique name:

In [2]:
# Dataset name -> data matrix; these names are reused as keys in the settings below
Xs = dict(X1=X1, X2=X2, X3=X3)

# Settings

In [3]:
# Baseline NMFk settings, reused below for every dataset and for the SPLIT step
params = dict(
    n_perturbs=10,
    n_iters=100,
    epsilon=0.015,
    n_jobs=1,
    init="nnsvd",
    use_gpu=False,
    # Output is saved, under the relative directory given by save_path below
    save_output=True,
    verbose=False,
    transpose=False,
    sill_thresh=0.9,
    nmf_method="nmf_fro_mu",
    save_path=os.path.join("..", "..", "results"),
)

Each dataset can have its own NMFk settings. Therefore, we create a dictionary whose keys are the dataset names from above (`Xs`) and whose values are the corresponding NMFk parameter settings. Here, as an example, we use the same NMFk settings for every dataset:

In [4]:
# Give every dataset its OWN copy of the baseline settings. Assigning `params`
# directly would make all entries (and `params` itself) the same dict object,
# so tweaking one dataset's settings would silently change all of them.
# A shallow copy is enough here because every value in `params` is a plain
# scalar or string.
nmfk_params = {name: dict(params) for name in Xs}

We also select the NMFk parameter settings for the SPLIT step. As an example, we use the same NMFk parameter settings as for the individual datasets:

In [5]:
# Independent copy of the baseline settings for the SPLIT step, so that later
# changes to these settings do not leak into `params` or anything else that
# refers to the same dict (a shallow copy suffices: all values are scalars/strings).
split_nmfk_params = dict(params)

For each dataset, we select the range of K values to search over, stored in a dictionary keyed by the same dataset names as above (`Xs`):

In [6]:
# K values to search for each dataset (keys are the dataset names used in Xs).
# range() excludes its stop value, so each search ends at stop - 1.
Ks = dict(
    X1=range(1, 9),   # K = 1..8
    X2=range(1, 10),  # K = 1..9
    X3=range(1, 11),  # K = 1..10
)

Finally, we put together the SPLIT method's settings:

In [7]:
SPLIT_PARAMS = dict(
    # --- Data and NMFk settings defined in the cells above ---
    Xs=Xs,
    Ks=Ks,
    nmfk_params=nmfk_params,
    split_nmfk_params=split_nmfk_params,
    # --- K search during the split ---
    # Step size of the K search
    Ks_split_step=1,
    # Smallest K to look at
    Ks_split_min=1,
    # --- Construction of the learned H ---
    # If True, the regression runs on the GPU
    H_regress_gpu=False,
    # Method used to construct the learned H: "regress" or "MitH"
    H_learn_method="regress",
    # Number of H update iterations when doing regression
    H_regress_iters=1000,
    # Update rule used when doing regression: "fro" or "kl"
    H_regress_method="fro",
    # How to initialize the learned H when doing regression: "random" or "MitH"
    H_regress_init="MitH",
    # --- Miscellaneous ---
    # If True, prints the progress of the SPLIT method
    verbose=True,
    # Random seed
    random_state=42,
)

# Begin

In [8]:
from TELF.factorization import SPLIT

# Build the SPLIT model, passing every entry of SPLIT_PARAMS as a keyword argument
model = SPLIT(**SPLIT_PARAMS)



In [9]:
# Fit the model. Because SPLIT_PARAMS sets "verbose" to True, progress messages
# are printed while it runs (see the log below).
model.fit()

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Applying NMFk: X1
Applying NMFk: X2
Applying NMFk: X3
Applying SPLIT NMFk


<TELF.factorization.SPLIT.SPLIT at 0x15703ad50>

In [10]:
# Compute the new factors; the result is keyed by dataset name, and each entry
# holds that dataset's "W" and "H" (inspected in the cells below).
results = model.transform()

Learning H: X1
Learning H: X2
Learning H: X3


# New W and H are returned for each X

In [11]:
# One entry per dataset name
results.keys()

dict_keys(['X1', 'X2', 'X3'])

In [12]:
# Each dataset's entry contains its two factor matrices
results["X1"].keys()

dict_keys(['W', 'H'])

In [13]:
# Pull both factors for X1 out of the results, then report their shapes
W_X1, H_X1 = (results["X1"][factor] for factor in ("W", "H"))
print("W shape=", W_X1.shape)
print("H shape=", H_X1.shape)

W shape= (100, 7)
H shape= (7, 110)


In [14]:
# Pull both factors for X2 out of the results, then report their shapes
W_X2, H_X2 = (results["X2"][factor] for factor in ("W", "H"))
print("W shape=", W_X2.shape)
print("H shape=", H_X2.shape)

W shape= (100, 7)
H shape= (7, 120)


In [15]:
# Pull both factors for X3 out of the results, then report their shapes
W_X3, H_X3 = (results["X3"][factor] for factor in ("W", "H"))
print("W shape=", W_X3.shape)
print("H shape=", H_X3.shape)

W shape= (100, 7)
H shape= (7, 130)


# We can access detailed data

In [16]:
# Report what the fitted model recorded for each dataset
for name in model.information:
    info = model.information[name]
    print(name)
    # The input data and the K search range that was used for it
    print("X shape=", info["data"].shape)
    print("Ks=", info["Ks"])
    # The k stored for this dataset, with the W and H stored alongside it
    print("k=", info["k"])
    print("W shape=", info["W"].shape)
    print("H shape=", info["H"].shape)
    # Quantities stored under "M" and "H_learned"
    print("M shape=", info["M"].shape)
    print("H Learned shape=", info["H_learned"].shape)
    print("-----------------")

X1
X shape= (100, 110)
Ks= range(1, 9)
k= 3
W shape= (100, 3)
H shape= (3, 110)
M shape= (7, 3)
H Learned shape= (7, 110)
-----------------
X2
X shape= (100, 120)
Ks= range(1, 10)
k= 4
W shape= (100, 4)
H shape= (4, 120)
M shape= (7, 4)
H Learned shape= (7, 120)
-----------------
X3
X shape= (100, 130)
Ks= range(1, 11)
k= 2
W shape= (100, 2)
H shape= (2, 130)
M shape= (7, 2)
H Learned shape= (7, 130)
-----------------


In [17]:
# Report what the fitted model recorded for the SPLIT step itself
split_info = model.split_information
print("SPLIT")
print("X shape=", split_info["X"].shape)
print("Ks=", split_info["Ks"])
# Labelled "k=" (not "k") so it matches the per-dataset report, which prints "k="
print("k=", split_info["k"])
print("W shape=", split_info["W"].shape)
print("H shape=", split_info["H"].shape)

SPLIT
X shape= (100, 9)
Ks= range(1, 8)
k 7
W shape= (100, 7)
H shape= (7, 9)
