In [23]:
import scipy.io as sio
import numpy as np

In [24]:
data=sio.loadmat('tcgabrca_molecular_data_shuffled.mat')

In [25]:
sio.whosmat('tcgabrca_molecular_data_shuffled.mat')

[('labelshuffled', (1, 380), 'double'),
 ('methylshuffled', (574, 380), 'double'),
 ('mirnashuffled', (1046, 380), 'double'),
 ('patientidshuffled', (380,), 'char'),
 ('rnashuffled', (17814, 380), 'double'),
 ('rppashuffled', (171, 380), 'double')]

In [26]:
labelshuffled=data['labelshuffled']
methylshuffled=data['methylshuffled']
mirnashuffled=data['mirnashuffled']
patientidshuffled=data['patientidshuffled']
rnashuffled=data['rnashuffled']
rppashuffled=data['rppashuffled']

In [27]:
rppashuffled
rnashuffled
mirnashuffled
methylshuffled

array([[ 0.51349899,  0.09802236,  0.11117684, ...,  0.49445084,
         0.11807154,  0.56937541],
       [ 0.44184436,  0.24071421,  0.15905002, ...,  0.08848342,
         0.21279433,  0.42718625],
       [ 0.11232587,  0.06711152,  0.08982711, ...,  0.04708886,
         0.08117119,  0.07514712],
       ..., 
       [ 0.14968153,  0.13361364,  0.29870317, ...,  0.32186263,
         0.37172342,  0.13413438],
       [ 0.05477097,  0.06557652,  0.06575568, ...,  0.06795239,
         0.05347664,  0.84525458],
       [ 0.19690799,  0.14867467,  0.44637444, ...,  0.07732542,
         0.08477664,  0.09272374]])

In [28]:
def ssnmf(Y, R=3, n_iter=50, F=[], init_G=[], init_H=[], init_U=[], verbose=False):
    """
    decompose non-negative matrix to components and activation with semi-supervised NMF

    Y ≈ FG + HU
    Y ∈ R (m, n)
    F ∈ R (m, x)
    G ∈ R (x, n)
    H ∈ R (m, k)
    U ∈ R (k, n)

    parameters
    ----
    Y: target matrix to decompose
    R: number of bases to decompose
    n_iter: number for executing objective function to optimize
    F: matrix as supervised base components
    init_W: initial value of W matrix. default value is random matrix
    init_H: initial value of W matrix. default value is random matrix

    return
    ----
    Array of:
    0: matrix of F
    1: matrix of G
    2: matrix of H
    3: matrix of U
    4: array of cost transition
    """

    eps = np.spacing(1)

    # size of input spectrogram
    M = Y.shape[0];
    N = Y.shape[1];
    X = F.shape[1]

    # initialization
    if len(init_G):
        G = init_G
        X = init_G.shape[1]
    else:
        G = np.random.rand(X, N)

    if len(init_U):
        U = init_U
        R = init_U.shape[0]
    else:
        U = np.random.rand(R, N)

    if len(init_H):
        H = init_H
        R = init_H.shape[1]
    else:
        H = np.random.rand(M, R)

    # array to save the value of the euclid divergence
    cost = np.zeros(n_iter)

    # computation of Lambda (estimate of Y)
    Lambda = np.dot(F, G) + np.dot(H, U)

    # iterative computation
    for it in range(n_iter):

        # compute euclid divergence
        cost[it] = euclid_divergence(Y, Lambda + eps)

        # update of H
        H *= (np.dot(Y, U.T) + eps) / (np.dot(np.dot(H, U) + np.dot(F, G), U.T) + eps)

        # update of U
        U *= (np.dot(H.T, Y) + eps) / (np.dot(H.T, np.dot(H, U) + np.dot(F, G)) + eps)

        # update of G
        G *= (np.dot(F.T, Y) + eps)[np.arange(G.shape[0])] / (np.dot(F.T, np.dot(H, U) + np.dot(F, G)) + eps)

        # recomputation of Lambda (estimate of V)
        Lambda = np.dot(H, U) + np.dot(F, G)

    return [F, G, H, U, cost]

In [29]:
def nmf(Y, R=3, n_iter=50, init_H=[], init_U=[], verbose=False):
    """
    decompose non-negative matrix to components and activation with NMF

    Y ≈ HU
    Y ∈ R (m, n)
    H ∈ R (m, k)
    HU ∈ R (k, n)

    parameters
    ----
    Y: target matrix to decompose
    R: number of bases to decompose
    n_iter: number for executing objective function to optimize
    init_H: initial value of H matrix. default value is random matrix
    init_U: initial value of U matrix. default value is random matrix

    return
    ----
    Array of:
    0: matrix of H
    1: matrix of U
    2: array of cost transition
    """

    eps = np.spacing(1)

    # size of input spectrogram
    M = Y.shape[0]
    N = Y.shape[1]

    # initialization
    if len(init_U):
        U = init_U
        R = init_U.shape[0]
    else:
        U = np.random.rand(R,N);

    if len(init_H):
        H = init_H;
        R = init_H.shape[1]
    else:
        H = np.random.rand(M,R)

    # array to save the value of the euclid divergence
    cost = np.zeros(n_iter)

    # computation of Lambda (estimate of Y)
    Lambda = np.dot(H, U)

    # iterative computation
    for i in range(n_iter):

        # compute euclid divergence
        cost[i] = euclid_divergence(Y, Lambda)

        # update H
        H *= np.dot(Y, U.T) / (np.dot(np.dot(H, U), U.T) + eps)

        # update U
        U *= np.dot(H.T, Y) / (np.dot(np.dot(H.T, H), U) + eps)

        # recomputation of Lambda
        Lambda = np.dot(H, U)

    return [H, U, cost]

In [30]:
def euclid_divergence(V, Vh):
    d = 1 / 2 * (V ** 2 + Vh ** 2 - 2 * V * Vh).sum()
    return d

In [31]:
label_train = labelshuffled[:,0:300]
label_test = labelshuffled[:,300:380]
methyl_train = methylshuffled[:,0:300]
methyl_test = methylshuffled[:,300:380]

In [33]:
ssnmf(methyl_train)

AttributeError: 'list' object has no attribute 'shape'

In [7]:
labelshuffled

array([[2, 2, 3, 2, 3, 2, 1, 3, 1, 3, 0, 0, 2, 2, 0, 3, 2, 3, 3, 3, 3, 2,
        2, 3, 3, 3, 3, 0, 2, 3, 2, 2, 1, 1, 2, 3, 2, 2, 3, 2, 2, 3, 0, 0,
        1, 2, 0, 0, 0, 1, 3, 2, 2, 2, 2, 3, 3, 1, 2, 1, 2, 0, 2, 2, 2, 2,
        2, 1, 2, 3, 0, 3, 2, 0, 2, 2, 2, 2, 2, 2, 3, 1, 2, 1, 2, 2, 0, 0,
        2, 2, 2, 3, 3, 2, 1, 2, 2, 1, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2,
        2, 0, 0, 0, 3, 2, 0, 0, 1, 0, 2, 3, 3, 2, 3, 3, 3, 0, 2, 3, 2, 2,
        2, 2, 0, 2, 2, 2, 3, 3, 3, 2, 2, 0, 3, 3, 1, 3, 2, 2, 1, 0, 1, 3,
        2, 2, 2, 2, 2, 3, 0, 2, 0, 3, 1, 1, 2, 2, 3, 1, 1, 3, 0, 0, 2, 0,
        2, 2, 1, 0, 0, 2, 1, 2, 3, 2, 2, 1, 1, 3, 1, 2, 3, 2, 2, 2, 2, 2,
        2, 0, 0, 2, 2, 3, 2, 1, 1, 1, 2, 1, 2, 1, 2, 3, 3, 2, 2, 0, 0, 3,
        0, 3, 3, 2, 3, 2, 3, 2, 2, 3, 2, 0, 0, 2, 0, 0, 2, 2, 0, 1, 3, 3,
        2, 2, 0, 1, 0, 2, 3, 0, 3, 1, 3, 3, 2, 2, 0, 3, 2, 3, 0, 1, 2, 2,
        2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, 3, 2, 3, 0, 0, 3, 1, 2, 0, 1, 0,
        0, 2, 2, 1, 3, 3, 1, 3, 3, 3, 