In [2]:
%pylab inline
import glob
import spams
import pandas as pd
from numpy.linalg import pinv as moore_penrose_inv
from numpy.linalg import svd, eig
from tqdm import tqdm

Populating the interactive namespace from numpy and matplotlib


In [8]:
prefix = '/home/cmb-panasas2/skchoudh/github_projects/EE-546-project/cross-species-data/'

# For every .tsv table under `prefix`, print its number of columns (after
# dropping 'target_id') next to its matrix rank, so full column rank
# (rank == colsize) can be checked by eye.
tsv_paths = sorted(glob.glob('{}/*.tsv'.format(prefix)))
for f in tsv_paths:
    df = pd.read_table(f).drop(columns=['target_id'])
    matrix = df.values
    colsize = matrix.shape[1]
    rank = np.linalg.matrix_rank(matrix)
    filename = f.split('/')[-1]
    print('{} : Array colsize: {} | rank: {}'.format(filename, colsize, rank))

SRP007412-gallus_gallus.tsv : Array colsize: 12 | rank: 12
SRP007412-gorilla_gorilla.tsv : Array colsize: 11 | rank: 11
SRP007412-homo_sapiens.tsv : Array colsize: 18 | rank: 18
SRP007412-macaca_mulatta.tsv : Array colsize: 13 | rank: 13
SRP007412-monodelphis_domestica.tsv : Array colsize: 12 | rank: 12
SRP007412-mus_musculus.tsv : Array colsize: 17 | rank: 17
SRP007412-ornithorhynchus_anatinus.tsv : Array colsize: 12 | rank: 12
SRP007412-pan_paniscus.tsv : Array colsize: 12 | rank: 12
SRP007412-pan_troglodytes.tsv : Array colsize: 15 | rank: 15
SRP007412-pongo_abelii.tsv : Array colsize: 9 | rank: 9
SRP016501-bos_taurus.tsv : Array colsize: 27 | rank: 27
SRP016501-gallus_gallus.tsv : Array colsize: 27 | rank: 27
SRP016501-macaca_mulatta.tsv : Array colsize: 27 | rank: 27
SRP016501-mus_musculus.tsv : Array colsize: 26 | rank: 26
SRP016501-rattus_norvegicus.tsv : Array colsize: 27 | rank: 27


In [41]:
def einsum_row_norm(a):
    """Return the square root of the per-row sum of squared entries of ``a``.

    ``a`` is indexed as a 2-D array (``'ij'``); the result has one value per
    row.  For real-valued input this is the Euclidean (L2) norm of each row.
    """
    squared_row_sums = np.einsum('ij,ij->i', a, a)
    return np.sqrt(squared_row_sums)

def einsum_col_norm(a):
    """Return the square root of the per-column sum of squared entries of ``a``.

    ``a`` is indexed as a 2-D array (``'ij'``); the result has one value per
    column.  For real-valued input this is the Euclidean (L2) norm of each
    column.
    """
    squared_col_sums = np.einsum('ij,ij->j', a, a)
    return np.sqrt(squared_col_sums)


def ho_SVD(list_of_X):
    """Perform a higher-order (generalized) SVD of several matrices.

    Every input matrix is factored as ``X_i = U_i * diag(sigma_i) * V^T``,
    where the right-hand basis ``V`` is shared by all inputs.
    NOTE(review): the steps look like the HO GSVD construction; confirm
    against the reference the project is following.

    Parameters
    ----------
    list_of_X : list of array_like
        ``[X_1, X_2, ..., X_N]`` with N >= 2.  Each ``X_i`` is a 2-D matrix of
        shape (n_i, m); the number of columns m must be the same for all.

    Returns
    -------
    U : list of ndarray
        ``U[i]`` has the shape of ``X_i``; its columns are the columns of
        ``B_i`` scaled to unit Euclidean norm (all-zero columns stay zero).
    sigmas : list of ndarray
        ``sigmas[i]`` has shape (m,) and holds the column norms of ``B_i``.
    V : ndarray, shape (m, m)
        Eigenvectors of ``W`` (unit-norm columns, not necessarily
        orthogonal), ordered by descending eigenvalue.
    eigen_values : ndarray, shape (m,)
        Eigenvalues of ``W`` in descending order.

    Raises
    ------
    ValueError
        If fewer than two matrices are given, an input is not 2-D, or the
        column counts differ.
    numpy.linalg.LinAlgError
        If the eigenvector matrix ``V`` is singular, so ``V B_i^T = X_i^T``
        cannot be solved.
    """
    matrices = [np.asarray(X) for X in list_of_X]
    N = len(matrices)
    if N < 2:
        raise ValueError('ho_SVD needs at least two matrices, got {}'.format(N))
    if any(X.ndim != 2 for X in matrices):
        raise ValueError('every input to ho_SVD must be a 2-D matrix')
    n_cols = matrices[0].shape[1]
    if any(X.shape[1] != n_cols for X in matrices):
        raise ValueError('all matrices must have the same number of columns')

    # Step 1
    # A_i = X_i^T X_i  and its Moore-Penrose pseudo-inverse.
    A = [np.dot(X.T, X) for X in matrices]
    A_inv = [moore_penrose_inv(A_i) for A_i in A]

    # Step 2
    # W = 1/(N(N-1)) * sum_{i=1}^{N} sum_{j>i}^{N} (A_i pinv(A_j) + A_j pinv(A_i))
    W = np.zeros((n_cols, n_cols))
    for i in range(N):
        for j in range(i + 1, N):
            W = W + np.dot(A[i], A_inv[j]) + np.dot(A[j], A_inv[i])
    W = W / (N * (N - 1))

    # Step 3
    # Eigen-decomposition W V = V Lambda with V = (v_1, ..., v_m).
    # Each v_k has unit norm, but V is not necessarily orthonormal because W
    # is not symmetric in general.
    eigen_values, V = eig(W)
    # eig may return a complex dtype carrying only round-off imaginary parts;
    # drop them when they are negligible so downstream results stay real.
    eigen_values = np.real_if_close(eigen_values, tol=1000)
    V = np.real_if_close(V, tol=1000)

    idx = np.argsort(eigen_values)[::-1]
    eigen_values = eigen_values[idx]
    V = V[:, idx]

    # Step 4
    # Solve V B_i^T = X_i^T for B_i, then split B_i into unit-norm columns
    # (U_i) and their norms (sigma_i), so that X_i = U_i diag(sigma_i) V^T.
    B = []
    U = []
    sigmas = []
    for X in matrices:
        B_i = np.linalg.solve(V, X.T).T
        sigma_i = np.linalg.norm(B_i, axis=0)
        # Avoid 0/0 for all-zero columns: dividing a zero column by 1 keeps it zero.
        safe_sigma = np.where(sigma_i == 0, 1, sigma_i)
        U_i = B_i / safe_sigma
        B.append(B_i)
        U.append(U_i)
        sigmas.append(sigma_i)

    return U, sigmas, V, eigen_values

    
    

In [42]:
# Load the rattus_norvegicus and gallus_gallus SRP016501 tables, drop the
# 'target_id' column from each, and keep the values as Fortran-ordered arrays.
expression_paths = (
    '/home/cmb-panasas2/skchoudh/github_projects/EE-546-project/cross-species-data/SRP016501-rattus_norvegicus.tsv',
    '/home/cmb-panasas2/skchoudh/github_projects/EE-546-project/cross-species-data/SRP016501-gallus_gallus.tsv',
)
expression_matrices = []
for expression_path in expression_paths:
    df = pd.read_table(expression_path)
    df = df.drop(columns=['target_id'])
    expression_matrices.append(np.asfortranarray(df.values))

matrix1, matrix2 = expression_matrices


In [None]:
# Run the joint decomposition on the two matrices loaded above; whatever the
# function returns is kept under a name rather than echoed as cell output.
species_matrices = [matrix1, matrix2]
hosvd_result = ho_SVD(species_matrices)