# Use MPI and Scalapy to distribute all of MICA workflow to work on multiple nodes

prep.py component \
Read input file and slice into manageable sizes

In [1]:
#from IPython import parallel
import ipyparallel as ipp

# Launch an ipython parallel cluster
Run this on hpc node to launch a cluster with mpi engines

In [2]:
#this currently needs to be launched from terminal
#We need to launch an ipython parallel cluster
#!ipcluster start --engines=MPIEngineSetLauncher --log-level DEBUG --n=4 &

In [3]:
# Create a parallel client so that we can use %%px cell magic
# With rc and dview, we can interact between mpi ranks and the thread running this notebook
#rc = ipp.Client(profile='mvapich',sshserver='cburdysh@noderome209')
#rc = ipp.Client(profile='mvapich')
rc = ipp.Client()


dview = rc[:]
rc.ids

[0, 1, 2, 3]

In [4]:
%%px
from mpi4py import MPI

In [5]:
%%px
#load all necessary libraries onto each rank

from scipy.sparse import csr_matrix
#from mpi4py import MPI
import sys
import numba
import pandas as pd
import scanpy as sc
import scipy as sci
import numpy as np
import anndata
import time
from sklearn.decomposition import PCA
import fast_histogram
import logging
logging.basicConfig(level=logging.INFO)
from MICA.lib import utils
#from scalapy import *

In [6]:
%%px
from scalapy import *

## Check to make sure MPI (mpi4py) is working

In [7]:
%%px
import os
import socket
#from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
name = MPI.Get_processor_name()
print("{host}[{pid}]: {rank}/{size}".format(
    host=socket.gethostname(),
    pid=os.getpid(),
    rank=comm.rank,
    size=comm.size,
))

[stdout:0] noderome234[11406]: 0/4
[stdout:1] noderome234[11407]: 1/4
[stdout:2] noderome234[11408]: 2/4
[stdout:3] noderome234[11409]: 3/4


## Begin execution of code

In [13]:
%%px
import os
cwd=os.getcwd()
if rank==0:
    print(cwd)
    
data_file_path = cwd+'/test_data/inputs/10x/PBMC/3k/pre-processed/'
input_file_name = data_file_path + 'pbmc3k_preprocessed.h5ad'
project_name = 'pbmc3k'
output_file_name = data_file_path+project_name

[stdout:0] /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA


In [14]:
%%px
#set slice size (max size of row blocks)
slice_size = 500

In [15]:
%%px
if rank==0:
    print (input_file_name)

[stdout:0] /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA/test_data/inputs/10x/PBMC/3k/pre-processed/pbmc3k_preprocessed.h5ad


## Run Prep_dist() to split file into slices

In [16]:
%%px
#Run prep.py only on one processor to create the slice files
g_nrows=0 #global number of rows (cells)
ncols=0
nslices=0
if rank==0: 
    #g_nrows, ncols, nslices = prep_dist(input_file_name, output_file_name, slice_size)
    g_nrows, ncols, nslices = utils.prep_dist(input_file_name, output_file_name, slice_size)
    
#broadcast resultant variables from root to the other ranks
g_nrows = comm.bcast(g_nrows, root=0)
ncols = comm.bcast(ncols, root=0)
nslices = comm.bcast(nslices, root=0)

[stdout:0] 
output_file_name:  /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA/test_data/inputs/10x/PBMC/3k/pre-processed/pbmc3k.slice_0.h5ad
output_file_name:  /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA/test_data/inputs/10x/PBMC/3k/pre-processed/pbmc3k.slice_1.h5ad
output_file_name:  /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA/test_data/inputs/10x/PBMC/3k/pre-processed/pbmc3k.slice_2.h5ad
output_file_name:  /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA/test_data/inputs/10x/PBMC/3k/pre-processed/pbmc3k.slice_3.h5ad
output_file_name:  /research/rgs01/home/clusterHome/cburdysh/MICA_Project/MICA_distributed/MICA/test_data/inputs/10x/PBMC/3k/pre-processed/pbmc3k.slice_4.h5ad


[stderr:0] 
  if not is_categorical(df_full[k]):
  if is_string_dtype(df[key]) and not is_categorical(df[key])


In [17]:
%%px
if rank==0:
    print("global nrows, ncols, slices: ",g_nrows, ncols, nslices)

[stdout:0] global nrows, ncols, slices:  2496 10499 5


## Read in anndata preprocessed files (in distributed mode, by node number) and calculate distance metrics between all row pairs


In [18]:
%%px
#create a 2d list to hold blocks of similarity matrix
#this should be stored in a distributed scalapack matrix
b=nslices #row blocks
SM = [[None for j in range(b)] for i in range(b)] 

start = time.time()
utils.calc_distance_metric_distributed(data_file_path, project_name, g_nrows, ncols, nslices, SM)
end = time.time()
print("Elapsed = %s" % (end - start))


[stdout:0] 
block comparsons = 15. jobs per rank = 3

rank:  0  comparison between segs: 0  x  0  symmetric= True
rank:  0  comparison between segs: 0  x  1  symmetric= False
rank:  0  comparison between segs: 0  x  2  symmetric= False
rank:  0  comparison between segs: 0  x  3  symmetric= False
Elapsed = 31.496451377868652
[stdout:1] 
rank:  1  comparison between segs: 0  x  4  symmetric= False
rank:  1  comparison between segs: 1  x  1  symmetric= True
rank:  1  comparison between segs: 1  x  2  symmetric= False
rank:  1  comparison between segs: 1  x  3  symmetric= False
Elapsed = 30.980910062789917
[stdout:2] 
rank:  2  comparison between segs: 1  x  4  symmetric= False
rank:  2  comparison between segs: 2  x  2  symmetric= True
rank:  2  comparison between segs: 2  x  3  symmetric= False
rank:  2  comparison between segs: 2  x  4  symmetric= False
Elapsed = 31.283909559249878
[stdout:3] 
rank:  3  comparison between segs: 3  x  3  symmetric= True
rank:  3  comparison between segs:

In [20]:
#%%px 
####from scipy.sparse import csr_matrix #may not use csr as it complicates copy to distributed scalapack and is not used in scalapack apparently
#import collections
#for i in range(b):
#    for j in range(i,b):
#        if isinstance(SM[i][j], collections.Iterable):
#            #print("Rank:",rank, " SM[",i,"][",j,"]=",SM[i][j])
#            print("SM[",i,"][",j,"]=",SM[i][j],"\n")

## Create distributed matrix for scalapack and copy distributed blocks into object
### This matrix needs to be dense for use in scalapack functions, so we will copy the symmetric data into both upper and lower triangular sections of the MI matrix

## copy lower triangular transpose to upper triangular for diagonal blocks

In [21]:
%%px 
##from scipy.sparse import csr_matrix #may not use csr as it complicates copy to distributed scalapack and is not used in scalapack apparently
import collections
for i in range(b):
    for j in range(i,b):
        if isinstance(SM[i][j], collections.Iterable):
            if i==j: #copy lower triangular transpose to upper triangular 
                for ii in range(SM[i][j].shape[0]):
                    for jj in range(ii+1,SM[i][j].shape[1]):
                        (SM[i][j])[ii,jj]=(SM[i][j])[jj,ii]
                #print("Rank:",rank, " SM[",i,"][",j,"]=",SM[i][j])

## Populate a global array with all of the MI data from each rank

Preferably, we would like each rank to contribute of their block MI matrices to the global matrix,
but currently the distributed global matrix has to be constructed from a global (not distributed) array

In [22]:
#copy SM data into global distributed matrix and then write to file?

#then we can read that file into the Scalapack block cyclic matrix form

In [23]:
%%px
#test to distribute matrix from local blocks rather than global array
from scalapy import blacs
import os
import numpy as np
import scipy.linalg as la
from mpi4py import MPI
from scalapy import core
import scalapy.routines as rt

#distribute MI components to ranks as scalapack distributed matrix
comm = MPI.COMM_WORLD
rank = comm.rank
size = comm.size #total number of ranks

global_num_rows =g_nrows
global_num_cols =g_nrows
local_num_rows =g_nrows/b

block_size=64 #default is 32

#Define process grid with process rows and process cols
#We'll use a 2d process grid to distribute blocks so we want to have num_ranks divisible by 2
assert((size % 2)==0)
#ideally we would like BR and BC to the square root of the num_ranks to get a square process matrix
PR=int(np.sqrt(size))
PC=PR

#if we can't create a square matrix, get next best dimensions
if PR*PR!=size:
    PC=size//PR
if rank==0:
    print("PR=",PR, "PC=",PC)

#sets default context and block_shape
core.initmpi([PR, PC],block_shape=[block_size,block_size])
#convert to fortran array indexing to match scalapack functions
#create global matrix from array on rank0
dMI=core.DistributedMatrix(global_shape=[g_nrows,g_nrows],dtype=np.float64)


[stdout:0] PR= 2 PC= 2


In [24]:
#%%px
##get global indices for diagonal
#gi, lri, lci = dMI.local_diagonal_indices()

In [25]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI = %s' % (rank, dMI.global_shape))
#print ('rank %d has local_shape of dMI = %s' % (rank, dMI.local_shape))
#print ('rank %d has block_shape of dMI = %s' % (rank, dMI.block_shape))
#print(dMI.local_array[lri,lci])
#print(dMI.local_array)

In [26]:
#%%px
#blocksize=slice_size
#testrank=3
#testmat=np.zeros(shape=(4,4))
#if comm.rank==testrank:
#    testmat=SM[3][3]
#    #testmat=np.zeros(shape=(500,500))
#    s_block_shape=np.shape(testmat)
#else:
#    testmat=np.zeros(shape=(4,4))
#    s_block_shape=np.shape(testmat)

#s_block_shape = comm.bcast(s_block_shape, root=testrank)   
#copy_from_np(dMI2, testmat, asrow=0, anrow=None, ascol=0, ancol=None, srow=0, scol=0, block_shape=s_block_shape, rank=testrank) #all ranks works


## Copy each SM block submatrix to distributed block cyclic matrix

In [27]:
%%px
blocksize=slice_size
n_jobs_per_rank= (int((b * (b + 1)) / 2))/comm.Get_size()
import collections
for i in range(b):
    for j in range(i,b): # j in range [i,b]
        idx = int(i * b + j - (i * (i + 1)) / 2)
        srank = idx//n_jobs_per_rank
        lA=np.zeros(shape=(2,2))
        s_block_shape=np.shape(lA)
        if isinstance(SM[i][j], collections.Iterable):
            lA=SM[i][j]
            s_block_shape=np.shape(lA)
            print("copy SM[",i,j,"] shape: ",s_block_shape," to global i,j:",i*blocksize,j*blocksize)
        #broadcast sending ranks block shape to all
        s_block_shape = comm.bcast(s_block_shape, root=srank)   
        dMI.np2self(lA, srow=i*blocksize, scol=j*blocksize, block_shape=s_block_shape, rank=srank )      

[stdout:0] 
copy SM[ 0 0 ] shape:  (500, 500)  to global i,j: 0 0
copy SM[ 0 1 ] shape:  (500, 500)  to global i,j: 0 500
copy SM[ 0 2 ] shape:  (500, 500)  to global i,j: 0 1000
copy SM[ 0 3 ] shape:  (500, 500)  to global i,j: 0 1500
[stdout:1] 
copy SM[ 0 4 ] shape:  (500, 496)  to global i,j: 0 2000
copy SM[ 1 1 ] shape:  (500, 500)  to global i,j: 500 500
copy SM[ 1 2 ] shape:  (500, 500)  to global i,j: 500 1000
copy SM[ 1 3 ] shape:  (500, 500)  to global i,j: 500 1500
[stdout:2] 
copy SM[ 1 4 ] shape:  (500, 496)  to global i,j: 500 2000
copy SM[ 2 2 ] shape:  (500, 500)  to global i,j: 1000 1000
copy SM[ 2 3 ] shape:  (500, 500)  to global i,j: 1000 1500
copy SM[ 2 4 ] shape:  (500, 496)  to global i,j: 1000 2000
[stdout:3] 
copy SM[ 3 3 ] shape:  (500, 500)  to global i,j: 1500 1500
copy SM[ 3 4 ] shape:  (500, 496)  to global i,j: 1500 2000
copy SM[ 4 4 ] shape:  (496, 496)  to global i,j: 2000 2000


In [28]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI = %s' % (rank, dMI.global_shape))
#print ('rank %d has local_shape of dMI = %s' % (rank, dMI.local_shape))
#print ('rank %d has block_shape of dMI = %s' % (rank, dMI.block_shape))
#print(dMI.local_array)
##print(dMI.local_array[lri,lci])

## copy transpose of blocks to fill upper triangular distributed matrix (needed for scalapack computation)

In [29]:
%%px
blocksize=slice_size
n_jobs_per_rank= (int((b * (b + 1)) / 2))/comm.Get_size()
import collections

for i in range(b):
    for j in range(i+1,b): # j in range [i,b]
        idx = int(i * b + j - (i * (i + 1)) / 2)
        srank = idx//n_jobs_per_rank
        lA=np.zeros(shape=(2,2))
        s_block_shape=np.shape(lA)
        if isinstance(SM[i][j], collections.Iterable):
            lA=np.transpose(SM[i][j])
            s_block_shape=np.shape(lA)
            #print("copy SM[",j,i,"] shape: ",s_block_shape)
        #broadcast sending ranks block shape to all
        s_block_shape = comm.bcast(s_block_shape, root=srank)   
        dMI.np2self(lA, srow=j*blocksize, scol=i*blocksize, block_shape=s_block_shape, rank=srank )      

In [30]:
## need to also fill in empty symmetric upper triangular portion

In [31]:
# Even though this is a symmetric matrix, for further processing, we need to copy block data to rest of matrix

In [32]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI = %s' % (rank, dMI.global_shape))
#print ('rank %d has local_shape of dMI = %s' % (rank, dMI.local_shape))
#print ('rank %d has block_shape of dMI = %s' % (rank, dMI.block_shape))
##print(dMI.local_array[0:20,0:20])
#print(dMI.local_array)

In [None]:
#%%px
###lr=[range(4)]
###lc=[range(4)]
####Check to see of data was transferred to rank 0
#if rank==0:
#    for i in range(b):
#            for j in range(i,b): # j in range [i,b]
#                #print("SM[",i,j,"] ",np.shape(SM[i][i]))
#                print("SM[",i,j,"] ",SM[i][j])
##                print( (SM[i][j])[lr,lc] )   

In [None]:
#%%px
#from inspect import getmembers, isfunction, ismodule
#if rank == 0:
#    print([o[0] for o in getmembers(scalapy) if ismodule(o[1])])

In [None]:
#%%px
#from inspect import getmembers, isfunction, ismodule
#import scalapy
#if rank==0:
#    print(getmembers(scalapy.blacs, isfunction))
#    #print(getmembers(scalapy.routines, isfunction))

In [None]:
#%%px
#total number of global blocks = total number of block comparisons
#Should be greater than number of ranks to improve load balancing
# b is number of slices (blocks of rows) original data has been discretized into.
#The size of these blocks can be variable
#global_number_of_matrix_blocks= int((b * (b + 1)) / 2) 

#We'll use a 2d process grid to distribute blocks so we want to have num_ranks divisivle by 2

In [None]:
#bcast SM[i][j] from each rank to root rank 0 so that we can load global matrix array


## Write distributed MI matrix to file
### So we can read this in to Scalapack later on

In [33]:
%%px
#Write MI matrix to file
mi_filename = data_file_path+project_name+'_mi_distributed.scalapack'
dMI.to_file(mi_filename)

## The following code snippet reads MI matrix from a file and loads it into a distributed Scalapack matrix

In [34]:
%%px
#Read MI matrix from file
mi_filename = data_file_path+project_name+'_mi_distributed.scalapack'
dMI.from_file(mi_filename, global_shape=[g_nrows,g_nrows], dtype=np.float64, block_shape=[block_size,block_size])

[0;31mOut[0:17]: [0m<scalapy.core.DistributedMatrix at 0x2aab50f4d4a8>

[0;31mOut[1:17]: [0m<scalapy.core.DistributedMatrix at 0x2aaaf3002470>

[0;31mOut[2:17]: [0m<scalapy.core.DistributedMatrix at 0x2aab50fdeef0>

[0;31mOut[3:17]: [0m<scalapy.core.DistributedMatrix at 0x2aaaee905518>

In [35]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI = %s' % (rank, dMI2.global_shape))
#print ('rank %d has local_shape of dMI = %s' % (rank, dMI2.local_shape))
#print ('rank %d has block_shape of dMI = %s' % (rank, dMI2.block_shape))
#print(dMI2.local_array)

## Now we need to create a normalization matrix
### We start with an empty matrix but add the the diagonal as the first column
### Then we multiply by its transpose to get a dense matrix

In [36]:
%%px
#get global indices for diagonal
gi, lri, lci = dMI.local_diagonal_indices()

In [37]:
%%px
#create matrix to store diagonal row
dMI_diag=core.DistributedMatrix.empty_like(dMI)
dMI_row1=core.DistributedMatrix.empty_like(dMI)

In [38]:
%%px
dgi, dlri, dlci = dMI_diag.local_diagonal_indices()

In [39]:
%%px
dMI_diag.local_array[dlri,dlci]=dMI.local_array[lri,lci]
#dMI_diag.local_array[0,dlci]=dMI.local_array[lri,lci]
#my_diag[comm.rank]=dMI.local_array[lri,lci]

In [40]:
#%%px
#print(dMI_diag.local_array)

## Create a matrix with ones in the first row and zeros elsewhere

In [41]:
%%px
ri, ci = dMI_row1.indices()
dMI_row1.local_array[:]= ((ri==0).astype(int)).astype(float)
#print(dMI_row1.local_array)

## Multiply the matrices to get diagonal values on first row of distributed matrix

In [42]:
%%px
#dMI_norm = rt.dot(dMI_diag,dMI_row1,transA='T')
dMI_norm = rt.dot(dMI_row1,dMI_diag)
#dMI_norm = dMI_diag.dot(dMI_row1)

In [43]:
#%%px
####print(dMI_norm.local_array)

## Multiply the matrix with its transpose to get a dense matrix for normalization

In [44]:
%%px
import scalapy.routines as rt
#dMI_norm=dMI_diag.T*dMI_diag
dMI_norm2 = rt.dot(dMI_norm,dMI_norm,transA='T')

In [45]:
#%%px
#print(dMI_norm2.local_array)

In [46]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, dMI_diag.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, dMI_diag.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, dMI_diag.block_shape))
#print(dMI_diag.local_array)

In [47]:
#%%px
#blocksize=slice_size
#n_jobs_per_rank= (int((b * (b + 1)) / 2))/comm.Get_size()
#import collections
#for i in range(b):
#    #for j in range(i+1,b): # j in range [i,b]
#    #idx = int(i * b + j - (i * (i + 1)) / 2)
#    #srank = idx//n_jobs_per_rank
#    lA=
#    s_block_shape=np.shape(lA)
#    if isinstance(SM[i][j], collections.Iterable):
#        lA=local_diag
#        s_block_shape=np.shape(lA)
#    #broadcast sending ranks block shape to all
#    s_block_shape = comm.bcast(s_block_shape, root=srank)   
#    dMI_diag.np2self(lA, srow=i*blocksize, scol=0, block_shape=s_block_shape, rank=srank )  

In [48]:
#%%px
##Array must be 2D for this to work
##convert to fortran array indexing to match scalapack functions
#global_diag=np.asfortranarray(global_diag)
##create global matrix from array on rank0
#dMI_diag=core.DistributedMatrix.from_global_array(global_diag,rank=0)

In [49]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, dMI_diag.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, dMI_diag.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, dMI_diag.block_shape))
##print(dMI_diag.local_array)

In [50]:
#%%px
#dMI_diag_T=dMI_diag.T
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, dMI_diag_T.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, dMI_diag_T.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, dMI_diag_T.block_shape))
#
#print(dMI_diag_T.local_array)

## Use scalapack to compute distributed GEMM

In [51]:
#%%px
#import scalapy.routines as rt
##dMI_norm=dMI_diag.T*dMI_diag
#dMI_norm = rt.dot(dMI_diag,dMI_diag,transA='T')


In [52]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, dMI_norm2.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, dMI_norm2.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, dMI_norm2.block_shape))
#print(dMI_norm2.local_array)

## Compute the square root of the normalization matrix

In [53]:
%%px
#compute sqrt of each element
dMI_norm_square=core.DistributedMatrix.empty_like(dMI)
dMI_norm_square.local_array[:] = np.sqrt(dMI_norm2.local_array[:])

In [54]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, dMI_norm_square.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, dMI_norm_square.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, dMI_norm_square.block_shape))
#print(dMI_norm_square.local_array)

## Now we can finally compute the norm of the MI matrix

In [55]:
%%px
dMI_normed=core.DistributedMatrix.empty_like(dMI)
dMI_normed.local_array[:] = dMI.local_array[:] / dMI_norm_square.local_array[:]

In [56]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, dMI_normed.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, dMI_normed.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, dMI_normed.block_shape))
#print(dMI_normed.local_array)

In [57]:
%%px
mi_normed_filename = data_file_path+project_name+'_mi_normed_distributed.scalapack'
dMI_normed.to_file(mi_normed_filename)

## Now compute eigenvalues and eigenvectors of dissimmilarity matrix

In [None]:
#def mds(in_mat_file, max_dim, out_file_name, perplexity=30, print_plot="True", dist_method="mi",):
#    hdf = pd.HDFStore(in_mat_file)
#    if dist_method == "mi":
#        dlplf = 1 - hdf["norm_mi"]
#    elif dist_method == "euclidean":
#        df = hdf[dist_method]
#    else:
#        df = 1 - hdf[dist_method]
#
#    hdf.close()
#    n = df.shape[0]
#    H = np.eye(n) - np.ones((n, n)) / n
#    B = -H.dot(df ** 2).dot(H) / 2
#    evals, evecs = eigh(B, eigvals=(n - np.min([n, 200]), n - 1))
#    
#    idx = np.argsort(evals)[::-1]
#    evals = evals[idx]
#    evecs = evecs[:, idx]
#    evals_pos = evals > 0
#    L = np.diag(np.sqrt(evals[evals_pos]))
#    V = evecs[:, evals_pos]
#    Y = pd.DataFrame(
#        data=V.dot(L),
#        index=df.index,
#        columns=["mds_" + str(x) for x in np.arange(1, L.shape[0] + 1)],
#    )
#
#    Y.to_hdf(out_file_name + "_reduced.h5", "mds")  # save reduced mi in mds
#
#    if print_plot == "True":
#        vis = tsne(
#            Y,
#            max_dim,
#            out_file_name,
#            "mds",
#            perplexity,
#            print_plot,
#        )
#        vis.to_hdf(out_file_name + "_reduced", "mds_tsne")  # save preview in key "mds_tsne"

In [58]:
%%px
import time
start = time.time()
import scalapy.routines as rt

n= g_nrows

#convert similarity matrix to dissimilarity matrix
#df= 1-df
MDS= core.DistributedMatrix.empty_like(dMI)
MDS.local_array[:]=1.0-dMI_normed.local_array[:]

# H = I-Ones/n
I= core.DistributedMatrix.identity(n=g_nrows)
Ones= core.DistributedMatrix.empty_like(dMI)
Ones.local_array[:]=1.0/n
H = core.DistributedMatrix.empty_like(dMI)
H.local_array[:] = I.local_array[:] - Ones.local_array[:]

# B = -H.dot(MDS**2).dot(H)/2
negH= core.DistributedMatrix.empty_like(dMI)
negH.local_array[:]= -H.local_array[:]
MDS2= core.DistributedMatrix.empty_like(dMI)
MDS2.local_array[:] = MDS.local_array[:]**2
C=rt.dot(negH,MDS2)
B = rt.dot(C,H)
B.local_array[:]=B.local_array[:]/2.0
#dMI_norm=dMI_diag.T*dMI_diag
#dMI_norm = rt.dot(dMI_diag,dMI_diag,transA='T')

end = time.time()
print("Elapsed = %s" % (end - start))

[stdout:0] Elapsed = 5.076117515563965
[stdout:1] Elapsed = 5.073058605194092
[stdout:2] Elapsed = 5.073890209197998
[stdout:3] Elapsed = 5.0699381828308105


In [59]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, MDS.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, MDS.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, MDS.block_shape))
##print(MDS.local_array)

In [60]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, MDS2.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, MDS2.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, MDS2.block_shape))
#print(MDS2.local_array)

In [61]:
#%%px
#if rank==0: 
#    print ('rank %d has global_shape of dMI_diag = %s' % (rank, B.global_shape))
#print ('rank %d has local_shape of dMI_diag = %s' % (rank, B.local_shape))
#print ('rank %d has block_shape of dMI_diag = %s' % (rank, B.block_shape))
#print(B.local_array)

In [62]:
%%px
import time
start = time.time()

import scalapy.routines as rt
#compute eigh(B,)
#we want to pick out the top 200 eigenvalues/vectors from the matrix
#evals, evecs = eigh(B, eigvals=(n - np.min([n, 200]), n - 1))

#evals, dZd = rt.eigh(B,overwrite_a=False)
#returns same evals np.array to all ranks
# and distributed evecs matrix dZd
#evals, dZd = rt.eigh(B,overwrite_a=False,eigvals=(n - np.min([n, 200]), n - 1))
evals, dZd = rt.eigh(B,eigvals=(n - np.min([n, 200]), n - 1)) #689 seconds!!
#evals, dZd = rt.eigh(B) #281.32007026672363 seconds. Not sure why this is less

#copy evecs to root
evecs = dZd.to_global_array(rank=0)
#gZd = dZd.to_global_array(rank=0)

end = time.time()
print("Elapsed = %s" % (end - start))  

[stdout:0] Elapsed = 2.6240415573120117
[stdout:1] Elapsed = 2.623486042022705
[stdout:2] Elapsed = 2.624171733856201
[stdout:3] Elapsed = 2.6239771842956543


## gather the top 200 eigenvalues on a single rank

## Read in original dataframe to get labels to attach to results

In [67]:
%%px
#get index names from original dataframe
import pandas as pd
if rank==0:
    data_file_path = cwd+'/test_data/inputs/10x/PBMC/3k/pre-processed/'
    input_file_name = data_file_path + 'pbmc3k_preprocessed.h5ad'
    adf=utils.read_anndata_file(input_file_name)
    index=adf.obs.index

## Postprocess the eigenvalues by sorting and removing negative vals

In [68]:
%%px
if rank==0:
    idx = np.argsort(evals)[::-1]
    print(len(idx))
    
    evals = evals[idx]
    evecs = evecs[:, idx]
    evals_pos = evals > 0
    L = np.diag(np.sqrt(evals[evals_pos]))
    V = evecs[:, evals_pos]
    #print(V)
    
    Y = pd.DataFrame(
        data=V.dot(L),
        index=index, #need to reattach index names to eigenvectors
        columns=["mds_" + str(x) for x in np.arange(1, L.shape[0] + 1)],
    )
    
#Y.to_hdf(out_file_name + "_reduced.h5", "mds")  # save reduced mi in mds

[stdout:0] 200


## Write reduced data to file

In [71]:
%%px
if rank==0:
    data_file_path = cwd+'/test_data/inputs/10x/PBMC/3k/pre-processed/'
    out_file_name = data_file_path + 'pbmc3k_preprocessed'    
    Y.to_hdf(out_file_name + "_reduced.h5", "mds")  # save reduced mi in mds

In [72]:
%%px
if rank==0:
    print(Y)

[stdout:0] 
                     mds_1     mds_2     mds_3     mds_4     mds_5     mds_6  \
AAACATACAACCAC-1  0.080218 -0.007323  0.032147 -0.021112 -0.008207 -0.000903   
AAACATTGATCAGC-1  0.051539 -0.056262  0.045581  0.053011  0.010254  0.053645   
AAACCGTGCTTCCG-1 -0.188360  0.023939  0.013209  0.032328 -0.012209 -0.021553   
AAACCGTGTATGCG-1 -0.056562 -0.144185 -0.148594 -0.018133  0.063652  0.010560   
AAACGCACTGGTAC-1  0.038719 -0.022159  0.008433  0.000035 -0.009473  0.002272   
...                    ...       ...       ...       ...       ...       ...   
TTTCGAACTCTCAT-1 -0.208469  0.031346  0.019770  0.031711 -0.039463  0.031550   
TTTCTACTGAGGCA-1 -0.007556 -0.000021 -0.039614  0.084780 -0.006493  0.035069   
TTTCTACTTCCTCG-1  0.043454  0.139369 -0.093433  0.012428 -0.007409  0.014009   
TTTGCATGAGAGGC-1  0.003246  0.097821 -0.129420 -0.020403  0.009517  0.044159   
TTTGCATGCCTCAC-1  0.090826  0.018071  0.057012  0.003005  0.021881  0.010015   

                     mds_7 

In [74]:
#%%px
#perplexity=30
#max_dim=200
#    Y.to_hdf(out_file_name + "_reduced.h5", "mds")  # save reduced mi in mds
#
#    if print_plot == "True":
#        vis = tsne(
#            Y,
#            max_dim,
#            out_file_name,
#            "mds",
#            perplexity,
#            print_plot,
#        )
#        vis.to_hdf(out_file_name + "_reduced", "mds_tsne")  # save preview in key "mds_tsne"