In [1]:
import cupy as cp

ModuleNotFoundError: No module named 'cupy'

In [2]:
import numpy as np

# Demo inputs: two equally-shaped 2D arrays with gaps marked as NaN.
# Swap these placeholders for the real data.
arr1 = np.array([[1, 2, np.nan], [4, np.nan, 6]])
arr2 = np.array([[10, 20, 30], [np.nan, 50, 60]])

# True wherever the corresponding array holds a real (non-NaN) value
mask1 = np.logical_not(np.isnan(arr1))
mask2 = np.logical_not(np.isnan(arr2))

# Element-wise total with every NaN replaced by 0 beforehand
sum_arr = np.add(np.nan_to_num(arr1), np.nan_to_num(arr2))

# How many of the two arrays contribute a real value at each position (0, 1 or 2)
count_non_nan = np.add(mask1.astype(int), mask2.astype(int))

# Divide only where at least one value exists; positions with no data keep the
# 0 they were pre-filled with, so no division by zero ever happens.
average_arr = np.zeros_like(sum_arr)
np.divide(sum_arr, count_non_nan, out=average_arr, where=count_non_nan != 0)

average_arr  # 2D array of per-position averages



array([[ 5.5, 11. , 30. ],
       [ 4. , 50. , 33. ]])

In [5]:
def mean_and_std_of_arrays(arrays):
    """
    Element-wise mean and standard deviation across a list of 2D arrays, skipping NaNs.

    Parameters:
    arrays (list of np.array): 2D numpy arrays that all share one shape.

    Returns:
    tuple: (mean, std), two 2D arrays. A position that is NaN in every input array
           is NaN in both results. The standard deviation divides by the number of
           non-NaN values at that position (population form, no n-1 correction).
    """
    # Stack the inputs so that axis 0 runs over the individual arrays
    stacked = np.array(arrays)

    # Number of real values available at each position, and where that is non-zero
    valid_counts = np.count_nonzero(~np.isnan(stacked), axis=0)
    has_data = valid_counts != 0

    # Mean = NaN-ignoring sum / count; positions without data keep the NaN pre-fill
    totals = np.nansum(stacked, axis=0)
    mean_arr = np.full(totals.shape, np.nan)
    np.divide(totals, valid_counts, out=mean_arr, where=has_data)

    # Variance = NaN-ignoring sum of squared deviations / count, same NaN pre-fill
    squared_dev = (stacked - mean_arr) ** 2
    variance = np.full(totals.shape, np.nan)
    np.divide(np.nansum(squared_dev, axis=0), valid_counts, out=variance, where=has_data)

    std_arr = np.sqrt(variance)
    return mean_arr, std_arr

# Example usage of mean_and_std_of_arrays:
# three 2x3 arrays with NaNs scattered through them; position [1][0] is NaN in
# all three, so it exercises the "no data at all" case.
array1 = np.array([[1, 2, np.nan], [np.nan, np.nan, 6]])
array2 = np.array([[10, 20, 30], [np.nan, 50, 60]])
array3 = np.array([[np.nan, 200, 3], [np.nan, 5, np.nan]])

# Compute the element-wise mean AND standard deviation across the three arrays
mean_array, std_array= mean_and_std_of_arrays([array1, array2, array3])
mean_array, std_array  # Display both results as a (mean, std) tuple



(array([[ 5.5, 74. , 16.5],
        [ nan, 27.5, 33. ]]),
 array([[ 4.5       , 89.39798655, 13.5       ],
        [        nan, 22.5       , 27.        ]]))

In [7]:
def reorder_symmetric_matrix(matrix, original_order, new_order):
    """
    Permute the rows and columns of a 2D matrix so they follow a new ordering of labels.

    Parameters:
    matrix (np.array): 2D matrix whose rows and columns are both labelled by original_order.
    original_order (list): Row/column labels in the order the matrix currently uses.
    new_order (list): Row/column labels in the order the result should use.

    Returns:
    np.array: A new matrix whose entry [i, j] is the original entry for
              (new_order[i], new_order[j]).

    Raises:
    KeyError: If a label in new_order does not appear in original_order.
    """
    # Label -> position in the original matrix
    position_of = {}
    for position, label in enumerate(original_order):
        position_of[label] = position

    # Original positions listed in the requested order (unknown labels raise KeyError)
    picks = [position_of[label] for label in new_order]

    # np.ix_ builds the open mesh that selects the same positions on both axes
    return matrix[np.ix_(picks, picks)]

# Example usage of reorder_symmetric_matrix:
# a 3x3 symmetric matrix whose rows/columns are labelled GeneA, GeneB, GeneC.
matrix = np.array([[1, 2, 3], [2, 4, 5], [3, 5, 6]])
gene_names = ['GeneA', 'GeneB', 'GeneC']
gene_names_new_order = ['GeneC', 'GeneA', 'GeneB']
print(matrix)
# Permute rows and columns into the order GeneC, GeneA, GeneB
reordered_matrix = reorder_symmetric_matrix(matrix, gene_names, gene_names_new_order)
reordered_matrix  # Display the reordered matrix




[[1 2 3]
 [2 4 5]
 [3 5 6]]


array([[6, 3, 5],
       [3, 1, 2],
       [5, 2, 4]])

In [7]:
def reorder_symmetric_matrix_vectorized(matrix, original_order, new_order):
    """
    Efficiently reorder the rows and columns of a 2D symmetric matrix according to a new order of its row/column names,
    using vectorized operations for better performance with large matrices.
    
    Parameters:
    matrix (np.array): The original 2D symmetric matrix to be reordered.
    original_order (list): The list of row/column names in the original order of the matrix.
    new_order (list): The list of row/column names in the desired new order.
    
    Returns:
    np.array: The reordered 2D symmetric matrix.
    """
    # Create a mapping from gene names to their index in the original matrix
    gene_to_index = {gene: idx for idx, gene in enumerate(original_order)}
    
    # Convert the new order into indices, with -1 for genes not in the original order
    new_indices = [gene_to_index.get(gene, -1) for gene in new_order]
    
    # Create a mask for valid indices (excluding -1)
    valid_mask = np.array(new_indices) != -1
    
    # Filter out valid indices for both rows and columns
    valid_row_indices = np.array(new_indices)[valid_mask]
    valid_col_indices = valid_row_indices.copy()  # Symmetric matrix has the same row and column indices

    # Initialize a new matrix filled with NaN
    new_matrix = np.full((len(new_order), len(new_order)), np.nan)
    
    # Use valid indices to fill the new matrix from the original matrix
    new_matrix[np.ix_(valid_mask, valid_mask)] = matrix[np.ix_(valid_row_indices, valid_col_indices)]

    return new_matrix

# Example usage of reorder_symmetric_matrix_vectorized:
# new_order lists 'GeneA' twice, so the result is 4x4 with that gene's
# row/column repeated.
matrix = np.array([[1, 2, 3], [2, 4, 5], [3, 5, 6]])
original_order = ['GeneA', 'GeneB', 'GeneC']
new_order = ['GeneC', 'GeneA', 'GeneB', 'GeneA']
print(matrix)
# Reorder the matrix in a single indexed assignment
reordered_matrix = reorder_symmetric_matrix_vectorized(matrix, original_order, new_order)
reordered_matrix


[[1 2 3]
 [2 4 5]
 [3 5 6]]


array([[6., 3., 5., 3.],
       [3., 1., 2., 1.],
       [5., 2., 4., 2.],
       [3., 1., 2., 1.]])

In [None]:
def mean_and_std_of_arrays_old(arrays):
    """
    Hand-written NaN-aware mean and standard deviation across a list of 2D arrays.

    Parameters:
    arrays (list of np.array): 2D numpy arrays that all share one shape.

    Returns:
    tuple: (mean, std), two 2D arrays. A position that is NaN in every input array
           is NaN in both results. The standard deviation divides by the number of
           non-NaN values at that position (population form, no n-1 correction).
    """
    # Axis 0 of the stacked cube runs over the individual input arrays
    cube = np.array(arrays)

    # Per-position count of real values, and the positions that have any at all
    n_valid = np.count_nonzero(~np.isnan(cube), axis=0)
    nonempty = n_valid != 0

    def _per_count(numerator):
        # Divide by the count where data exists; everywhere else keep the NaN pre-fill
        nan_filled = np.full(numerator.shape, np.nan)
        return np.divide(numerator, n_valid, out=nan_filled, where=nonempty)

    # NaN-ignoring sum over the stack, turned into a mean
    mean_arr = _per_count(np.nansum(cube, axis=0))

    # NaN-ignoring sum of squared deviations, turned into a variance, then rooted
    std_arr = np.sqrt(_per_count(np.nansum((cube - mean_arr) ** 2, axis=0)))

    return mean_arr, std_arr
    
def mean_and_std_of_arrays(arrays):
    """
    Calculate the mean and standard deviation of a list of 2D arrays, handling missing values (NaNs),
    and returning NaN for elements that are all-NaN across all slices.

    Parameters:
    arrays (list of np.array): A list of 2D numpy arrays with the same shape.

    Returns:
    tuple: A tuple containing two 2D arrays, the first with mean values and the second with standard deviation values.
           Both arrays will have NaN where all elements were NaN in the input arrays.
           The standard deviation is the population form (np.nanstd default, ddof=0).
    """
    import warnings

    # Convert the list of arrays to a 3D numpy array where each 2D array is a slice
    arr_stack = np.asarray(arrays)

    # np.nanmean / np.nanstd already return NaN for positions that are NaN in every
    # slice, but they also emit a RuntimeWarning for each such position ("Mean of
    # empty slice" / "Degrees of freedom <= 0"). All-NaN positions are an expected
    # input here, so silence those warnings locally instead of leaking them to callers.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean_arr = np.nanmean(arr_stack, axis=0)
        std_arr = np.nanstd(arr_stack, axis=0)

    # arr_stack is a local and is released when the function returns; no explicit
    # `del` is needed, and no separate non-NaN count array has to be built.
    return mean_arr, std_arr

# Sanity check: the np.nanmean/np.nanstd version should reproduce the hand-written one.
# NOTE(review): tmp_reordered_simi_matrix_list is not defined in any cell visible in
# this notebook — it has to exist in the kernel before this cell runs; confirm where
# it is built so the notebook survives Restart & Run All.
mean_orig, std_orig = mean_and_std_of_arrays_old(tmp_reordered_simi_matrix_list)
mean_mod, std_mod = mean_and_std_of_arrays(tmp_reordered_simi_matrix_list)

# Both functions return NaN where a position has no data, and NaN == NaN is False,
# so an exact `==` check reports a mismatch even when the results agree. The two
# implementations also sum in a different order, so allow for float rounding.
(
    np.allclose(mean_orig, mean_mod, equal_nan=True),
    np.allclose(std_orig, std_mod, equal_nan=True),
)