In [1]:
import matplotlib.pyplot as plt
import scipy.io as sio
import scipy.sparse as scsp
import numpy as np
import os
import json

Make the paths match what you want to analyze and where to store the results

Currently analyzing every matrix in a folder will mean every file with matrix in its name, and the .mm or .bin extension depending on what you want

In [2]:
matrix_folder = "/path/to/your/matrices/"
result_file_name = "matrices_analyzed"
# the path to amgcl bin2mm can be whatever if convert_from_binary is False
# set convert_from_binary to True if you want to convert from binary
convert_from_binary = False
amgcl_bin_to_mm_converter = "path/to/bin2mm"

BLOCKSIZE = 2 # the blocksize of the matrices in the folder

analyze_matrix computes various characteristics of a bsr matrix and returns the results in a dictionary
Currently the function computes:
* Mean nonzero entries per row
* Median nonzero entries per row
* Min number of nonzero entries in a row
* max number of nonzero entries in a row
* whether or not the matrix is symmetric
* a bitmasked array showing the amount of blocks with each possible sparsity pattern

In [3]:
# these constants are used to index the resulting tuple from sio.mminfo
ROWS = 0
COLS = 1
ENTRIES = 2

def analyze_matrix(matrix, metadata):
    # compute a histogram of how many nonzero entries are in each row of the matrix
    M = len(matrix.indptr) - 1
    rowHistogram = np.zeros(M, dtype=int)
    for i in range(1, M+1):
        rowHistogram[i-1] = matrix.indptr[i]-matrix.indptr[i-1]

    # use the histogram to compute simple statistical characteristics of the matrix
    median = np.median(rowHistogram)
    min_nnz = np.min(rowHistogram)
    max_nnz = np.max(rowHistogram)
    arithmetic_mean = np.mean(rowHistogram)

    # check if the matrix is symmetric
    symmetric = True
    for row in range(M):
        for colIdx in range(matrix.indptr[row], matrix.indptr[row+1]):
            col = matrix.indices[colIdx]
            if row not in matrix.indices[matrix.indptr[col]:matrix.indptr[col+1]]:
                symmetric = False

    bit_masked_count = [0]*16
    for block_idx in range(len(matrix.data)):
        mask = 0
        mask += 1 if matrix.data[block_idx][0][0] != 0.0 else 0
        mask += 2 if matrix.data[block_idx][0][1] != 0.0 else 0
        mask += 4 if matrix.data[block_idx][1][0] != 0.0 else 0
        mask += 8 if matrix.data[block_idx][1][1] != 0.0 else 0
        bit_masked_count[mask] += 1

    return {
        "nnz" : str(metadata[ENTRIES]),
        "dims" : str([metadata[ROWS], metadata[COLS]]),
        "median" : str(median),
        "minNnzInRow" : str(min_nnz),
        "maxNnzInRow" : str(max_nnz),
        "mean" : str(arithmetic_mean),
        "symmetric" : str(symmetric),
        "blockSparsity" : str(bit_masked_count)
    }

In [4]:
# this function returns true if the filename provided has "matrix" in it and the extension is the same as the one provided
def matrix_filter(filename, extension):
    if "matrix" not in filename:
        return False
    if len(extension) > len(filename):
        return False
    if filename[len(filename) - len(extension):] != extension:
        return False
    return True

analysis_results = {}

if convert_from_binary:
    pass
else:
    files = os.scandir(matrix_folder)
    filenames = []
    for f in files:
        filenames.append(f.name)
    filenames = list(filter(lambda e: matrix_filter(e, ".mm"), filenames))

    for matrix_file in filenames:
        # Read in the file as a BSR matrix
        path_to_matrix = matrix_folder + matrix_file
        try:
            matrix_metadata = sio.mminfo(path_to_matrix)
            matrix = sio.mmread(path_to_matrix)
            matrix = matrix.tobsr([BLOCKSIZE,BLOCKSIZE])

            analysis_results[matrix_file] = analyze_matrix(matrix, matrix_metadata)
        except:
            analysis_results[matrix_file] = {"FAILED ANALYSIS"}
            print("analysis failed for", matrix_file)

analysis failed for prob_2_time_7443470.1734501__nit_1_matrix_istl_0.bin.mm
analysis failed for prob_2_time_7325501.67195367__nit_3_matrix_istl_0.bin.mm
analysis failed for prob_2_time_7443470.1734501__nit_4_matrix_istl_0.bin.mm
analysis failed for prob_2_time_7443470.1734501__nit_0_matrix_istl_0.bin.mm
analysis failed for prob_2_time_7443470.1734501__nit_2_matrix_istl_0.bin.mm
analysis failed for prob_2_time_7652935.08672505__nit_1_matrix_istl_0.bin.mm
analysis failed for prob_2_time_7652935.08672505__nit_3_matrix_istl_0.bin.mm


In [None]:
with open(result_file_name + '.json', 'w', encoding='utf-8') as f:
    json.dump(analysis_results, f, ensure_ascii=False, indent=4)