In [1]:
import matplotlib.pyplot as plt
import os
import numpy as np

## Helpers

In [2]:
def clean(matrix, tol=1e-10):
    """Return a tidied copy of ``matrix``; the input itself is not modified.

    Two passes are applied:

    1. Entries whose magnitude is below ``tol`` are set to zero.
    2. Magnitudes are visited in ascending order. A run of values that
       each lie within ``tol`` of the *first* value of the run is replaced
       by the median of that run. The original signs are then restored.

    Args:
        matrix: NumPy array to clean.
        tol: Absolute tolerance used by both passes.

    Returns:
        A new array with the same shape as ``matrix``.
    """
    cleaned = matrix.copy()

    # Pass 1: snap near-zero entries to exactly zero.
    cleaned[np.abs(cleaned) < tol] = 0

    # With at most one element there is nothing to merge.
    if cleaned.size <= 1:
        return cleaned

    mags = np.abs(cleaned.flat)
    order = np.argsort(mags)
    last = len(order) - 1

    # Pass 2: scan the sorted magnitudes, remembering where the current
    # run of near-equal values started.
    run_start = 0
    for pos in range(1, last + 1):
        # A run ends once a value is at least tol above the run's first value.
        if mags[order[pos]] - mags[order[run_start]] >= tol:
            run = order[run_start:pos]
            if len(run) > 1:
                mags[run] = np.median(mags[run])
            run_start = pos

    # Close the trailing run unless it is the final element on its own.
    if run_start != last:
        tail = order[run_start:]
        mags[tail] = np.median(mags[tail])

    # Reattach the original signs and write the values back.
    cleaned.flat = np.copysign(mags, cleaned.flat)

    return cleaned

In [3]:
# Root directory of the example matrices; being a relative path, it is
# resolved against the notebook's current working directory.
MAT_DIR = "./../../example_mats_med"

# One bucket per type key, each mapping a matrix name to its array.
# The buckets start empty and are filled by the loading loop.
matrices = {type_key: {} for type_key in ("quad", "hex", "tet", "tri")}

In [4]:
# Fail loudly when the data directory is missing: os.walk ignores listing
# errors by default, which would leave every bucket in `matrices` silently
# empty and only surface later as a confusing KeyError.
if not os.path.isdir(MAT_DIR):
    raise FileNotFoundError(
        f"Matrix directory not found: {os.path.abspath(MAT_DIR)!r}"
    )

for dirpath, _subdirs, filenames in os.walk(MAT_DIR):
    # The two innermost directory names: order of problem, then sub type.
    # os.path is used instead of splitting on "/" so the names stay correct
    # whatever path separator the platform uses.
    subtype_dir = os.path.basename(dirpath)
    order_dir = os.path.basename(os.path.dirname(dirpath))

    # mat file names
    for fname in filenames:
        mat_data = clean(np.loadtxt(os.path.join(dirpath, fname)))

        # <order>-<sub type>-<size>; size is the file name up to its first "."
        mat_name = f"{order_dir}-{subtype_dir}-{fname.split('.')[0]}"

        # Store in the correct location according to type.
        # NOTE(review): this is a substring test against the whole path, so a
        # key that happens to occur anywhere in MAT_DIR itself would match
        # every file — confirm the directory layout keeps this unambiguous.
        for k in matrices:
            if k in dirpath:
                matrices[k][mat_name] = mat_data

## Base Pseudo FLOPS

In [8]:
# Spot-check one of the loaded matrices.
matrices["quad"]["p1-gauss-legendre-m132"]

array([[ 0.8660254,  0.8660254,  0.       ,  0.       ,  0.8660254,
         0.       ,  0.8660254,  0.       ],
       [-0.8660254, -0.8660254,  0.       ,  0.       ,  0.       ,
         0.8660254,  0.       ,  0.8660254],
       [ 0.       ,  0.       ,  0.8660254,  0.8660254, -0.8660254,
         0.       , -0.8660254,  0.       ],
       [ 0.       ,  0.       , -0.8660254, -0.8660254,  0.       ,
        -0.8660254,  0.       , -0.8660254]])

In [21]:
# Target total number of elements in the dense operand B (100 million);
# basic_gflops divides it by A's column count to obtain B's width.
TARGET_B_SIZE = 100e6

# Number of lanes assumed per SIMD operation (8 lanes in a 512-bit register
# corresponds to 64-bit elements); B is processed in panels of this width.
AVX_512_WIDTH = 8

To estimate the FLOP count we need:

    - the width (number of columns) of B
    - the number of non-zero entries in A

In [35]:
def basic_gflops(mat, target_b_size=None, simd_width=None):
    """Estimate the pseudo operation count of ``mat @ B`` when zeros in ``mat`` are skipped.

    B is a hypothetical dense operand with ``mat.shape[1]`` rows and
    ``int(target_b_size / mat.shape[1])`` columns, processed in panels of
    ``simd_width`` columns. Every non-zero entry of ``mat`` contributes one
    SIMD-wide FMA per panel, counted as ``simd_width`` operations.

    Args:
        mat: Operator matrix A (array with at least two dimensions).
        target_b_size: Target total number of elements in B. Defaults to
            the module-level ``TARGET_B_SIZE``.
        simd_width: Number of lanes per SIMD operation. Defaults to the
            module-level ``AVX_512_WIDTH``.

    Returns:
        The operation count for the whole product, in units of 1e9
        (a count, not a rate).
    """
    # Resolve defaults at call time so re-running the constants cell is honoured.
    if target_b_size is None:
        target_b_size = TARGET_B_SIZE
    if simd_width is None:
        simd_width = AVX_512_WIDTH

    # A and B dimensions: B has as many rows as A has columns.
    a_cols = mat.shape[1]
    b_cols = int(target_b_size / a_cols)

    # The per-panel count below is repeated across the panels of B.
    num_panels = b_cols / simd_width

    # ijk loop skipping 0s, SIMD version using FMA: one simd_width-wide FMA
    # per non-zero of A. Loads of B and stores of C are not counted.
    # NOTE(review): an FMA is counted as one operation per lane here; a
    # conventional FLOP count would use two — confirm this is intended.
    flops_per_panel = int(np.count_nonzero(mat)) * simd_width

    return (flops_per_panel * num_panels) / 1e9  # for whole mat mul

In [36]:
# Estimate for a single example matrix.
l = basic_gflops(matrices["quad"]["p1-gauss-legendre-m132"])

In [37]:
# Display the estimate returned by basic_gflops (in units of 1e9 operations).
l

0.2