In [None]:
%load_ext watermark


In [None]:
from IPython.display import display
import numpy as np
import pandas as pd


In [None]:
%watermark -diwmuv -iv


## Load Dummy 3D Tensor Data


In [None]:
# Extents of the 3D tensor along its three axes.
nrow = 6
ncol = 9
nlay = 12

# Number of columns shared by every factor matrix.
rank = 3

# Grid height and width mirror the tensor's row and column counts.
gridh = nrow
gridw = ncol


In [None]:
# Read the tensor values from the text file and fold them into a
# (row, column, layer) array.
tensor_shape = (nrow, ncol, nlay)
data = np.loadtxt("2025-08-07-mttkrp-naive+data.txt").reshape(tensor_shape)

assert data.shape == tensor_shape

# Render every layer as its own nrow x ncol table; headings count from 1.
for layer in range(nlay):
    layer_table = pd.DataFrame(data[..., layer])
    print(f"Layer {layer + 1} data:")
    display(layer_table)


## Load Factor Matrices


In [None]:
# One factor matrix per tensor mode, each stored in its own text file.
A1 = np.loadtxt("2025-08-07-mttkrp-naive+A1.txt")
A2 = np.loadtxt("2025-08-07-mttkrp-naive+A2.txt")
A3 = np.loadtxt("2025-08-07-mttkrp-naive+A3.txt")

# Each factor must have one row per index of its mode and `rank` columns.
# All shapes are validated before anything is displayed.
for factor, mode_size in ((A1, nrow), (A2, ncol), (A3, nlay)):
    assert factor.shape == (mode_size, rank)

# Show the three factors, each under its own heading.
for heading, factor in (("A1:", A1), ("A2:", A2), ("A3:", A3)):
    print(heading)
    display(pd.DataFrame(factor))


## Mode 1 MTTKRP


Put the tensor onto the WSE, so that PE `(i, j)` holds the fiber `data[i, j, :]`.


In [None]:
# The per-PE tensor storage is the loaded tensor itself: `pe_tensors` is a
# second name for `data` (no copy is made), indexed as [row, column, layer].
pe_tensors = data
assert (nrow, ncol, nlay) == pe_tensors.shape


Put the third factor matrix (`A3`) onto the WSE, one row per PE: rows 0–5 go down PE column 0 and rows 6–11 go down PE column 1.


In [None]:
# Lay A3 out on the PE grid one row per PE: consecutive chunks of `nrow` rows
# fill successive PE columns, so A3[s, :] lands on PE (s % nrow, s // nrow).
# PEs that receive no A3 row keep their zero initialisation.
n_a3_cols = -(-nlay // nrow)  # ceil(nlay / nrow) PE columns are needed
assert n_a3_cols <= ncol, "A3 does not fit on the PE grid"

pe_A3 = np.zeros(shape=(nrow, ncol, rank))
for a3_col in range(n_a3_cols):
    a3_chunk = A3[a3_col * nrow : (a3_col + 1) * nrow, :]
    # The final chunk may be shorter than `nrow` when nlay is not a multiple.
    pe_A3[: a3_chunk.shape[0], a3_col, :] = a3_chunk


Do the Khatri-Rao product on the WSE (first block for now).


In [None]:
# Place row j of A2 on the PE in grid row 0, column j ...
pe_A2 = np.zeros(shape=(nrow, ncol, rank))
pe_A2[0, :, :] = A2

# ... then broadcast it down each column, so every PE (i, j) holds A2[j, :].
pe_A2[1:, :, :] = pe_A2[0, :, :]

pe_A3_temp = np.zeros(shape=(nrow, ncol, rank))
pe_krtemp = np.zeros(shape=(nrow, ncol, rank))
pe_A1 = np.zeros(shape=(nrow, ncol, rank))

# One step per tensor layer. A3[s, :] is stored on PE (s % nrow, s // nrow),
# i.e. consecutive chunks of `nrow` rows occupy successive PE columns.
for s in range(nlay):
    a3_col, a3_row = divmod(s, nrow)

    # broadcast row s of A3 from its owning PE to all PEs
    pe_A3_temp[:, :, :] = pe_A3[a3_row, a3_col, :]

    # every PE in column j now forms the same KRP vector A2[j, :] * A3[s, :]
    pe_krtemp[:, :, :] = pe_A2 * pe_A3_temp

    # scale by this PE's tensor value for layer s; the trailing axis of
    # length 1 broadcasts the scalar across all `rank` components
    pe_krtemp *= pe_tensors[:, :, s, np.newaxis]

    # reduce along each grid row and accumulate the result in PE column 0
    pe_A1[:, 0, :] += pe_krtemp.sum(axis=1)


In [None]:
# The accumulated mode-1 MTTKRP result lives in PE column 0.
A1_extracted = pe_A1[:, 0, :]

# Check against the stored reference result. The tolerances and NaN handling
# match what `np.allclose` uses by default; on a mismatch,
# `assert_allclose` raises an AssertionError that reports how far off the
# values are instead of a bare failure.
np.testing.assert_allclose(
    A1_extracted,
    np.loadtxt("2025-08-07-mttkrp-naive+m1_mttkrpresult.txt"),
    rtol=1e-5,
    atol=1e-8,
    equal_nan=False,
)
pd.DataFrame(A1_extracted)
