# Python model for quick validation of Hardware Design Results and Software Results

对正向和反向各阶段的**输入**、**输出**以及重要的**参数**和**中间结果**做可视化打印，省去复杂的计算流程，主要包含：
- 正向
    - Tabulate       (table参数 R矩阵 S向量)
    - Descr          (R矩阵 G矩阵 scatter1/2 D矩阵)
    - FittingNet     (D矩阵 W[0~3]权重 B[0~3]偏置 idt参数)
- 反向
    - Descr_Grad     (D_Grad矩阵 scatter1/2^T导数)
    - Tabulate_Grad  (table参数 R_Grad S_Grad)

| 阶段 | 模块          | 打印内容                                        |
|------|---------------|-------------------------------------------------|
| 正向 | Tabulate      | table参数、R矩阵、S向量                         |
| 正向 | Descr         | R矩阵、G矩阵、scatter1/2、D矩阵                 |
| 正向 | FittingNet    | D矩阵、W[0~3]权重、B[0~3]偏置、idt参数          |
| 反向 | Descr_Grad    | D_Grad矩阵、scatter1/2^T                        |
| 反向 | Tabulate_Grad | table参数、R_Grad、S_Grad                       |

In [345]:
import numpy as np
import tensorflow as tf

## 正向 Tabulate 和 Descr

In [346]:
def read_binary_tb(filename):
    """Load the forward Tabulate/Descr dump from a raw float64 binary file.

    The file is read as one flat float64 array and cut into six consecutive
    sections, in this order and with these shapes:

        table       (1360, 128, 6)
        s           (108, 512)
        r           (108, 512, 4)
        fusa        (108, 4, 128)
        fusb        (108, 4, 16)
        descriptor  (108, 2048)

    The shape of every section is printed after loading.

    Args:
        filename: Path of the binary file (raw float64 values, no header).

    Returns:
        Tuple ``(table, s, r, fusa, fusb, descriptor)``; each array is a
        reshaped slice of the single buffer read from disk.

    Raises:
        ValueError: If the file holds fewer values than the layout requires.
    """
    # (label used in the shape printout, shape) of each section, in file order.
    layout = (
        ("Table", (1360, 128, 6)),
        ("S", (108, 512)),
        ("R", (108, 512, 4)),
        ("Fusa", (108, 4, 128)),
        ("Fusb", (108, 4, 16)),
        ("Descriptor", (108, 2048)),
    )
    sizes = [int(np.prod(shape)) for _, shape in layout]
    total_sz = sum(sizes)

    data = np.fromfile(filename, dtype=np.float64, count=total_sz)
    # Fail immediately with the actual/expected counts instead of printing a
    # message and letting a later reshape raise a less helpful error.
    if data.size != total_sz:
        raise ValueError(
            f"Read data size {data.size} does not match expected size {total_sz}"
        )

    sections = []
    offset = 0
    for (_, shape), size in zip(layout, sizes):
        sections.append(data[offset:offset + size].reshape(shape))
        offset += size

    for (label, _), section in zip(layout, sections):
        print(f"{label} shape:", section.shape)

    return tuple(sections)

In [347]:
filename = "params/tb.txt"
# Load the forward dump and keep an independent copy of every section
# (np.array copies, so the results no longer share the file buffer).
table, s, r, fusa, fusb, descriptor = (
    np.array(section) for section in read_binary_tb(filename)
)

Table shape: (1360, 128, 6)
S shape: (108, 512)
R shape: (108, 512, 4)
Fusa shape: (108, 4, 128)
Fusb shape: (108, 4, 16)
Descriptor shape: (108, 2048)


### 打印 tabulate 和 descr 模块所需的计算参数

In [348]:
# Print-precision setup: floats are rendered in scientific notation.
# Change the digit count in the format spec to adjust the precision.
def custom_format(x):
    """Return ``x`` in scientific notation with 10 digits after the point."""
    return format(x, ".10e")

np.set_printoptions(formatter={"float_kind": custom_format})

In [349]:
# Dump the tabulate coefficient table (shape (1360, 128, 6)).
# `index` is reset here but is not read by this cell.
index = 0
print(table)

[[[4.7847021668e-02 3.0747029555e-02 9.0515144067e-04 -1.7627476552e-03
   -1.9263159878e-03 2.1667394474e-04]
  [2.7605576401e+00 -4.3577939179e-01 -3.4262125193e-01
   -1.1414758013e-01 3.2293245699e-02 5.5691578293e-02]
  [-1.2896528483e+00 1.9702487080e-01 -5.5664212798e-02 5.3054650386e-03
   1.4107237710e-03 -5.5344741984e-04]
  ...
  [1.0239273626e-03 -5.6408015786e-03 2.6408290225e-05 3.4064405521e-05
   -3.2076209444e-06 -2.7340400750e-05]
  [9.8509426447e-01 -1.3790789227e-02 -6.1458149900e-03
   -1.8486179141e-03 -4.0953070749e-04 -5.1298676241e-05]
  [-1.5937061072e-02 2.1388251887e-03 2.0609348753e-03 3.6622133267e-03
   1.1958314609e-03 -3.1382748509e-03]]

 [[4.8154580697e-02 3.0764596065e-02 8.5111538814e-04 -1.8395823945e-03
   -1.9153955432e-03 3.1857659458e-04]
  [2.7561654702e+00 -4.4266592915e-01 -3.4602574647e-01
   -1.1280011629e-01 3.5078102014e-02 5.7039555968e-02]
  [-1.2876881607e+00 1.9591318380e-01 -5.5504207947e-02 5.3613365206e-03
   1.3829918097e-03 -5.7

In [350]:
# Dump the S array: shape of one row, element dtype, then the full array.
# `index` is reset here but is not read by this cell.
index = 0
print(s[0].shape)
print(s.dtype)
print(s)

(512,)
float64
[[-1.3666269788e-01 -1.3879777814e-01 -1.4521026634e-01 ...
  -3.1147573666e-01 -3.1147573666e-01 -3.1147573666e-01]
 [-1.2388602807e-01 -1.2797526267e-01 -1.3435808863e-01 ...
  -3.1147573666e-01 -3.1147573666e-01 -3.1147573666e-01]
 [1.6278337515e+00 -1.3033667510e-01 -1.3033673877e-01 ...
  -3.1147573666e-01 -3.1147573666e-01 -3.1147573666e-01]
 ...
 [-1.2431364534e-01 -1.2855119868e-01 -1.2961934697e-01 ...
  -3.1147573666e-01 -3.1147573666e-01 -3.1147573666e-01]
 [-1.2541309550e-01 -1.3488331870e-01 -1.3985506744e-01 ...
  -3.1147573666e-01 -3.1147573666e-01 -3.1147573666e-01]
 [-1.2381754230e-01 -1.2431043257e-01 -1.2456725030e-01 ...
  -3.1147573666e-01 -3.1147573666e-01 -3.1147573666e-01]]


In [351]:
# Dump the R array: shape/dtype of the entry selected by `index`, then the
# full array.
index = 0
print(r[index].shape, r[index].dtype)
print(r)

(512, 4) float64
[[[-1.3666269788e-01 -7.8428058491e-02 -1.5497025573e-01
   -2.3109357019e-01]
  [-1.3879777814e-01 -2.2342239647e-01 1.5937520732e-01 7.8892531715e-02]
  [-1.4521026634e-01 2.1861902801e-01 -6.8871476747e-02
   -1.5185759988e-01]
  ...
  [-3.1147573666e-01 -0.0000000000e+00 -0.0000000000e+00
   -0.0000000000e+00]
  [-3.1147573666e-01 -0.0000000000e+00 -0.0000000000e+00
   -0.0000000000e+00]
  [-3.1147573666e-01 -0.0000000000e+00 -0.0000000000e+00
   -0.0000000000e+00]]

 [[-1.2388602807e-01 8.2235611816e-02 1.6444138635e-01 2.4985944356e-01]
  [-1.2797526267e-01 -8.2476893202e-02 -2.4221387073e-01
   1.6313671907e-01]
  [-1.3435808863e-01 -1.5437229158e-01 -2.3592580996e-01
   -7.9354837698e-02]
  ...
  [-3.1147573666e-01 -0.0000000000e+00 -0.0000000000e+00
   -0.0000000000e+00]
  [-3.1147573666e-01 -0.0000000000e+00 -0.0000000000e+00
   -0.0000000000e+00]
  [-3.1147573666e-01 -0.0000000000e+00 -0.0000000000e+00
   -0.0000000000e+00]]

 [[1.6278337515e+00 1.3888242219

In [352]:
# Dump fusa: shape of the first entry, then the full array.
# `index` is reset here but is not read by this cell.
index = 0
print(fusa[0].shape)
print(fusa)

(4, 128)
[[[1.0748249673e-02 -2.3141122001e-01 -2.4712432673e-02 ...
   -7.1355618362e-03 -1.7358211115e-01 2.6046595776e-02]
  [-6.2141650093e-04 -4.6341544230e-05 -2.1660539245e-04 ...
   -1.6213251437e-05 9.1382247643e-04 5.6364192249e-04]
  [2.7623290290e-03 -3.7904189354e-03 3.3175172846e-03 ...
   1.7245397839e-04 2.3001151966e-03 -9.1193138040e-04]
  [-7.6045340867e-05 -2.9824232289e-04 7.9319705551e-04 ...
   3.3205947672e-06 -1.2178795252e-03 -1.9344330070e-03]]

 [[1.2120516310e-02 -2.3089011756e-01 -2.4827063204e-02 ...
   -7.1046135854e-03 -1.7185614353e-01 2.8152428386e-02]
  [-1.4672968776e-03 -2.1990568426e-03 2.0541558485e-03 ...
   3.4856682603e-05 -5.6120591700e-04 2.3423219438e-03]
  [3.6760196792e-04 2.3742029449e-04 -3.3263705103e-04 ...
   6.5142841499e-06 8.5488533502e-04 -2.2509524985e-04]
  [-1.4124779247e-03 -4.8589688004e-04 5.7389686759e-04 ...
   -4.7926972171e-05 -2.3805788654e-03 1.1887964842e-03]]

 [[7.8904231148e-03 -2.3084434569e-01 -2.5055422373e-02 

In [353]:
# Dump fusb: shape of the first entry, then the full array.
# `index` is reset here but is not read by this cell.
index = 0
print(fusb[0].shape)
print(fusb)

(4, 16)
[[[1.0748249673e-02 -2.3141122001e-01 -2.4712432673e-02 ...
   -2.3020676732e-03 -3.0348636198e-03 3.3321704255e-05]
  [-6.2141650093e-04 -4.6341544230e-05 -2.1660539245e-04 ...
   -1.4353510381e-06 -4.8093373184e-05 2.8692556369e-06]
  [2.7623290290e-03 -3.7904189354e-03 3.3175172846e-03 ...
   1.7839620951e-05 1.8098373416e-03 3.4792417457e-04]
  [-7.6045340867e-05 -2.9824232289e-04 7.9319705551e-04 ...
   -1.0587591065e-05 -3.5738578311e-05 -7.7177203566e-05]]

 [[1.2120516310e-02 -2.3089011756e-01 -2.4827063204e-02 ...
   -2.2911304349e-03 -2.0850693462e-03 1.8226002512e-04]
  [-1.4672968776e-03 -2.1990568426e-03 2.0541558485e-03 ...
   -1.1194273192e-05 -5.1276858403e-04 -5.0569870911e-05]
  [3.6760196792e-04 2.3742029449e-04 -3.3263705103e-04 ...
   6.9793232870e-06 1.7055796659e-04 6.1874475122e-05]
  [-1.4124779247e-03 -4.8589688004e-04 5.7389686759e-04 ...
   -2.4816628670e-05 -8.6852739498e-04 -2.2386053953e-04]]

 [[7.8904231148e-03 -2.3084434569e-01 -2.5055422373e-0

In [354]:
# Recompute the descriptor d from fusa and fusb.
# For each of the 108 entries: d[i] = fusa[i]^T . fusb[i],
# i.e. (128, 4) x (4, 16) -> (128, 16).
d = np.zeros((108, 128, 16))
for i in range(108):
    d[i] = np.dot(fusa[i].T, fusb[i])
# Flatten each (128, 16) block into one row of 2048 values.
d = d.reshape((108, 2048))
print(d.shape)
print(d[0, 0:20])

(108, 2048)
[1.2354727407e-04 -2.4976844768e-03 -2.5637703888e-04 1.2394854566e-04
 4.1094940777e-04 -5.6605734862e-04 -2.7661756825e-04 2.5322518030e-04
 4.7192496154e-03 -1.2037498742e-03 -5.2470789139e-05 1.0405063068e-03
 -1.7594140471e-06 -2.4692222126e-05 -2.7587501916e-05 1.3233170081e-06
 -2.4976844768e-03 5.3565611120e-02 5.7059328869e-03 -2.5982939702e-03]


In [355]:
# Descriptor as computed by the software code (loaded from the dump).
# Prints the same slice [index, 0:20] as the recomputed d, for a visual
# side-by-side comparison.
index = 0
print(descriptor.shape, descriptor.dtype)
print(descriptor[index, 0:20])

(108, 2048) float64
[1.2354727407e-04 -2.4976844768e-03 -2.5637703888e-04 1.2394854566e-04
 4.1094940777e-04 -5.6605734862e-04 -2.7661756825e-04 2.5322518030e-04
 4.7192496154e-03 -1.2037498742e-03 -5.2470789139e-05 1.0405063068e-03
 -1.7594140471e-06 -2.4692222126e-05 -2.7587501916e-05 1.3233170081e-06
 -2.4976844768e-03 5.3565611120e-02 5.7059328869e-03 -2.5982939702e-03]


### 打印 fittingnet 参数以及中间结果

#### 1. fitting net 权重参数

In [356]:
def read_binary_fn(filename):
    """Load the fitting-net parameters from a raw float64 binary file.

    The file is read as one flat float64 array holding, back to back:
    w0 (2048, 240), b0 (240,), w1 (240, 240), b1 (240,), w2 (240, 240),
    b2 (240,), w3 (240, 1), b3 (1,), idt1 (240,), idt2 (240,).
    The shape of every array is printed after loading.

    Args:
        filename: Path of the binary file (raw float64 values, no header).

    Returns:
        Tuple ``(w0, b0, w1, b1, w2, b2, w3, b3, idt1, idt2)``.

    Raises:
        ValueError: If the number of values read differs from the layout total.
    """
    # (name, shape) of each section, in file order.
    layout = (
        ("w0", (2048, 240)),
        ("b0", (240,)),
        ("w1", (240, 240)),
        ("b1", (240,)),
        ("w2", (240, 240)),
        ("b2", (240,)),
        ("w3", (240, 1)),
        ("b3", (1,)),
        ("idt1", (240,)),
        ("idt2", (240,)),
    )
    counts = [int(np.prod(shape)) for _, shape in layout]
    expected = sum(counts)

    # Read exactly as many values as the layout needs.
    raw = np.fromfile(filename, dtype=np.float64, count=expected)

    # Make sure the file actually contained that many values.
    if raw.size != expected:
        raise ValueError(f"Read data size {raw.size} does not match expected size {expected}")

    # Walk the buffer, carving out one section after another.
    params = []
    cursor = 0
    for (_, shape), count in zip(layout, counts):
        params.append(raw[cursor:cursor + count].reshape(shape))
        cursor += count

    # Debug printout of the resulting shapes.
    print("Shapes of the arrays:")
    for (name, _), param in zip(layout, params):
        print(f"{name} shape:", param.shape)

    return tuple(params)

In [357]:
filename = 'params/fn.txt'
# Load the fitting-net parameters and keep an independent copy of each array
# (np.array copies, so the results no longer share the file buffer).
w0, b0, w1, b1, w2, b2, w3, b3, idt1, idt2 = (
    np.array(param) for param in read_binary_fn(filename)
)

Shapes of the arrays:
w0 shape: (2048, 240)
b0 shape: (240,)
w1 shape: (240, 240)
b1 shape: (240,)
w2 shape: (240, 240)
b2 shape: (240,)
w3 shape: (240, 1)
b3 shape: (1,)
idt1 shape: (240,)
idt2 shape: (240,)


In [358]:
# Inspect one fitting-net parameter at a time: uncomment the array to dump.
# print(w0)
# print(b0)
# print(w1)
print(b1)
# print(w2)
# print(b2)
# print(w3)
# print(b3)
# print(idt1)
# print(idt2)

[1.1244967630e+00 2.4675179803e-01 6.0283992941e-01 -4.7584145309e-01
 -1.5316082235e+00 -7.0779292530e-02 1.4868164146e-01 2.9388946119e-01
 1.6675507110e+00 3.3471945229e+00 -5.0735337870e-01 1.4040337518e+00
 -7.7900264006e-01 -1.7527351799e-01 6.6175627628e-01 4.7033666456e-01
 -1.1494149964e+00 6.7186546435e-02 -7.4193867267e-01 1.4220972433e+00
 8.1719952808e-01 2.3041796587e+00 1.5005049737e-01 -7.8017921066e-01
 1.5409687763e+00 2.1786175366e-02 2.1079633988e-01 9.7637475893e-01
 2.1820948615e+00 7.4678879422e-01 7.5358853087e-01 -2.9806047326e-01
 -6.0701437012e-01 -2.6133638435e-02 2.7253395103e-01 2.0924794441e+00
 -9.4396547556e-01 -1.1005938214e+00 -1.9933013619e+00 -8.0722702826e-01
 1.2135232021e+00 -6.1679119388e-01 -1.9172099845e+00 1.8341094640e+00
 2.3818466175e+00 -3.6579584278e-01 1.1252278045e+00 5.0595371958e-01
 1.5663584689e+00 4.0863287855e-01 5.1763368797e-01 7.2828595483e-01
 1.0613019952e+00 1.4245430194e-01 -1.6303930327e+00 -2.2523269208e-01
 8.9825603953

In [359]:
# idt1: length-240 vector multiplied element-wise into the layer-1 tanh output.
print(idt1)

[5.0086434241e-01 -3.8519404420e-01 5.1267170894e-01 -1.7135700884e-01
 3.1368976156e-01 4.1142591496e-01 -4.6582812922e-01 -4.5066038920e-01
 6.0544138557e-01 -3.6339766642e-01 5.6824020248e-01 5.4923651760e-01
 5.0722835493e-01 5.3647933718e-01 -4.9852157579e-01 -4.2670560587e-01
 3.9724571773e-01 5.1102519245e-01 -4.0974212923e-01 5.0461384902e-01
 -1.6224520235e-01 -5.2266806696e-01 4.8898020644e-01 5.9630800942e-01
 4.6602412686e-01 -4.1653893131e-01 -5.2858017982e-01 -1.0633990661e+00
 -3.9186258297e-01 5.3532287364e-01 4.7106368120e-01 -4.3614299993e-01
 -5.2574535026e-02 -4.4419586341e-01 4.7065352624e-01 6.5888363545e-01
 -4.7626368887e-01 -3.6382194910e-01 -4.6436001254e-01 -4.8599008489e-01
 -4.7129072614e-01 6.1294518722e-01 5.2139376398e-01 5.3909624106e-01
 5.7021674502e-01 5.2328958335e-01 4.8003421432e-01 1.0029718679e+00
 4.8668079816e-01 -5.4815293243e-01 -7.5928676187e-01 -4.2915237266e-01
 -4.9276725143e-01 -4.5981268728e-01 5.0124026498e-01 -5.0568090474e-01
 -4.74

In [360]:
# idt2: length-240 vector multiplied element-wise into the layer-2 tanh output.
print(idt2)

[4.9913651749e-01 6.1480647336e-01 -4.8732837311e-01 2.3274605009e-01
 -1.3498426372e-01 5.8857196613e-01 -5.3417437388e-01 5.4933729775e-01
 -3.9455752738e-01 6.3660285917e-01 -4.3175825408e-01 -4.5076145859e-01
 4.9276972289e-01 -4.6352015512e-01 -5.0147947296e-01 5.7329441648e-01
 -2.4036757837e-01 4.8897342922e-01 5.9025617153e-01 4.9538521089e-01
 -2.0350650702e-01 -4.7733361428e-01 5.1101880156e-01 4.0369340592e-01
 5.3397467728e-01 5.8345982034e-01 -4.7141908113e-01 6.3400140251e-02
 6.0813710353e-01 -4.2366253550e-01 -5.2893536983e-01 5.6385671573e-01
 1.2879985287e-01 5.5580410050e-01 5.2934712418e-01 -3.4111527607e-01
 5.2373712640e-01 -6.3617707540e-01 -5.3563947290e-01 -5.1401161738e-01
 5.2870761620e-01 -3.8705426518e-01 4.7860490095e-01 4.6090135653e-01
 -4.2978627772e-01 -4.7671568231e-01 4.9196880092e-01 2.9707044935e-03
 5.1331949619e-01 -4.5185121069e-01 -2.4071202177e-01 5.7084703952e-01
 -5.0723129088e-01 -5.4018653566e-01 4.9876164762e-01 -4.9431709139e-01
 5.25318

#### 2. fitting net 中间结果
模拟 fitting net 的实际计算流程，逐步构建每一步的 python model 数据，用于验证。

In [361]:
# Layer 0, linear part: (108, 2048) descriptor x (2048, 240) w0, with the
# length-240 bias b0 broadcast over the rows.
l0_mul = np.dot(descriptor, w0) + b0
print(l0_mul)

[[-5.9081760940e+00 -5.8717695297e+00 -6.2248216680e+00 ...
  -6.1235947572e+00 6.2732753206e+00 -5.8529953676e+00]
 [-5.8566354887e+00 -5.8391133839e+00 -6.1984720891e+00 ...
  -6.0880590495e+00 6.2226548164e+00 -5.8169858114e+00]
 [-5.9034899776e+00 -5.8681151983e+00 -6.2230850443e+00 ...
  -6.1141264356e+00 6.2891285821e+00 -5.8540529135e+00]
 ...
 [-5.8860581606e+00 -5.8480666860e+00 -6.2111161150e+00 ...
  -6.0926107405e+00 6.2600828327e+00 -5.8334156767e+00]
 [-5.8919856311e+00 -5.8576009625e+00 -6.2033761615e+00 ...
  -6.0989693743e+00 6.2678687969e+00 -5.8398546973e+00]
 [-5.8216059626e+00 -5.8090178250e+00 -6.1743898392e+00 ...
  -6.0518768980e+00 6.1865626402e+00 -5.7840425881e+00]]


In [362]:
# Layer 0 activation: element-wise tanh of the linear output.
l0_tanh = np.tanh(l0_mul)
print(l0_tanh.shape, l0_tanh)

(108, 240) [[-9.9998523443e-01 -9.9998411920e-01 -9.9999216179e-01 ...
  -9.9999040288e-01 9.9999288572e-01 -9.9998351157e-01]
 [-9.9998363118e-01 -9.9998304738e-01 -9.9999173765e-01 ...
  -9.9998969598e-01 9.9999212775e-01 -9.9998228030e-01]
 [-9.9998509539e-01 -9.9998400271e-01 -9.9999213452e-01 ...
  -9.9999021941e-01 9.9999310775e-01 -9.9998354641e-01]
 ...
 [-9.9998456660e-01 -9.9998334824e-01 -9.9999194397e-01 ...
  -9.9998978935e-01 9.9999269552e-01 -9.9998285309e-01]
 [-9.9998474848e-01 -9.9998366275e-01 -9.9999181829e-01 ...
  -9.9998991838e-01 9.9999280838e-01 -9.9998307249e-01]
 [-9.9998244328e-01 -9.9998199566e-01 -9.9999132996e-01 ...
  -9.9998892270e-01 9.9999153849e-01 -9.9998107350e-01]]


In [363]:
# Layer 1, linear part: layer-0 activation x (240, 240) w1, plus bias b1.
l1_mul = np.dot(l0_tanh, w1) + b1
print(l1_mul)

[[1.0542576179e+01 -1.3923436932e+01 1.0650502878e+01 ...
  -1.0641224353e+01 1.1124266203e+01 -1.1111853895e+01]
 [1.0533691699e+01 -1.3921994240e+01 1.0650752471e+01 ...
  -1.0634158679e+01 1.1121322843e+01 -1.1101074759e+01]
 [1.0553444790e+01 -1.3935747669e+01 1.0658281650e+01 ...
  -1.0647595225e+01 1.1123787844e+01 -1.1121694712e+01]
 ...
 [1.0539985705e+01 -1.3920750060e+01 1.0651961778e+01 ...
  -1.0637552688e+01 1.1116547171e+01 -1.1102138993e+01]
 [1.0547591128e+01 -1.3926121154e+01 1.0649515013e+01 ...
  -1.0638369430e+01 1.1122716454e+01 -1.1112916151e+01]
 [1.0524569746e+01 -1.3921855192e+01 1.0653331418e+01 ...
  -1.0626384783e+01 1.1116742467e+01 -1.1092415504e+01]]


In [364]:
# Layer 1 activation: element-wise tanh of the linear output.
l1_tanh = np.tanh(l1_mul)
print(l1_tanh)

[[9.9999999861e-01 -1.0000000000e+00 9.9999999888e-01 ...
  -9.9999999886e-01 9.9999999956e-01 -9.9999999955e-01]
 [9.9999999858e-01 -1.0000000000e+00 9.9999999888e-01 ...
  -9.9999999884e-01 9.9999999956e-01 -9.9999999954e-01]
 [9.9999999864e-01 -1.0000000000e+00 9.9999999889e-01 ...
  -9.9999999887e-01 9.9999999956e-01 -9.9999999956e-01]
 ...
 [9.9999999860e-01 -1.0000000000e+00 9.9999999888e-01 ...
  -9.9999999885e-01 9.9999999956e-01 -9.9999999955e-01]
 [9.9999999862e-01 -1.0000000000e+00 9.9999999888e-01 ...
  -9.9999999885e-01 9.9999999956e-01 -9.9999999955e-01]
 [9.9999999856e-01 -1.0000000000e+00 9.9999999888e-01 ...
  -9.9999999882e-01 9.9999999956e-01 -9.9999999954e-01]]


In [365]:
# Scale the layer-1 activation element-wise by idt1 (broadcast over rows).
l1_idt = l1_tanh * idt1
print(l1_idt)

[[5.0086434172e-01 3.8519404420e-01 5.1267170837e-01 ... 4.6145991206e-01
  -4.5193292422e-01 3.8409620888e-01]
 [5.0086434170e-01 3.8519404420e-01 5.1267170837e-01 ... 4.6145991205e-01
  -4.5193292422e-01 3.8409620888e-01]
 [5.0086434173e-01 3.8519404420e-01 5.1267170838e-01 ... 4.6145991207e-01
  -4.5193292422e-01 3.8409620889e-01]
 ...
 [5.0086434171e-01 3.8519404420e-01 5.1267170837e-01 ... 4.6145991206e-01
  -4.5193292422e-01 3.8409620888e-01]
 [5.0086434172e-01 3.8519404420e-01 5.1267170837e-01 ... 4.6145991206e-01
  -4.5193292422e-01 3.8409620888e-01]
 [5.0086434169e-01 3.8519404420e-01 5.1267170837e-01 ... 4.6145991204e-01
  -4.5193292422e-01 3.8409620888e-01]]


In [366]:
# Skip connection: add the layer-0 activation to the scaled layer-1 output.
l1_add = l1_idt + l0_tanh
print(l1_add)

[[-4.9912089271e-01 -6.1479007500e-01 -4.8732045342e-01 ...
  -5.3853049082e-01 5.4805996150e-01 -6.1588730269e-01]
 [-4.9911928947e-01 -6.1478900318e-01 -4.8732002928e-01 ...
  -5.3852978392e-01 5.4805920353e-01 -6.1588607142e-01]
 [-4.9912075366e-01 -6.1478995851e-01 -4.8732042614e-01 ...
  -5.3853030734e-01 5.4806018353e-01 -6.1588733752e-01]
 ...
 [-4.9912022489e-01 -6.1478930404e-01 -4.8732023560e-01 ...
  -5.3852987730e-01 5.4805977130e-01 -6.1588664421e-01]
 [-4.9912040676e-01 -6.1478961855e-01 -4.8732010992e-01 ...
  -5.3853000632e-01 5.4805988416e-01 -6.1588686361e-01]
 [-4.9911810159e-01 -6.1478795146e-01 -4.8731962159e-01 ...
  -5.3852901065e-01 5.4805861427e-01 -6.1588486462e-01]]


In [367]:
# Layer 2, linear part: layer-1 output x (240, 240) w2, plus bias b2.
# Only the first row is printed.
l2_mul = np.dot(l1_add, w2) + b2
print(l2_mul[0])

[8.7057807388e+00 9.7739448980e+00 -9.6802189228e+00 1.1217682371e+01
 9.9726074307e+00 -9.6051454516e+00 -9.6852720385e+00 -9.0875136974e+00
 -8.7290867230e+00 -1.0766541191e+01 8.8294192553e+00 -9.1672235223e+00
 -9.2429739661e+00 8.8088767913e+00 9.3520232832e+00 -1.0248289132e+01
 1.0054259744e+01 9.3146912271e+00 9.9138347796e+00 8.9058669328e+00
 1.0887681972e+01 9.5896878571e+00 -1.1274212800e+01 -1.0287213216e+01
 8.8085954983e+00 8.9241690449e+00 -1.0284853517e+01 7.8903281269e+00
 -8.9794277418e+00 -1.0492874043e+01 -9.0900082113e+00 9.7579944807e+00
 1.0704309274e+01 -9.4553301873e+00 -9.1782347993e+00 -9.9076912602e+00
 1.0519910805e+01 -9.4591123744e+00 -9.2714955499e+00 -9.4391881416e+00
 -9.4627503588e+00 9.2926803814e+00 -9.1245947840e+00 1.0263754123e+01
 -8.8241521925e+00 9.2344300868e+00 1.1289369397e+01 -9.2793217120e+00
 9.3244490713e+00 -9.7649705742e+00 8.0525468237e+00 -8.8445393899e+00
 1.0299562620e+01 1.1131303819e+01 -9.8610698128e+00 8.7486766583e+00
 -9.75

In [368]:
# Layer 2 activation: element-wise tanh of the linear output.
l2_tanh = np.tanh(l2_mul)
print(l2_tanh)

[[9.9999994514e-01 9.9999999352e-01 -9.9999999219e-01 ...
  9.9999999615e-01 9.9999998600e-01 9.9999997433e-01]
 [9.9999994553e-01 9.9999999351e-01 -9.9999999202e-01 ...
  9.9999999614e-01 9.9999998609e-01 9.9999997414e-01]
 [9.9999994643e-01 9.9999999366e-01 -9.9999999232e-01 ...
  9.9999999638e-01 9.9999998628e-01 9.9999997530e-01]
 ...
 [9.9999994547e-01 9.9999999347e-01 -9.9999999207e-01 ...
  9.9999999620e-01 9.9999998601e-01 9.9999997469e-01]
 [9.9999994522e-01 9.9999999355e-01 -9.9999999216e-01 ...
  9.9999999622e-01 9.9999998600e-01 9.9999997457e-01]
 [9.9999994600e-01 9.9999999351e-01 -9.9999999187e-01 ...
  9.9999999616e-01 9.9999998618e-01 9.9999997423e-01]]


In [369]:
# Scale the layer-2 activation element-wise by idt2 (broadcast over rows).
l2_idt = l2_tanh * idt2
print(l2_idt)

[[4.9913649011e-01 6.1480646938e-01 4.8732836930e-01 ... 5.3854088585e-01
  -5.4806611637e-01 6.1590468883e-01]
 [4.9913649030e-01 6.1480646937e-01 4.8732836922e-01 ... 5.3854088584e-01
  -5.4806611642e-01 6.1590468872e-01]
 [4.9913649075e-01 6.1480646946e-01 4.8732836937e-01 ... 5.3854088597e-01
  -5.4806611652e-01 6.1590468943e-01]
 ...
 [4.9913649027e-01 6.1480646935e-01 4.8732836924e-01 ... 5.3854088588e-01
  -5.4806611637e-01 6.1590468905e-01]
 [4.9913649015e-01 6.1480646940e-01 4.8732836929e-01 ... 5.3854088588e-01
  -5.4806611637e-01 6.1590468898e-01]
 [4.9913649054e-01 6.1480646937e-01 4.8732836915e-01 ... 5.3854088585e-01
  -5.4806611647e-01 6.1590468877e-01]]


In [370]:
# Skip connection: add the layer-1 output to the scaled layer-2 output.
l2_add = l2_idt + l1_add
print(l2_add)

[[1.5597392940e-05 1.6394373063e-05 7.9158798047e-06 ... 1.0395032247e-05
  -6.1548673191e-06 1.7386143888e-05]
 [1.7200832226e-05 1.7466191889e-05 8.3399434015e-06 ... 1.1101916401e-05
  -6.9128894087e-06 1.8617301258e-05]
 [1.5737089411e-05 1.6510952664e-05 7.9432245491e-06 ... 1.0578630630e-05
  -5.9329924887e-06 1.7351910275e-05]
 ...
 [1.6265378514e-05 1.7165308058e-05 8.1336477314e-06 ... 1.1008581765e-05
  -6.3450753151e-06 1.8044841206e-05]
 [1.6083383968e-05 1.6850842758e-05 8.2593661844e-06 ... 1.0879559991e-05
  -6.2322088230e-06 1.7825370845e-05]
 [1.8388951225e-05 1.8517913860e-05 8.7475606436e-06 ... 1.1875202603e-05
  -7.5022037551e-06 1.9824149406e-05]]


In [371]:
# Layer 3 (final linear layer, no activation): layer-2 output x (240, 1) w3,
# plus the single bias b3. Only the first 10 rows are printed.
lf = np.dot(l2_add, w3) + b3
print(lf[0:10])

[[-3.7122083512e+00]
 [-3.7021839750e+00]
 [-3.6380346920e+00]
 [-3.6777318073e+00]
 [-3.6571313232e+00]
 [-3.7200307596e+00]
 [-3.6892595717e+00]
 [-3.6801404788e+00]
 [-3.6986771973e+00]
 [-3.6903330638e+00]]


In [372]:
# Total energy: sum of all entries of the final-layer output.
energy = np.sum(lf)
print(energy)

-398.0777242441414


In [373]:
# Backward pass, layer 3: energy is the plain sum of lf, so d(energy)/d(lf)
# is 1 for every row. Propagating through lf = l2_add . w3 + b3 gives the
# gradient with respect to l2_add.
lf_grad = np.ones((108, 1))
l2_grad_mul = np.dot(lf_grad, w3.T)
print(l2_grad_mul)

[[1.0663064386e-05 8.5841188503e-06 9.1053763492e-07 ... 4.0957163059e-06
  -6.7579523619e-08 1.6326536876e-05]
 [1.0663064386e-05 8.5841188503e-06 9.1053763492e-07 ... 4.0957163059e-06
  -6.7579523619e-08 1.6326536876e-05]
 [1.0663064386e-05 8.5841188503e-06 9.1053763492e-07 ... 4.0957163059e-06
  -6.7579523619e-08 1.6326536876e-05]
 ...
 [1.0663064386e-05 8.5841188503e-06 9.1053763492e-07 ... 4.0957163059e-06
  -6.7579523619e-08 1.6326536876e-05]
 [1.0663064386e-05 8.5841188503e-06 9.1053763492e-07 ... 4.0957163059e-06
  -6.7579523619e-08 1.6326536876e-05]
 [1.0663064386e-05 8.5841188503e-06 9.1053763492e-07 ... 4.0957163059e-06
  -6.7579523619e-08 1.6326536876e-05]]


In [374]:
# Backward through l2_idt = l2_tanh * idt2: gradient with respect to l2_tanh.
l2_grad_idt = l2_grad_mul * idt2
print(l2_grad_idt)

[[5.3223248232e-06 5.2775718372e-06 -4.4373082428e-07 ...
  2.2057106960e-06 3.7038047574e-08 1.0055590873e-05]
 [5.3223248232e-06 5.2775718372e-06 -4.4373082428e-07 ...
  2.2057106960e-06 3.7038047574e-08 1.0055590873e-05]
 [5.3223248232e-06 5.2775718372e-06 -4.4373082428e-07 ...
  2.2057106960e-06 3.7038047574e-08 1.0055590873e-05]
 ...
 [5.3223248232e-06 5.2775718372e-06 -4.4373082428e-07 ...
  2.2057106960e-06 3.7038047574e-08 1.0055590873e-05]
 [5.3223248232e-06 5.2775718372e-06 -4.4373082428e-07 ...
  2.2057106960e-06 3.7038047574e-08 1.0055590873e-05]
 [5.3223248232e-06 5.2775718372e-06 -4.4373082428e-07 ...
  2.2057106960e-06 3.7038047574e-08 1.0055590873e-05]]


In [375]:
def tanh_grad(y, dy):
    """Back-propagate ``dy`` through a tanh whose forward output was ``y``.

    Uses d tanh(x)/dx = 1 - tanh(x)**2, expressed with the stored output
    ``y = tanh(x)``.

    Args:
        y: Forward tanh output (scalar or array).
        dy: Upstream gradient, broadcast-compatible with ``y``.

    Returns:
        ``dy`` multiplied element-wise by ``1 - y**2``.
    """
    local_slope = 1.0 - y ** 2
    return dy * local_slope

In [376]:
# Backward through l2_tanh = tanh(l2_mul): gradient with respect to l2_mul.
l2_grad_tanh = tanh_grad(l2_tanh, l2_grad_idt)
print(l2_grad_tanh)

[[5.8400460688e-13 6.8383838056e-14 -6.9350287472e-15 ...
  1.6975180993e-14 1.0370165338e-15 5.1628615437e-13]
 [5.7978045851e-13 6.8458666944e-14 -7.0805040124e-15 ...
  1.7046065829e-14 1.0305602874e-15 5.2006946246e-13]
 [5.7023611506e-13 6.6874196453e-14 -6.8173966182e-15 ...
  1.5962438119e-14 1.0160591291e-15 4.9667982369e-13]
 ...
 [5.8049833425e-13 6.8874126349e-14 -7.0418514640e-15 ...
  1.6742405422e-14 1.0365501534e-15 5.0907283028e-13]
 [5.8315882917e-13 6.8034406898e-14 -6.9601085661e-15 ...
  1.6692692688e-14 1.0372904374e-15 5.1146093060e-13]
 [5.7483115396e-13 6.8471870249e-14 -7.2177577041e-15 ...
  1.6934173362e-14 1.0233667917e-15 5.1828436942e-13]]


In [377]:
# Backward through l2_mul = l1_add . w2 + b2: contribution of the layer-2
# matmul path to the gradient of l1_add.
l1_grad_mul = np.dot(l2_grad_tanh, w2.T)
print(l1_grad_mul)

[[-2.5167910599e-10 -2.4131371040e-10 -2.8371916480e-10 ...
  -2.7868361228e-10 2.1842511740e-10 -4.2611194378e-11]
 [-2.5628473331e-10 -2.4824168830e-10 -2.9094071722e-10 ...
  -2.8517244538e-10 2.2425541569e-10 -4.5218325615e-11]
 [-2.4547341721e-10 -2.4182523238e-10 -2.8144335355e-10 ...
  -2.7429801751e-10 2.1568791808e-10 -4.5245902140e-11]
 ...
 [-2.5118832897e-10 -2.4226132868e-10 -2.8501063335e-10 ...
  -2.7940611163e-10 2.1767457156e-10 -4.1251671369e-11]
 [-2.5536500787e-10 -2.4965488222e-10 -2.9085126327e-10 ...
  -2.8373179736e-10 2.2390505617e-10 -4.6929523825e-11]
 [-2.6019880994e-10 -2.5406847943e-10 -2.9724560839e-10 ...
  -2.9117522499e-10 2.2871013210e-10 -4.7139842086e-11]]


In [378]:
# l1_add feeds both the layer-2 matmul and the skip connection into l2_add,
# so its total gradient is the sum of the two paths.
l1_grad_add = l1_grad_mul + l2_grad_mul
print(l1_grad_add)

[[1.0662812707e-05 8.5838775366e-06 9.1025391576e-07 ... 4.0954376223e-06
  -6.7361098501e-08 1.6326494265e-05]
 [1.0662808101e-05 8.5838706086e-06 9.1024669421e-07 ... 4.0954311334e-06
  -6.7355268203e-08 1.6326491658e-05]
 [1.0662818912e-05 8.5838770250e-06 9.1025619157e-07 ... 4.0954420079e-06
  -6.7363835701e-08 1.6326491630e-05]
 ...
 [1.0662813197e-05 8.5838765890e-06 9.1025262429e-07 ... 4.0954368998e-06
  -6.7361849047e-08 1.6326495625e-05]
 [1.0662809021e-05 8.5838691954e-06 9.1024678366e-07 ... 4.0954325741e-06
  -6.7355618562e-08 1.6326489947e-05]
 [1.0662804187e-05 8.5838647818e-06 9.1024038931e-07 ... 4.0954251307e-06
  -6.7350813487e-08 1.6326489736e-05]]


In [379]:
# Backward through l1_idt = l1_tanh * idt1: gradient with respect to l1_tanh.
l1_grad_idt = l1_grad_add * idt1
print(l1_grad_idt)

[[5.3406226746e-06 -3.3064585032e-06 4.6666143056e-07 ...
  -1.8898802872e-06 3.0442698238e-08 -6.2709445543e-06]
 [5.3406203678e-06 -3.3064558346e-06 4.6665772828e-07 ...
  -1.8898772928e-06 3.0440063334e-08 -6.2709435529e-06]
 [5.3406257828e-06 -3.3064583062e-06 4.6666259731e-07 ...
  -1.8898823110e-06 3.0443935268e-08 -6.2709435424e-06]
 ...
 [5.3406229204e-06 -3.3064581382e-06 4.6666076846e-07 ...
  -1.8898799538e-06 3.0443037434e-08 -6.2709450765e-06]
 [5.3406208284e-06 -3.3064552903e-06 4.6665777414e-07 ...
  -1.8898779576e-06 3.0440221673e-08 -6.2709428957e-06]
 [5.3406184073e-06 -3.3064535902e-06 4.6665449594e-07 ...
  -1.8898745228e-06 3.0438050101e-08 -6.2709428149e-06]]


In [380]:
# Backward through l1_tanh = tanh(l1_mul): gradient with respect to l1_mul.
l1_grad_tanh = tanh_grad(l1_tanh, l1_grad_idt)
print(l1_grad_tanh)

[[1.4876014207e-14 -1.0658109526e-17 1.0474966675e-15 ...
  -4.3215971134e-15 2.6492828606e-17 -5.5944750191e-15]
 [1.5142700853e-14 -1.0688936513e-17 1.0469656001e-15 ...
  -4.3830935322e-15 2.6646940034e-17 -5.7163910478e-15]
 [1.4556148574e-14 -1.0398942918e-17 1.0313288810e-15 ...
  -4.2668863790e-15 2.6519261563e-17 -5.4854413417e-15]
 ...
 [1.4953286488e-14 -1.0715374483e-17 1.0444432760e-15 ...
  -4.3534481074e-15 2.6905297245e-17 -5.7042379194e-15]
 [1.4727549375e-14 -1.0600833086e-17 1.0495601115e-15 ...
  -4.3463377924e-15 2.6572904422e-17 -5.5826002970e-15]
 [1.5421493481e-14 -1.0691865978e-17 1.0415721751e-15 ...
  -4.4517669060e-15 2.6890386610e-17 -5.8162512070e-15]]


In [381]:
# Backward through l1_mul = l0_tanh . w1 + b1: contribution of the layer-1
# matmul path to the gradient of l0_tanh.
l0_grad_mul = np.dot(l1_grad_tanh, w1.T)
print(l0_grad_mul.shape)
print(l0_grad_mul)

(108, 240)
[[-4.7025244860e-10 2.2308930833e-10 3.3887758747e-10 ...
  8.9425080268e-10 -8.3366307021e-10 3.5888943979e-10]
 [-4.7344406913e-10 2.2448279196e-10 3.4122727836e-10 ...
  9.0049781857e-10 -8.3913761903e-10 3.6101747208e-10]
 [-4.6279946163e-10 2.1847103323e-10 3.3191945180e-10 ...
  8.7779902972e-10 -8.1875998208e-10 3.5201364115e-10]
 ...
 [-4.6703407563e-10 2.2032790022e-10 3.3495588588e-10 ...
  8.8568725160e-10 -8.2558761489e-10 3.5498729443e-10]
 [-4.6782446185e-10 2.2069420531e-10 3.3558601826e-10 ...
  8.8733271665e-10 -8.2686461220e-10 3.5542151597e-10]
 [-4.7499452954e-10 2.2470568335e-10 3.4175110779e-10 ...
  9.0268861747e-10 -8.4075968255e-10 3.6139485034e-10]]


In [382]:
# l0_tanh feeds both the layer-1 matmul and the skip connection into l1_add,
# so its total gradient is the sum of the two paths.
l0_grad_add = l0_grad_mul + l1_grad_add
print(l0_grad_add)

[[1.0662342454e-05 8.5841006259e-06 9.1059279335e-07 ... 4.0963318731e-06
  -6.8194761571e-08 1.6326853154e-05]
 [1.0662334657e-05 8.5840950914e-06 9.1058792148e-07 ... 4.0963316313e-06
  -6.8194405822e-08 1.6326852675e-05]
 [1.0662356113e-05 8.5840954961e-06 9.1058811102e-07 ... 4.0963198069e-06
  -6.8182595683e-08 1.6326843644e-05]
 ...
 [1.0662346163e-05 8.5840969169e-06 9.1058758018e-07 ... 4.0963225870e-06
  -6.8187436662e-08 1.6326850612e-05]
 [1.0662341196e-05 8.5840898896e-06 9.1058236968e-07 ... 4.0963199068e-06
  -6.8182483175e-08 1.6326845368e-05]
 [1.0662329192e-05 8.5840894875e-06 9.1058214042e-07 ... 4.0963278193e-06
  -6.8191573169e-08 1.6326851131e-05]]


In [383]:
# Backward through l0_tanh = tanh(l0_mul): gradient with respect to l0_mul.
# Only the first row is printed.
l0_grad_tanh = tanh_grad(l0_tanh, l0_grad_add)
print(l0_grad_tanh.shape)
print(l0_grad_tanh[0])

(108, 240)
[3.1486881840e-10 2.7264257997e-10 1.4274779012e-11 3.7099052413e-01
 -2.0810115131e-01 3.2325522811e-12 4.1452893457e-10 -1.8051379791e-10
 3.4937987074e-11 1.2421544390e-12 -5.4179760789e-12 8.1425995403e-12
 -5.5448593897e-11 -2.9644574115e-10 -8.3610108010e-10 -7.8229993751e-12
 -9.0128401913e-02 -5.6807715297e-12 6.8948470342e-10 -2.2996219289e-12
 6.1578163085e-02 -3.3945426834e-10 -2.9546297709e-11 -1.3365405104e-09
 6.6812749925e-11 3.6975692827e-10 2.2611413832e-10 3.4829581936e-12
 -6.2234811006e-11 -4.5338371192e-03 3.6009344474e-10 2.3643270728e-10
 4.1903897800e-02 -6.3098134134e-12 -4.9019979206e-10 8.5053698295e-11
 1.7191101997e-10 -3.4922199644e-12 7.4208183002e-11 9.9967952011e-10
 -7.6930126010e-10 -1.2084528084e-10 -3.8455091300e-10 1.0707233974e-09
 3.6324045225e-10 -1.5987825118e-09 7.0369100938e-02 1.8012803924e-10
 -3.2770372086e-12 4.8801418403e-11 2.6275673744e-12 -2.0080403164e-10
 -3.7104443889e-10 -1.8616856128e-10 -9.7774932882e-11 -5.6366607717

In [384]:
# Backward through l0_mul = descriptor . w0 + b0: gradient of the energy with
# respect to the descriptor, shape (108, 2048). First 10 values of row 0 are
# printed.
descriptor_grad = np.dot(l0_grad_tanh, w0.T)
print(descriptor_grad.shape)
print(descriptor_grad[0, :10])

(108, 2048)
[-5.5179564029e-01 -8.5198493728e-01 1.8248961926e-01 -5.8356533252e-01
 -6.1132888814e-01 2.3906366438e-01 1.0385837187e+00 -2.1118484686e-01
 -1.8622652684e-01 -1.3494258829e+00]


## 反向 Tabulate 和 Descr

In [385]:
def read_binary_tb_grad(filename):
    """Load the backward Tabulate/Descr dump from a raw float64 binary file.

    The file is read as one flat float64 array and cut into five consecutive
    sections, in this order and with these shapes:

        s_g           (108, 512)
        r_g           (108, 512, 4)
        fusa_g        (108, 128, 4)
        fusb_g        (108, 4, 16)
        descriptor_g  (108, 2048)

    The shape of every section is printed after loading.

    Args:
        filename: Path of the binary file (raw float64 values, no header).

    Returns:
        Tuple ``(s_g, r_g, fusa_g, fusb_g, descriptor_g)``; each array is a
        reshaped slice of the single buffer read from disk.

    Raises:
        ValueError: If the file holds fewer values than the layout requires.
    """
    # (label used in the shape printout, shape) of each section, in file order.
    # NOTE(review): the forward reader in this file shapes fusa as
    # (108, 4, 128) while fusa_g is shaped (108, 128, 4) here. The element
    # count is identical, so this only changes how the same flat data is
    # indexed -- confirm which axis order the producer actually writes.
    layout = (
        ("s_g", (108, 512)),
        ("r_g", (108, 512, 4)),
        ("fusionA_g", (108, 128, 4)),
        ("fusionB_g", (108, 4, 16)),
        ("descriptor_g", (108, 2048)),
    )
    sizes = [int(np.prod(shape)) for _, shape in layout]
    total_sz = sum(sizes)

    data = np.fromfile(filename, dtype=np.float64, count=total_sz)
    # Validate up front (as read_binary_fn does) so a truncated file is
    # reported with the actual/expected counts rather than a reshape error.
    if data.size != total_sz:
        raise ValueError(
            f"Read data size {data.size} does not match expected size {total_sz}"
        )

    sections = []
    offset = 0
    for (_, shape), size in zip(layout, sizes):
        sections.append(data[offset:offset + size].reshape(shape))
        offset += size

    for (label, _), section in zip(layout, sections):
        print(f"{label} shape:", section.shape)

    return tuple(sections)

In [386]:
# Load the backward-pass dump (gradients of s, r, fusa, fusb and descriptor).
s_g, r_g, fusa_g, fusb_g, descriptor_g = read_binary_tb_grad("params/tb_grad.txt")

s_g shape: (108, 512)
r_g shape: (108, 512, 4)
fusionA_g shape: (108, 128, 4)
fusionB_g shape: (108, 4, 16)
descriptor_g shape: (108, 2048)


In [387]:
# Print the loaded fusa/fusb gradients (first entry only), to be compared
# with the values recomputed further down.
print(f"A_g {fusa_g.shape}\n", fusa_g[0])
print(f"B_g {fusb_g.shape}\n", fusb_g[0])

A_g (108, 128, 4)
 [[-4.0470158052e-04 8.9958930450e-03 1.0030105603e-03 -5.4812650419e-04]
 [-3.9452103708e-03 -1.4589006547e-04 1.5870172800e-03 6.9412930990e-03]
 [-1.1303287906e-03 8.3044126257e-03 7.1635066977e-04 1.7617123641e-02]
 [-1.7815802729e-03 -7.3743457976e-03 -3.7260702871e-04 -6.4282828611e-04]
 [1.1735798318e-04 -8.8564773172e-03 -4.9137248891e-03 -1.2320667237e-03]
 [-1.0888761268e-04 -4.3638988360e-03 1.0196023939e-04 1.3486232845e-04]
 [-1.3677367497e-03 -2.1468783764e-03 -1.9492645352e-04 -1.6981670543e-03]
 [-5.7225409078e-03 -4.7996533243e-04 3.8717965447e-03 -1.2703753961e-04]
 [8.8057927425e-04 2.1249522233e-05 -7.0471772271e-05 6.1666012454e-04]
 [-1.0641949152e-03 -3.4194612605e-05 3.2132870813e-04 -4.5495425006e-04]
 [-4.7818130253e-04 -1.6662845983e-04 -7.4529530445e-04 7.6495963887e-05]
 [1.2331035649e-03 3.5561662044e-03 1.0352179338e-04 -6.0454633516e-04]
 [-3.4803036692e-04 4.3171624521e-03 2.6955961713e-03 -1.6622484642e-03]
 [-4.1576517678e-04 -5.6045

In [388]:
# Descriptor gradient loaded from the dump; compare visually with the
# descriptor_grad values printed by the fitting-net backward model above.
print(descriptor_g)

[[-5.5179564029e-01 -8.5198493728e-01 1.8248961926e-01 ...
  -4.7739679415e-02 1.4526164775e+00 1.6994311822e+00]
 [-5.6881353311e-01 -8.6188859243e-01 1.8591857427e-01 ...
  -4.8064701136e-02 1.4682491111e+00 1.7101961567e+00]
 [-5.6964021478e-01 -8.5783657288e-01 1.8378497578e-01 ...
  -3.9630740178e-02 1.4449352494e+00 1.6879176567e+00]
 ...
 [-5.6385722277e-01 -8.5356488475e-01 1.8449215881e-01 ...
  -4.9570456804e-02 1.4114210883e+00 1.6472960810e+00]
 [-5.6371131789e-01 -8.5628698852e-01 1.8279608555e-01 ...
  -4.6815143476e-02 1.4744780966e+00 1.7205967615e+00]
 [-5.8492488961e-01 -8.6857210043e-01 1.8875510937e-01 ...
  -4.6749890671e-02 1.4558419771e+00 1.6892467841e+00]]


In [390]:
# Compute the fusa/fusb gradients for comparison with the software results.
# With d[i] = fusa[i]^T . fusb[i] and d_g the gradient of d:
#   A_g[i] = fusb[i] . d_g[i]^T   -> (4, 16) x (16, 128) = (4, 128)
#   B_g[i] = fusa[i] . d_g[i]     -> (4, 128) x (128, 16) = (4, 16)
A_g = np.zeros((108, 4, 128))
B_g = np.zeros((108, 4, 16))
d_g = descriptor_g.reshape((108, 128, 16))

for i in range(108):
    A_g[i] = np.dot(fusb[i], d_g[i].T)
    B_g[i] = np.dot(fusa[i], d_g[i])

In [394]:
# Add the fusb gradient onto the first 16 columns of the fusa gradient.
# NOTE(review): presumably because fusb is the first 16 columns of fusa
# (their printed leading values coincide) -- confirm.
# In-place update: re-running this cell without first rebuilding A_g adds
# B_g a second time.
A_g[:, :, :16] += B_g[:, :, :16]

In [395]:
# Scale the fusa gradient in place by 4/2048.
# NOTE(review): the origin of this factor is not visible in this file --
# confirm against the software implementation.
# In-place update: re-running this cell rescales A_g again.
A_g *= 4/2048

In [396]:
# Print the recomputed fusa/fusb gradients (first entry only) for comparison
# with the values loaded from the dump.
print(f"A_g {A_g.shape}\n", A_g[0])
print(f"B_g {B_g.shape}\n", B_g[0])

A_g (108, 4, 128)
 [[-4.0470158052e-04 8.9958930450e-03 1.0030105603e-03 -5.4812650419e-04
  -3.9452103708e-03 -1.4589006547e-04 1.5870172800e-03 6.9412930990e-03
  -1.1303287906e-03 8.3044126257e-03 7.1635066977e-04 1.7617123641e-02
  -1.7815802729e-03 -7.3743457976e-03 -3.7260702871e-04 -6.4282828611e-04
  1.1735798318e-04 -8.8564773172e-03 -4.9137248891e-03 -1.2320667237e-03
  -1.0888761268e-04 -4.3638988360e-03 1.0196023939e-04 1.3486232845e-04
  -1.3677367497e-03 -2.1468783764e-03 -1.9492645352e-04 -1.6981670543e-03
  -5.7225409078e-03 -4.7996533243e-04 3.8717965447e-03 -1.2703753961e-04
  8.8057927425e-04 2.1249522233e-05 -7.0471772271e-05 6.1666012454e-04
  -1.0641949152e-03 -3.4194612605e-05 3.2132870813e-04 -4.5495425006e-04
  -4.7818130253e-04 -1.6662845983e-04 -7.4529530445e-04 7.6495963887e-05
  1.2331035649e-03 3.5561662044e-03 1.0352179338e-04 -6.0454633516e-04
  -3.4803036692e-04 4.3171624521e-03 2.6955961713e-03 -1.6622484642e-03
  -4.1576517678e-04 -5.6045032883e-03 -2