In [None]:
from time import perf_counter as pc

import numpy as np

In [None]:
def product_real(a, b):
    """Return the real part of the product ``a * b`` without forming the product.

    Uses the identity ``Re(a * b) = Re(a) * Re(b) - Im(a) * Im(b)``, so only the
    two multiplications that feed the real part are carried out.

    Parameters
    ----------
    a, b
        Any objects exposing ``.real`` and ``.imag`` (e.g. Python or NumPy
        complex scalars, or NumPy arrays, where ``*`` acts elementwise).

    Returns
    -------
    The value of ``a.real * b.real - a.imag * b.imag``.
    """
    real_term = a.real * b.real
    imag_term = a.imag * b.imag
    return real_term - imag_term


def matrix_product_real(a, b):
    """Return the real part of the matrix product ``a @ b`` without forming it.

    Matrix analogue of the scalar identity: the real part of ``a @ b`` equals
    ``Re(a) @ Re(b) - Im(a) @ Im(b)``, so two real matrix products replace one
    complex matrix product.

    Parameters
    ----------
    a, b
        Operands exposing ``.real`` / ``.imag`` whose parts support ``@``
        (e.g. NumPy arrays with shapes compatible for matrix multiplication).

    Returns
    -------
    The value of ``a.real @ b.real - a.imag @ b.imag``.
    """
    real_block = a.real @ b.real
    imag_block = a.imag @ b.imag
    return real_block - imag_block


def abs_max(a, axis=None):
    """Return the signed entry of ``a`` that has the largest magnitude.

    The maximum and minimum are taken along ``axis`` (over the whole input when
    ``axis`` is ``None``). The minimum is returned wherever its negation strictly
    exceeds the maximum; otherwise the maximum is returned, so a tie between
    ``-min`` and ``max`` resolves to the maximum.

    Parameters
    ----------
    a
        Array-like of values accepted by ``np.max`` / ``np.min``.
    axis
        Forwarded unchanged to ``np.max`` and ``np.min``.

    Returns
    -------
    numpy.ndarray
        Result of ``np.where``; a 0-d array when the reduction yields a scalar.
    """
    largest = np.max(a, axis=axis)
    smallest = np.min(a, axis=axis)
    # The minimum only wins when it is negative enough to out-size the maximum.
    negative_wins = largest < -smallest
    return np.where(negative_wins, smallest, largest)

It seems that, for both scalars and arrays, computing the full complex product
and then taking its real part is faster than computing only the real part of the
product directly.

Perhaps with sparse arrays or with other optimizations (e.g., JIT compilation
or bindings to a Rust implementation), the real-part-only computation could
come out ahead.

In [None]:
# Scalar benchmark: compare two ways of obtaining the real part of a complex
# product, one pair of elements at a time, over `num_runs` pairs.
num_runs = 1000

# Seeded generator so the inputs are the same on every run of this cell.
rng = np.random.default_rng(42)
# Complex test vectors: real and imaginary parts are drawn separately.
a = rng.random(num_runs) + 1j * rng.random(num_runs)
b = rng.random(num_runs) + 1j * rng.random(num_runs)

# Strategy 0: form the full complex product, then keep its real part.
# The result buffer is pre-filled with inf (presumably so that any slot the
# loop did not overwrite would be obvious).
res0 = np.full((num_runs), np.inf)
start = pc()
for j in range(num_runs):
    res0[j] = (a[j] * b[j]).real
end = pc()
# Elapsed perf_counter time for the whole loop (indexing and stores included).
print(end - start)

# Strategy 1: compute only the real part via product_real, timed the same way.
res1 = np.full(num_runs, np.inf)
start = pc()
for j in range(num_runs):
    res1[j] = product_real(a[j], b[j])
end = pc()
print(end - start)

# Cell output: the largest-magnitude (signed) difference between the two
# strategies, as a check that they agree.
abs_max(res0 - res1)

0.0003556949998255732
0.0010560360001363733


array(0.)

In [None]:
# Matrix benchmark: same comparison as the scalar case, but each run multiplies
# a pair of J x J complex matrices.
J = 10
num_runs = 1000

# Seeded generator so the inputs are the same on every run of this cell.
rng = np.random.default_rng(42)
# Stacks of `num_runs` complex J x J matrices; real and imaginary parts are
# drawn separately.
a = rng.random((num_runs, J, J)) + 1j * rng.random((num_runs, J, J))
b = rng.random((num_runs, J, J)) + 1j * rng.random((num_runs, J, J))

# Strategy 0: full complex matrix product, then keep its real part.
# The result buffer is pre-filled with inf (presumably so that any slot the
# loop did not overwrite would be obvious).
res0 = np.full((num_runs, J, J), np.inf)
start = pc()
for j in range(num_runs):
    res0[j] = (a[j] @ b[j]).real
end = pc()
# Elapsed perf_counter time for the whole loop (indexing and stores included).
print(end - start)

# Strategy 1: two real matrix products via matrix_product_real, timed the same way.
res1 = np.full((num_runs, J, J), np.inf)
start = pc()
for j in range(num_runs):
    res1[j] = matrix_product_real(a[j], b[j])
end = pc()
print(end - start)

# Cell output: the largest-magnitude (signed) difference between the two
# strategies over all entries, as a check that they agree.
abs_max(res0 - res1)

0.006464920000098573
0.010170195000000604


array(2.22044605e-15)