# Thomas Solver Overlay Test

This notebook compares the FPGA implementation of a tridiagonal matrix solver against a NumPy implementation. It measures the execution time of each approach and checks that both produce the same result.

In [None]:
from pynq import Overlay, allocate
import numpy as np
import time

In [None]:
# Load overlay and inspect available IP blocks.
# NOTE(review): the file name below spells "tomas" rather than "thomas" —
# confirm it matches the bitstream file actually deployed next to the notebook.
overlay = Overlay('custom_tomas_solver_v1.bit')
# Print the overlay's IP dictionary so the attribute name used on the last
# line of this cell can be checked against what the design exposes.
print(overlay.ip_dict)
# Replace 'thomas_solver_0' with the actual IP name from the printed dictionary if different
# Handle to the solver core; its register_map is used later to configure and
# start the core.
solver_ip = overlay.thomas_solver_0

In [None]:
# Number of unknowns in the tridiagonal system
N = 64

# Matrix coefficients, in the order they are passed to thomas_solver_numpy:
#   dp  - interior main-diagonal entries
#   dp1 - first main-diagonal entry
#   dp2 - last main-diagonal entry
#   off - entries on both off-diagonals
dp, dp1, dp2, off = (np.complex64(value) for value in (4+0j, 3+0j, 3+0j, 1+0j))

# Buffers accessible to the FPGA: a_b carries the right-hand side to the
# solver, a_x receives the solution
a_b, a_x = (allocate(shape=(N,), dtype=np.complex64) for _ in range(2))

# Random complex right-hand side (real part drawn first, then imaginary
# part), kept in b_np for the CPU reference and copied into the input buffer
b_np = np.random.rand(N) + 1j*np.random.rand(N)
b_np = b_np.astype(np.complex64)
a_b[:] = b_np

In [None]:
def thomas_solver_numpy(dp, dp1, dp2, off, b):
    """Solve a constant-coefficient tridiagonal system with the Thomas algorithm.

    The system matrix has ``dp1`` as its first main-diagonal entry, ``dp2`` as
    its last, ``dp`` on every interior main-diagonal entry, and ``off`` on both
    the sub- and super-diagonal.  No pivoting is performed, so a zero pivot
    produces non-finite values rather than an exception.

    Args:
        dp: Interior main-diagonal coefficient.
        dp1: First main-diagonal coefficient.
        dp2: Last main-diagonal coefficient.
        off: Coefficient on both off-diagonals.
        b: One-dimensional right-hand side (array-like) of length N >= 2.

    Returns:
        A ``complex64`` array of length N holding the solution ``x``.

    Raises:
        ValueError: If ``b`` is not one-dimensional or has fewer than two
            elements.  The first and last rows are treated separately, so a
            smaller system is not defined; previously N == 1 silently
            returned a wrong value through negative-index wraparound.
    """
    b = np.asarray(b)
    if b.ndim != 1:
        raise ValueError(f"b must be one-dimensional, got shape {b.shape}")
    N = b.shape[0]
    if N < 2:
        raise ValueError(f"b must have at least 2 elements, got {N}")

    # Forward sweep: c_prime / d_prime are the modified super-diagonal and
    # right-hand side after eliminating the sub-diagonal.
    c_prime = np.empty(N, dtype=np.complex64)
    d_prime = np.empty(N, dtype=np.complex64)

    # First row uses the special diagonal entry dp1.
    inv = 1.0/np.complex64(dp1)
    c_prime[0] = off * inv
    d_prime[0] = b[0] * inv

    # Interior rows share the diagonal entry dp.
    for i in range(1, N-1):
        denom = dp - off * c_prime[i-1]
        inv = 1.0/denom
        c_prime[i] = off * inv
        d_prime[i] = (b[i] - off * d_prime[i-1]) * inv

    # Last row uses dp2; it has no super-diagonal, so c_prime[N-1] is never
    # written or read.
    denom = dp2 - off * c_prime[N-2]
    d_prime[N-1] = (b[N-1] - off * d_prime[N-2]) / denom

    # Back substitution from the last unknown upwards.
    x = np.empty(N, dtype=np.complex64)
    x[-1] = d_prime[-1]
    for i in range(N-2, -1, -1):
        x[i] = d_prime[i] - c_prime[i] * x[i+1]
    return x

In [None]:
# CPU reference implementation.
# Timed with time.perf_counter(): a monotonic, high-resolution clock meant for
# measuring short intervals, unlike time.time(), which follows the wall clock
# and can jump if the system time is adjusted.
t0 = time.perf_counter()
x_ref = thomas_solver_numpy(dp, dp1, dp2, off, b_np)
cpu_time = time.perf_counter() - t0
print(f'CPU time: {cpu_time*1e3:.3f} ms')

In [None]:
# Configure solver IP.
# Each complex coefficient is written as a separate real and imaginary
# register field.
# NOTE(review): this assumes the register fields accept Python floats; if the
# core expects raw IEEE-754 bit patterns the values must be packed first —
# confirm against the IP's register description.
a_rm = solver_ip.register_map
a_rm.dp_r = float(np.real(dp))
a_rm.dp_i = float(np.imag(dp))
a_rm.dp1_r = float(np.real(dp1))
a_rm.dp1_i = float(np.imag(dp1))
a_rm.dp2_r = float(np.real(dp2))
a_rm.dp2_i = float(np.imag(dp2))
a_rm.off_r = float(np.real(off))
a_rm.off_i = float(np.imag(off))
# Point the core at the physical addresses of the input (b) and output (x)
# buffers.
a_rm.b = a_b.physical_address
a_rm.x = a_x.physical_address

# Run hardware solver.
# Timed with time.perf_counter(): a monotonic, high-resolution clock meant for
# measuring short intervals, unlike time.time(), which follows the wall clock.
# The measured interval includes the register accesses of the polling loop.
t0 = time.perf_counter()
a_rm.CTRL.AP_START = 1
# Busy-wait until the core reports completion; there is no timeout, so a core
# that never signals AP_DONE keeps this cell spinning until interrupted.
# NOTE(review): re-fetching register_map inside the loop looks redundant if
# pynq returns the same map object each time — confirm before simplifying.
while a_rm.CTRL.AP_DONE == 0:
    a_rm = solver_ip.register_map
hw_time = time.perf_counter() - t0
print(f'Hardware time: {hw_time*1e3:.3f} ms')

In [None]:
# Compare results: copy the hardware output buffer into a regular NumPy array,
# then check it element-wise against the CPU reference (absolute tolerance
# 1e-6 on top of np.allclose's default relative tolerance).
x_hw = np.array(a_x)
results_match = np.allclose(x_ref, x_hw, atol=1e-6)
print('Results match:', results_match)

In [None]:
# Release the FPGA-accessible buffers first, then the overlay, so the buffers
# are returned while the overlay they were allocated under is still live.
# NOTE(review): pynq's own examples release buffers before calling
# Overlay.free() — confirm whether the installed pynq version requires this order.
a_b.free()
a_x.free()
overlay.free()