In [24]:
# Imports are grouped at the top of the cell so that a fresh kernel fails
# immediately if a dependency is missing, before any data is loaded.
import os
import time

import numpy as np
from pynq import Overlay, allocate

# The data paths below are relative, so show which directory they resolve from.
print(os.getcwd())

# Load test cases and their golden (reference) results for FPGA validation.
# SECURITY NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
# Python objects. Only load .npy files that come from a trusted source.
loaded_test_cases = np.load("../test_cases.npy", allow_pickle=True)
loaded_golden_results = np.load("../golden_results.npy", allow_pickle=True)

# Each test case is later compared against the golden result with the same
# index, so the two collections must line up one-to-one.
assert len(loaded_test_cases) == len(loaded_golden_results), (
    f"{len(loaded_test_cases)} test cases but "
    f"{len(loaded_golden_results)} golden results"
)

# Load the FPGA bitstream and take a handle on the matrix-multiply IP.
overlay_baseline = Overlay("matmult_optimized.bit")
baseline = overlay_baseline.matmul_optimized_0

/root/jupyter_notebooks/getting_started/matmul_optimized_64


In [25]:
# Validate every test case on the FPGA.
#
# The complete per-case sequence (load inputs, run the accelerator, read the
# result, compare with the golden result) lives in ONE loop body in ONE cell.
# Splitting a loop body across cells leaves the later cells outside the loop,
# so they would run only once, for the last test case.

MATRIX_DIM = 64                          # matrices are MATRIX_DIM x MATRIX_DIM
NUM_ELEMENTS = MATRIX_DIM * MATRIX_DIM   # flattened length of one matrix
CTRL_OFFSET = 0x00                       # control register of the IP
AP_START = 0x01                          # written to CTRL_OFFSET to start a run
AP_DONE = 0x02                           # AP_DONE bit polled in CTRL_OFFSET
DONE_TIMEOUT_S = 5.0                     # upper bound on one accelerator run


def write_buffer_address(ip, low_reg, high_reg, buffer):
    """Program a buffer's physical address into a low/high register pair.

    Args:
        ip: IP handle exposing ``write(offset, value)``.
        low_reg: register-map entry receiving address bits 31..0.
        high_reg: register-map entry receiving address bits 63..32.
        buffer: allocated buffer exposing ``physical_address``.
    """
    address = buffer.physical_address
    ip.write(low_reg.address, address & 0xFFFFFFFF)
    ip.write(high_reg.address, (address >> 32) & 0xFFFFFFFF)


# Allocate the DMA buffers once and reuse them for every test case. Allocating
# inside the loop without freeing leaks contiguous memory on each iteration.
input1_buffer = allocate(shape=(NUM_ELEMENTS,), dtype='f4')
input2_buffer = allocate(shape=(NUM_ELEMENTS,), dtype='f4')
output_buffer = allocate(shape=(NUM_ELEMENTS,), dtype='f4')
registers = baseline.register_map

try:
    for idx, (A, B) in enumerate(loaded_test_cases):
        # Copy the flattened test matrices into the input buffers.
        np.copyto(input1_buffer, A.flatten())
        np.copyto(input2_buffer, B.flatten())

        # The output buffer is reused, so clear it first: any element the
        # accelerator fails to write then shows up as 0 rather than as a
        # stale value left over from the previous test case.
        output_buffer[:] = 0

        # Sync buffers to device
        input1_buffer.sync_to_device()
        input2_buffer.sync_to_device()
        output_buffer.sync_to_device()
        print("input1_buffer:", input1_buffer)
        print("input2_buffer:", input2_buffer)

        # Write buffer addresses to FPGA (both low and high parts)
        write_buffer_address(baseline, registers.A_1, registers.A_2, input1_buffer)
        write_buffer_address(baseline, registers.B_1, registers.B_2, input2_buffer)
        write_buffer_address(baseline, registers.Out_r_1, registers.Out_r_2, output_buffer)

        # Start the timer BEFORE starting the accelerator; starting it
        # afterwards misses the beginning of the run and under-reports.
        start_time = time.perf_counter()
        baseline.write(CTRL_OFFSET, AP_START)

        # Wait for the AP_DONE bit, but never spin forever on a hung design.
        while (baseline.read(CTRL_OFFSET) & AP_DONE) == 0:
            if time.perf_counter() - start_time > DONE_TIMEOUT_S:
                raise TimeoutError(
                    f"Test Case {idx + 1}: accelerator did not signal done "
                    f"within {DONE_TIMEOUT_S} s"
                )

        baseline_time = time.perf_counter() - start_time
        print(f"HW mul (baseline) exe time: {baseline_time:.6f}s")

        # Sync result buffer from device
        output_buffer.sync_from_device()
        print("output_buffer:", output_buffer)

        # Copy the result out of the DMA buffer before reshaping, so that
        # fpga_result stays valid after the buffer is reused or freed.
        fpga_result = np.array(output_buffer).reshape((MATRIX_DIM, MATRIX_DIM))
        print("Output Matrix:", fpga_result)

        # Compare FPGA result with golden result
        golden_result = loaded_golden_results[idx]
        max_diff = np.max(np.abs(fpga_result - golden_result))
        print(f"Test Case {idx + 1}: Max Difference = {max_diff}")
finally:
    # Free buffers even if a test case raised, so no contiguous memory leaks.
    input1_buffer.freebuffer()
    input2_buffer.freebuffer()
    output_buffer.freebuffer()

print("Validation complete.")