In [2]:
import numpy as np
import time
from pynq import Overlay, allocate

# Define dimensions
input_dim = (1, 32, 32)
weights_dim = (6, 1, 5, 5)
bias_dim = (6,)
output_dim = (6, 28, 28)

# Generate random input, weights, and biases
num_test_cases = 5  # Number of test cases
test_results = []

for test_id in range(num_test_cases):
    # Generate random input, weights, and bias
    # input_data = np.random.randn(*input_dim).astype('f4')
    # weights_data = np.random.randn(*weights_dim).astype('f4')
    # bias_data = np.random.randn(*bias_dim).astype('f4')
    input_data = np.ones(input_dim, dtype='f4')
    weights_data = np.ones(weights_dim, dtype='f4')
    bias_data = np.zeros(bias_dim, dtype='f4')


In [3]:
from pynq import Overlay
    
overlay = Overlay("lenet_conv1.bit")
conv_hw = overlay.convolution1_hls_0  

In [4]:
from pynq import allocate

print("Allocating buffers...")
# Allocate buffers
input_buffer = allocate(shape=(1*32*32,), dtype='f4')
weights_buffer = allocate(shape=(6*1*5*5,), dtype='f4')
bias_buffer = allocate(shape=(6,), dtype='f4')
output_buffer = allocate(shape=(6*28*28,), dtype='f4')
print("Buffers allocated.")

Allocating buffers...
Buffers allocated.


In [5]:
import numpy as np

print("Copying data to buffers...")
# Copy data to buffers
np.copyto(input_buffer, input_data.flatten())
np.copyto(weights_buffer, weights_data.flatten())
np.copyto(bias_buffer, bias_data)

print("Syncing buffers to the device...")
# Sync data to the device
input_buffer.sync_to_device()
weights_buffer.sync_to_device()
bias_buffer.sync_to_device()
print("Buffers synced to the device.")

Copying data to buffers...
Syncing buffers to the device...
Buffers synced to the device.


In [6]:
conv_hw.register_map

RegisterMap {
  input_r_1 = Register(input_r=write-only),
  input_r_2 = Register(input_r=write-only),
  weights_1 = Register(weights=write-only),
  weights_2 = Register(weights=write-only),
  bias_1 = Register(bias=write-only),
  bias_2 = Register(bias=write-only),
  output_r_1 = Register(output_r=write-only),
  output_r_2 = Register(output_r=write-only)
}

In [7]:
print("Writing addresses to registers...")
# Write memory addresses to the registers
conv_hw.write(conv_hw.register_map.input_r_1.address, input_buffer.physical_address & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.input_r_2.address, (input_buffer.physical_address >> 32) & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.weights_1.address, weights_buffer.physical_address & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.weights_2.address, (weights_buffer.physical_address >> 32) & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.bias_1.address, bias_buffer.physical_address & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.bias_2.address, (bias_buffer.physical_address >> 32) & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.output_r_1.address, output_buffer.physical_address & 0xFFFFFFFF)
conv_hw.write(conv_hw.register_map.output_r_2.address, (output_buffer.physical_address >> 32) & 0xFFFFFFFF)
print("Addresses written to registers.")

print(f"Input Address: 0x{input_buffer.physical_address:X}")
print(f"Weights Address: 0x{weights_buffer.physical_address:X}")
print(f"Bias Address: 0x{bias_buffer.physical_address:X}")
print(f"Output Address: 0x{output_buffer.physical_address:X}")

Writing addresses to registers...
Addresses written to registers.
Input Address: 0x2883000
Weights Address: 0x2B8E000
Bias Address: 0x2B8F000
Output Address: 0x375C0000


In [12]:
input_buffer.sync_to_device()
weights_buffer.sync_to_device()
bias_buffer.sync_to_device()
timeout_counter = 0
max_timeout = 100
print("Starting hardware execution...")
# Start the computation
conv_hw.write(0x00, 0x01)

while conv_hw.read(0x00) & 0x2 == 0:
    timeout_counter += 1
    if timeout_counter > max_timeout:
        print("ERROR: Timeout waiting for hardware to complete.")
        break
    if timeout_counter <= max_timeout:
        print("Hardware execution completed successfully.")
        pass

output_buffer.sync_from_device()
print("End passing")

Starting hardware execution...
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution completed successfully.
Hardware execution comp

In [14]:
import numpy as np
import time
from pynq import Overlay, allocate

# Wait for completion
print("Waiting for hardware to complete...")
start_time = time.time()
while True:
    reg = conv_hw.read(0x00)
    if reg != 1:
        break
end_time = time.time()
print("HW mul (baseline) exe time: {}s".format(end_time - start_time))
print("Hardware execution completed.")

print("Syncing output from the device...")
# Sync the output from the device

output_hw = output_buffer.reshape(output_dim)
print("Output synced from the device.")

print("Performing software validation...")
# Software simulation for validation
output_sw = np.zeros(output_dim, dtype='f4')
for co in range(6):
    for h in range(28):
        for w in range(28):
            patch = input_data[0, h:h+5, w:w+5]
            output_sw[co, h, w] = np.sum(weights_data[co, 0] * patch) + bias_data[co]
print("Software validation completed.")

# Validate the results
diff = np.abs(output_hw - output_sw)
max_diff = np.max(diff)
test_results.append(max_diff)
print(f"Test Case {test_id + 1}: Max Difference = {max_diff:.6f}")

# Release buffers
input_buffer.close()
weights_buffer.close()
bias_buffer.close()
output_buffer.close()
print(f"Test Case {test_id + 1}: Finished.\n")

# Summary
print("All test cases completed.")
if all(diff < 1e-5 for diff in test_results):
    print("All tests passed.")


Waiting for hardware to complete...
HW mul (baseline) exe time: 0.0004646778106689453s
Hardware execution completed.
Syncing output from the device...
Output synced from the device.
Performing software validation...
Software validation completed.
Test Case 5: Max Difference = 25.000000
Test Case 5: Finished.

All test cases completed.
Some test cases failed validation.
