# In [None]:
import numpy as np
import pynq
from pynq import allocate
import time  # Import the time module for latency measurement

# Step 1: Load the FPGA overlay
# The Overlay object is built from the bitstream file named below, and the
# CNN IP block is then looked up as an attribute of that overlay. `cnn_ip` is
# the handle the rest of this script uses for every register read and write.
overlay = pynq.Overlay('design_1_wrapper.bit')  # Replace with your actual bitstream file name
cnn_ip = overlay.cnn_top_0  # Replace with the actual name of your IP block
print(cnn_ip.signature)  # Print the IP's signature to confirm setup

# Step 2: Load and parse the in.dat file
def load_images_from_file(file_path, img_rows=28, img_cols=28):
    """
    Load images from a whitespace-separated text file of integer values.

    The file is read as one flat stream of integers. Every consecutive run of
    ``img_rows * img_cols`` values forms one image, laid out in row-major
    order.

    Args:
        file_path (str): Path to the input file containing image data.
        img_rows (int): Number of rows per image. Must be positive.
        img_cols (int): Number of columns per image. Must be positive.

    Returns:
        np.ndarray: Integer array of images with shape
            (num_images, img_rows, img_cols). A file with no values yields an
            array of shape (0, img_rows, img_cols).

    Raises:
        ValueError: If a dimension is not positive, if a token in the file is
            not a valid integer, or if the number of values in the file is
            not a whole multiple of ``img_rows * img_cols``.
    """
    if img_rows <= 0 or img_cols <= 0:
        raise ValueError(
            f"img_rows and img_cols must be positive, got {img_rows}x{img_cols}"
        )
    pixels_per_image = img_rows * img_cols

    with open(file_path, 'r') as file:
        tokens = file.read().split()

    # Parse every whitespace-separated token as an integer.
    values = [int(token) for token in tokens]

    # Refuse a trailing partial image up front, with a message that names the
    # file and the sizes involved, instead of letting reshape fail obscurely.
    num_images, leftover = divmod(len(values), pixels_per_image)
    if leftover:
        raise ValueError(
            f"{file_path} contains {len(values)} values, which is not a "
            f"multiple of the image size {img_rows}x{img_cols} "
            f"({leftover} values left over)"
        )

    # An empty list would otherwise be inferred as float64; keep the result
    # an integer array in that case too.
    flat_data = np.array(values) if values else np.empty(0, dtype=int)

    return flat_data.reshape(num_images, img_rows, img_cols)

# Step 3: Allocate memory for input and output
# Image geometry and the length of the prediction vector. These constants
# size the buffers below and are also passed to the loader, so the buffer
# shape and the shape of each loaded image always agree.
IMG_ROWS = 28
IMG_COLS = 28
DIGITS = 10

# One buffer holds a single input image, the other holds the IP's output.
# Their physical addresses are later written to the IP's registers.
image_buffer = allocate(shape=(IMG_ROWS, IMG_COLS), dtype=np.float32)
prediction_buffer = allocate(shape=(DIGITS,), dtype=np.float32)

# Step 4: Load images
image_file_path = 'in.dat'  # Path to your in.dat file
images = load_images_from_file(
    image_file_path, img_rows=IMG_ROWS, img_cols=IMG_COLS
)
print(f"Loaded {len(images)} images from {image_file_path}.")

# Step 5: Measure latency for each image
latencies = []

# The buffers are allocated once, so their physical addresses do not change
# between iterations; look them up a single time.
image_ptr = image_buffer.physical_address
prediction_ptr = prediction_buffer.physical_address

for image in images:
    # Populate the image buffer with the current image
    np.copyto(image_buffer, image)
    # Push the CPU-side write out to memory before the IP reads the buffer.
    # Without this the IP may see stale data if the buffer is cacheable.
    image_buffer.flush()

    # Write buffer addresses to the IP
    cnn_ip.write(0x10, image_ptr)       # Write image buffer address
    cnn_ip.write(0x1C, prediction_ptr)  # Write prediction buffer address

    # Start the FPGA processing and measure time.
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump if the system clock is
    # adjusted.
    start_time = time.perf_counter()
    cnn_ip.write(0x00, 1)  # Start the computation

    # Busy-wait until the control register no longer reads exactly 1.
    # NOTE(review): presumably 1 is the "start/running" bit of the IP's
    # control register. If the IP can clear that bit before it has finished,
    # or sets other bits while busy, poll the done/idle bit with a mask
    # instead -- confirm against the IP's register map.
    while cnn_ip.read(0x00) == 1:
        pass
    end_time = time.perf_counter()

    # Calculate latency for this image
    latency = end_time - start_time
    latencies.append(latency)

# Calculate and display the average latency
if latencies:
    average_latency = sum(latencies) / len(latencies)
    print(f"\nAverage Latency: {average_latency:.6f} seconds")
else:
    # No images were loaded, so there is nothing to average.
    print("\nNo images were processed; average latency is undefined.")
