In [1]:
from pynq import Overlay
from pynq import allocate
import numpy as np
from mnist_driver import MNISTDriver
import time

**Load the overlay**

In [2]:
ol = Overlay("/home/xilinx/pynq/overlays/mnist/mnist.bit")
hw = ol.mnist

Initial control register:  4
Updated control register:  129


**Load the testing data**

In [3]:
with open('mnist_images.npy', 'rb') as f:
    images = np.load(f)
with open('mnist_results.npy', 'rb') as f:
    results = np.load(f)
# Load the weights and biases for the software test
hidden_weights = np.load('hidden_layer_weights.npy')
hidden_biases = np.load('hidden_layer_biases.npy')
output_weights = np.load('output_layer_weights.npy')
output_biases = np.load('output_layer_biases.npy')

# Helper function
def within_std(arr, val):
    std = np.std(arr)
    count = 0
    for num in arr:
        if (val < num < (val + std)) or ((val - std) < num < val):
            count = count + 1
    return count

**FPGA Accuracy Test**

In [4]:
test_images = images[-10000:-1]
test_results = results[-10000:-1]
result = allocate(shape=((10,)), dtype=np.uint8)
total_correct = 0
total_test_images = len(test_results)

# Create the memory structure needed for FPGA inference
input_images = allocate(shape=((total_test_images, 28*28)), dtype=np.int32)
results = allocate(shape=((total_test_images, 10)), dtype=np.int32)
for i in range(total_test_images):
    input_images[i] = test_images[i].flatten().astype(np.int32)

start = time.time()
hw.batch_inference(input_images, results)
end = time.time()
for itr, result in enumerate(results):
    if test_results[itr] == np.argmax(result):
        total_correct = total_correct + 1

print("FPGA Accuracy: " + str((total_correct / total_test_images) * 100) + "%")
print("Time elapsed: " + str(end - start))
del(input_images)
del(results)

FPGA Accuracy: 94.82948294829482%
Time elapsed: 11.398129940032959


**Software Accuracy Test**

In [5]:
def relu(vec):
    return np.maximum(vec, 0)

def inference(image):

    # Flatten the input
    image = image.flatten()

    # Calculate the hidden layer output
    hidden_layer = np.matmul(image.astype(np.int32), hidden_weights.T.astype(np.int32)) + hidden_biases
    hidden_layer = relu(hidden_layer)

    # Calculate the output layer
    output_layer = np.matmul(hidden_layer.astype(np.int32), output_weights.T.astype(np.int32)) + output_biases

    return np.argmax(output_layer)

def calculate_sw(images):
    total_software = 0
    total_software_incorrect = 0
    sw_start = time.time()
    for i, img in enumerate(images):
        sw_guess = inference(img)
        if sw_guess == test_results[i]:
            total_software = total_software + 1
        else:
            total_software_incorrect = total_software_incorrect + 1
        if i % 1000 == 0:
            sw_end = time.time()
            print(str(i) + " software images processed!")
            sw_start = sw_end
    return total_software, total_software_incorrect


start_sw_time = time.time()
total_software, total_software_incorrect = calculate_sw(images=test_images)
end_sw_time = time.time()
print("Accuracy of the software model: " + str((total_software / len(test_images)) * 100) + "%")
print("Total time taken for the software model: " + str(end_sw_time - start_sw_time))
del(test_images)
del(test_results)

0 software images processed!
1000 software images processed!
2000 software images processed!
3000 software images processed!
4000 software images processed!
5000 software images processed!
6000 software images processed!
7000 software images processed!
8000 software images processed!
9000 software images processed!
Accuracy of the software model: 94.82948294829482%
Total time taken for the software model: 13.534584999084473


**FPGA Performance Test**

In [6]:
# Create the memory structure needed for FPGA inference
performace_input_len = int(len(images) / 4)
input_images = allocate(shape=((performace_input_len, 28*28)), dtype=np.int32)
results = allocate(shape=((performace_input_len, 10)), dtype=np.int32)
for i in range(performace_input_len):
    input_images[i] = images[i].flatten().astype(np.int32)

int_ops = 101756 - 50 # This is the floating point operations - softmax operations from NN_Evaluation script.
def fpga_performance_test():
    performance_start_time = time.time_ns()
    hw.batch_inference(input_images, results)
    performance_end_time  = time.time_ns()
    performance_time = (performance_end_time - performance_start_time) / 10**9
    return ((int_ops * performace_input_len) / performance_time) / (10**9)

print("Beginning FPGA performance test.")
p_test_length = time.time()
performance_score = []
for i in range(25):
    performance_score.append(fpga_performance_test())
print("Time taken for the FPGA performance test: " + str(time.time() - p_test_length))
print("Time for each iteration: " + str((time.time() - p_test_length) / 25))
average_performance_score = sum(performance_score) / len(performance_score)
print("Number of performance samples collected: {}".format(len(performance_score)))
print("Average performance: {} G-OPS".format(average_performance_score))
print("Maximum performance: {} G-OPS".format(np.max(performance_score)))
print("Standard deviation of performance: {} G-OPS".format(np.std(performance_score)))
print("Number of samples within 1 standard deviation of the mean performance: {}".format(within_std(performance_score, average_performance_score)))
print("Number of entries within 1 standard deviation of the maximum performance: {}".format(within_std(performance_score, np.max(performance_score))))
del(input_images)
del(results)

Beginning FPGA performance test.
Time taken for the FPGA performance test: 497.5825250148773
Time for each iteration: 19.903343276977537
Number of performance samples collected: 25
Average performance: 0.08942595310652393 G-OPS
Maximum performance: 0.09025376743452776 G-OPS
Standard deviation of performance: 0.00019017981218042769 G-OPS
Number of samples within 1 standard deviation of the mean performance: 23
Number of entries within 1 standard deviation of the maximum performance: 0


**CPU Performance Test**

In [8]:
def software_performance_test():
    performace_sw_start = time.time_ns()
    for i in range(performace_input_len):
        inference(images[i])
    performace_sw_end = time.time_ns()
    performance_sw_time = (performace_sw_end - performace_sw_start) / 10**9
    return ((int_ops * performace_input_len) / performance_sw_time) / (10**9)

print("Beginning software performance test.")
p_test_length = time.time()
performance_score = []
for i in range(25):
    performance_score.append(software_performance_test())
print("Time taken for the software performance test: " + str(time.time() - p_test_length))
print("Time for each iteration: " + str((time.time() - p_test_length) / 25))
average_performance_score = sum(performance_score) / len(performance_score)
print("Number of performance samples collected: {}".format(len(performance_score)))
print("Average performance: {} G-OPS".format(average_performance_score))
print("Maximum performance: {} G-OPS".format(np.max(performance_score)))
print("Standard deviation of performance: {} G-OPS".format(np.std(performance_score)))
print("Number of samples within 1 standard deviation of the mean performance: {}".format(within_std(performance_score, average_performance_score)))
print("Number of entries within 1 standard deviation of the maximum performance: {}".format(within_std(performance_score, np.max(performance_score))))

Beginning software performance test.
Time taken for the software performance test: 591.1003868579865
Time for each iteration: 23.64405774116516
Number of performance samples collected: 25
Average performance: 0.075277594339052 G-OPS
Maximum performance: 0.07532152845163172 G-OPS
Standard deviation of performance: 5.381938634194591e-05 G-OPS
Number of samples within 1 standard deviation of the mean performance: 21
Number of entries within 1 standard deviation of the maximum performance: 19


**FPGA Power Test**

In [9]:
power_images = images[0 : 1000]
power_input_images = allocate(shape=((len(power_images), 28*28)), dtype=np.int32)
power_results = allocate(shape=((len(power_images), 10)), dtype=np.int32)
for itr, image in enumerate(power_images):
    power_input_images[itr] = image.flatten().astype(np.int32)
input("Press enter when you are ready to start the power draw test. Please record the nominal power draw before proceeding.")
timer = time.time()
while (time.time() - timer) < 60:
    hw.batch_inference(power_input_images, power_results)
print("FPGA power test has concluded.")
del(power_input_images)
del(power_results)

Press enter when you are ready to start the power draw test. Please record the nominal power draw before proceeding.
FPGA power test has concluded.


**CPU Power Test**

In [10]:
input("Press enter when you are ready to start the power draw test. Please record the nominal power draw before proceeding.")
timer = time.time()
while (time.time() - timer) < 60:
    for image in power_images:
        inference(image)
print("FPGA power test has concluded.")

Press enter when you are ready to start the power draw test. Please record the nominal power draw before proceeding.
FPGA power test has concluded.
