In [2]:
def pretty_print_speed(speed):
    """Format a throughput given in bytes per second as a bit-rate string.

    The value is multiplied by 8 (bytes -> bits) and scaled with binary
    prefixes (1 Kbps = 1024 bps, 1 Mbps = 1024 * 1024 bps).

    Args:
        speed: Throughput in bytes per second (int or float).

    Returns:
        A string with two decimals and a unit, e.g. ``'29.47 Mbps'``.
    """
    bits_per_second = speed * 8  # bytes/s -> bits/s
    # Largest unit first; ">=" so that a value sitting exactly on a threshold
    # is promoted (1024 bps -> "1.00 Kbps" instead of "1024.00 bps").
    for unit, factor in (('Mbps', 1024 * 1024), ('Kbps', 1024)):
        if bits_per_second >= factor:
            return f'{bits_per_second / factor:.2f} {unit}'
    return f'{bits_per_second:.2f} bps'

In [3]:
import pynq
import numpy as np
import time

class Pynqrypt:
    """Driver for the ``pynqrypt_encrypt_0`` IP core of a PYNQ overlay.

    Call order used by the benchmark in this notebook: ``set_key``,
    ``set_nonce``, ``set_length``, ``get_input_array``, ``get_output_array``,
    ``prepare``, ``run_blocking_timed`` (or ``run_blocking``), ``cleanup``.
    """

    KEY_SIZE = 16    # bytes: four 32-bit words
    NONCE_SIZE = 12  # bytes: three 32-bit words
    _WORD_MASK = 2**32 - 1

    def __init__(self, file, post_ap=False):
        """Load the overlay and select how key and nonce are transferred.

        Args:
            file: Overlay file handed to ``pynq.Overlay``.
            post_ap: When False, key and nonce are written through dedicated
                MMIO windows located at the ``Memory_key`` / ``Memory_nonce``
                register addresses. When True, they are written to the
                ``key_N`` / ``nonce_N`` registers of the register map.
        """
        self.overlay = pynq.Overlay(file)
        self.ip = self.overlay.pynqrypt_encrypt_0
        self.reg_map = self.ip.register_map
        self.post_ap = post_ap
        if not post_ap:
            base_addr = self.ip.mmio.base_addr
            # NOTE(review): the window lengths are 16 * 8 and 12 * 8. If the
            # MMIO length argument is in bytes, these are 8x larger than the
            # 16-byte key / 12-byte nonce actually written - confirm.
            self.key_mmio = pynq.MMIO(base_addr + self.reg_map.Memory_key.address, 16 * 8)
            self.nonce_mmio = pynq.MMIO(base_addr + self.reg_map.Memory_nonce.address, 12 * 8)
        self.length = 0

    @staticmethod
    def _require_size(name, data, expected):
        """Raise ValueError unless ``data`` holds exactly ``expected`` bytes."""
        if len(data) != expected:
            raise ValueError(f'{name} must be exactly {expected} bytes, got {len(data)}')

    def set_key(self, key: bytes):
        """Transfer the 16-byte key to the IP.

        Raises:
            ValueError: If ``key`` is not exactly 16 bytes long. Without this
                check a short key would be silently zero-extended on the
                register path and produce short word writes on the MMIO path.
        """
        self._require_size('key', key, self.KEY_SIZE)
        if not self.post_ap:
            # Four consecutive 4-byte slices, written at byte offsets 0, 4, 8, 12.
            for word in range(4):
                self.key_mmio.write(4 * word, key[word * 4: (word + 1) * 4])
        else:
            value = int.from_bytes(key, 'big')
            # key_1 receives the least-significant 32 bits of the big-endian value.
            self.reg_map.key_1 = value & self._WORD_MASK
            self.reg_map.key_2 = (value >> 32) & self._WORD_MASK
            self.reg_map.key_3 = (value >> 64) & self._WORD_MASK
            self.reg_map.key_4 = (value >> 96) & self._WORD_MASK

    def set_nonce(self, nonce: bytes):
        """Transfer the 12-byte nonce to the IP.

        Raises:
            ValueError: If ``nonce`` is not exactly 12 bytes long.
        """
        self._require_size('nonce', nonce, self.NONCE_SIZE)
        if not self.post_ap:
            # Three consecutive 4-byte slices, written at byte offsets 0, 4, 8.
            for word in range(3):
                self.nonce_mmio.write(4 * word, nonce[word * 4: (word + 1) * 4])
        else:
            value = int.from_bytes(nonce, 'big')
            # nonce_1 receives the least-significant 32 bits of the big-endian value.
            self.reg_map.nonce_1 = value & self._WORD_MASK
            self.reg_map.nonce_2 = (value >> 32) & self._WORD_MASK
            self.reg_map.nonce_3 = (value >> 64) & self._WORD_MASK

    def set_length(self, length: int):
        """Write ``length`` to ``plaintext_length_1`` and remember it.

        The remembered value sizes the buffers created by
        ``get_input_array`` and ``get_output_array``.
        """
        self.reg_map.plaintext_length_1 = length
        self.length = length

    def get_length(self) -> int:
        """Read back ``plaintext_length_1`` from the register map.

        NOTE(review): this returns whatever the register map yields for the
        attribute; confirm it is a plain int rather than a register object.
        """
        return self.reg_map.plaintext_length_1

    def get_input_array(self):
        """Allocate a ``uint8`` buffer of ``self.length`` elements for the plaintext.

        The buffer is also stored as ``self.inbuff`` for ``prepare``.
        """
        buff = pynq.allocate(self.length, np.uint8)
        self.inbuff = buff
        return buff

    def get_output_array(self):
        """Allocate a ``uint8`` buffer of ``self.length`` elements for the ciphertext.

        The buffer is also stored as ``self.outbuff`` for ``prepare``.
        """
        buff = pynq.allocate(self.length, np.uint8)
        self.outbuff = buff
        return buff

    def prepare(self):
        """Flush the input buffer and hand both buffer addresses to the IP.

        Both ``get_input_array`` and ``get_output_array`` must have been
        called first; otherwise the buffer attributes do not exist.
        """
        self.inbuff.flush()
        self.reg_map.plaintext_1 = self.inbuff.physical_address
        self.reg_map.ciphertext_1 = self.outbuff.physical_address

    def run_blocking(self):
        """Start the IP and busy-wait until ``AP_DONE`` is set.

        Returns without starting anything when ``AP_IDLE`` is not set.
        """
        if not self.reg_map.CTRL.AP_IDLE:
            return
        self.reg_map.CTRL.AP_START = 1
        while not self.reg_map.CTRL.AP_DONE:
            continue

    def run_blocking_timed(self):
        """Like ``run_blocking``, but time the start-to-done interval.

        Returns:
            ``(time_start, time_stop)`` from ``time.perf_counter_ns()``, taken
            just before ``AP_START`` is written and just after ``AP_DONE`` is
            observed. Returns ``None`` (nothing is started) when ``AP_IDLE``
            is not set, so callers must check before unpacking.
        """
        if not self.reg_map.CTRL.AP_IDLE:
            return

        time_start = time.perf_counter_ns()
        self.reg_map.CTRL.AP_START = 1
        while not self.reg_map.CTRL.AP_DONE:
            continue
        time_stop = time.perf_counter_ns()

        return (time_start, time_stop)

    def cleanup(self):
        """Drop this object's references to the input and output buffers.

        Safe to call repeatedly or before any buffer was allocated: a missing
        buffer is skipped instead of raising AttributeError, so the other
        buffer is always released as well.
        """
        for name in ('outbuff', 'inbuff'):
            self.__dict__.pop(name, None)
        

In [4]:
from Crypto.Cipher import AES
from random import randbytes
import numpy as np
import time
from typing import Tuple

In [5]:
class PynqryptTester:
    
    def __init__(self, file, post_ap=False):
        self.file = file
        self.pynqrypt = Pynqrypt(file=file, post_ap=post_ap)
            
    def benchmark_and_validate_size(self, length: int = 65536) -> Tuple[int, int]:
        # prepare variables
        key = randbytes(16)
        nonce = randbytes(12)
        
        self.pynqrypt.set_key(key)
        self.pynqrypt.set_nonce(nonce)
        self.pynqrypt.set_length(length)
        random_data = randbytes(length)
        input_data = np.frombuffer(random_data, np.uint8)
        inbuff = self.pynqrypt.get_input_array()
        inbuff[:] = input_data[:]
        outbuff = self.pynqrypt.get_output_array()
        self.pynqrypt.prepare()
        
        # take time measurement of the following call
        start, stop = self.pynqrypt.run_blocking_timed()
        
        outbuff.invalidate()
        
        # validate correctness of operation
        cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
        start2 = time.perf_counter_ns()
        expected_output = cipher.encrypt(random_data)
        stop2 = time.perf_counter_ns()
        assert bytes(outbuff) == expected_output
        
        # clear everything
        del input_data
        del expected_output
        self.pynqrypt.cleanup()
        
        # return time difference
        return (stop - start), (stop2 - start2)
        
    def run_bench_suite(self):
        for size in [16, 1024, 256 * 1024, 16 * 1024 * 1024]:
            diff_fpga = []
            diff_cpu = []
            for _ in range(10):
                tfpga, tcpu = self.benchmark_and_validate_size(size)
                diff_fpga.append(tfpga)
                diff_cpu.append(tcpu)
            diff_fpga.sort()
            diff_cpu.sort()
            
            res_fpga = sum(diff_fpga[1:-1]) / 8e9
            res_cpu = sum(diff_cpu[1:-1]) / 8e9
            
            print(f'Results for bitstream {self.file} with size {size} b:')
            print(f'FPGA: {res_fpga*1000:.2f} ms')
            print(f'CPU: {res_cpu*1000:.2f} ms')
            print(f'FPGA Throughput: ' + pretty_print_speed(size / res_fpga))
            
            

In [5]:
# Load the v1 bitstream. post_ap defaults to False, so key and nonce are
# written through the MMIO windows.
tester = PynqryptTester('./pynqrypt-v1.xsa')

In [6]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v1.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.11 ms
FPGA Throughput: 769.69 Kbps
Results for bitstream ./pynqrypt-v1.xsa with size 1024 b:
FPGA: 0.42 ms
CPU: 0.21 ms
FPGA Throughput: 18.76 Mbps
Results for bitstream ./pynqrypt-v1.xsa with size 262144 b:
FPGA: 67.86 ms
CPU: 24.52 ms
FPGA Throughput: 29.47 Mbps
Results for bitstream ./pynqrypt-v1.xsa with size 16777216 b:
FPGA: 4333.76 ms
CPU: 1641.08 ms
FPGA Throughput: 29.54 Mbps


In [7]:
# Load the v2 bitstream (post_ap defaults to False: MMIO key/nonce path).
tester = PynqryptTester('./pynqrypt-v2.xsa')

In [8]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v2.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.12 ms
FPGA Throughput: 776.53 Kbps
Results for bitstream ./pynqrypt-v2.xsa with size 1024 b:
FPGA: 0.39 ms
CPU: 0.21 ms
FPGA Throughput: 19.79 Mbps
Results for bitstream ./pynqrypt-v2.xsa with size 262144 b:
FPGA: 68.10 ms
CPU: 24.29 ms
FPGA Throughput: 29.37 Mbps
Results for bitstream ./pynqrypt-v2.xsa with size 16777216 b:
FPGA: 4348.58 ms
CPU: 1640.27 ms
FPGA Throughput: 29.43 Mbps


In [9]:
# Load the v3 bitstream (post_ap defaults to False: MMIO key/nonce path).
tester = PynqryptTester('./pynqrypt-v3.xsa')

In [10]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v3.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.11 ms
FPGA Throughput: 806.25 Kbps
Results for bitstream ./pynqrypt-v3.xsa with size 1024 b:
FPGA: 0.38 ms
CPU: 0.20 ms
FPGA Throughput: 20.62 Mbps
Results for bitstream ./pynqrypt-v3.xsa with size 262144 b:
FPGA: 56.50 ms
CPU: 24.28 ms
FPGA Throughput: 35.40 Mbps
Results for bitstream ./pynqrypt-v3.xsa with size 16777216 b:
FPGA: 3607.68 ms
CPU: 1635.66 ms
FPGA Throughput: 35.48 Mbps


In [11]:
# Load the v4 bitstream (post_ap defaults to False: MMIO key/nonce path).
tester = PynqryptTester('./pynqrypt-v4.xsa')

In [12]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v4.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.12 ms
FPGA Throughput: 792.63 Kbps
Results for bitstream ./pynqrypt-v4.xsa with size 1024 b:
FPGA: 0.34 ms
CPU: 0.21 ms
FPGA Throughput: 22.97 Mbps
Results for bitstream ./pynqrypt-v4.xsa with size 262144 b:
FPGA: 45.91 ms
CPU: 24.58 ms
FPGA Throughput: 43.56 Mbps
Results for bitstream ./pynqrypt-v4.xsa with size 16777216 b:
FPGA: 2928.72 ms
CPU: 1638.62 ms
FPGA Throughput: 43.71 Mbps


In [13]:
# Load the v5.1 bitstream. post_ap=True makes Pynqrypt write key and nonce
# to the key_N / nonce_N registers instead of the MMIO windows.
tester = PynqryptTester('./pynqrypt-v5.1.xsa', post_ap=True)

In [14]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v5.1.xsa with size 16 b:
FPGA: 0.17 ms
CPU: 0.12 ms
FPGA Throughput: 746.04 Kbps
Results for bitstream ./pynqrypt-v5.1.xsa with size 1024 b:
FPGA: 0.28 ms
CPU: 0.21 ms
FPGA Throughput: 27.49 Mbps
Results for bitstream ./pynqrypt-v5.1.xsa with size 262144 b:
FPGA: 33.08 ms
CPU: 24.49 ms
FPGA Throughput: 60.46 Mbps
Results for bitstream ./pynqrypt-v5.1.xsa with size 16777216 b:
FPGA: 2107.78 ms
CPU: 1638.61 ms
FPGA Throughput: 60.73 Mbps


In [15]:
# Load the v6 bitstream (post_ap=True: register-based key/nonce path).
tester = PynqryptTester('./pynqrypt-v6.xsa', post_ap=True)

In [16]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v6.xsa with size 16 b:
FPGA: 0.15 ms
CPU: 0.11 ms
FPGA Throughput: 814.09 Kbps
Results for bitstream ./pynqrypt-v6.xsa with size 1024 b:
FPGA: 0.21 ms
CPU: 0.21 ms
FPGA Throughput: 36.90 Mbps
Results for bitstream ./pynqrypt-v6.xsa with size 262144 b:
FPGA: 24.72 ms
CPU: 24.14 ms
FPGA Throughput: 80.90 Mbps
Results for bitstream ./pynqrypt-v6.xsa with size 16777216 b:
FPGA: 1573.01 ms
CPU: 1644.07 ms
FPGA Throughput: 81.37 Mbps


In [17]:
# Load the v7 bitstream (post_ap=True: register-based key/nonce path).
tester = PynqryptTester('./pynqrypt-v7.xsa', post_ap=True)

In [18]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v7.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.12 ms
FPGA Throughput: 768.35 Kbps
Results for bitstream ./pynqrypt-v7.xsa with size 1024 b:
FPGA: 0.22 ms
CPU: 0.21 ms
FPGA Throughput: 35.65 Mbps
Results for bitstream ./pynqrypt-v7.xsa with size 262144 b:
FPGA: 19.16 ms
CPU: 24.88 ms
FPGA Throughput: 104.40 Mbps
Results for bitstream ./pynqrypt-v7.xsa with size 16777216 b:
FPGA: 1216.49 ms
CPU: 1644.51 ms
FPGA Throughput: 105.22 Mbps


In [19]:
# Load the v8 bitstream (post_ap=True: register-based key/nonce path).
tester = PynqryptTester('./pynqrypt-v8.xsa', post_ap=True)

In [20]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v8.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.11 ms
FPGA Throughput: 775.74 Kbps
Results for bitstream ./pynqrypt-v8.xsa with size 1024 b:
FPGA: 0.15 ms
CPU: 0.21 ms
FPGA Throughput: 50.84 Mbps
Results for bitstream ./pynqrypt-v8.xsa with size 262144 b:
FPGA: 7.60 ms
CPU: 24.32 ms
FPGA Throughput: 263.28 Mbps
Results for bitstream ./pynqrypt-v8.xsa with size 16777216 b:
FPGA: 475.91 ms
CPU: 1641.55 ms
FPGA Throughput: 268.96 Mbps


In [18]:
# Load the v9.1 bitstream (post_ap=True: register-based key/nonce path).
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours - re-run the notebook top to bottom to confirm the results.
tester = PynqryptTester('./pynqrypt-v9.1.xsa', post_ap=True)

In [22]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-v9.1.xsa with size 16 b:
FPGA: 0.15 ms
CPU: 0.11 ms
FPGA Throughput: 814.54 Kbps
Results for bitstream ./pynqrypt-v9.1.xsa with size 1024 b:
FPGA: 0.14 ms
CPU: 0.20 ms
FPGA Throughput: 53.93 Mbps
Results for bitstream ./pynqrypt-v9.1.xsa with size 262144 b:
FPGA: 6.80 ms
CPU: 24.32 ms
FPGA Throughput: 293.91 Mbps
Results for bitstream ./pynqrypt-v9.1.xsa with size 16777216 b:
FPGA: 427.36 ms
CPU: 1639.08 ms
FPGA Throughput: 299.51 Mbps


In [20]:
# Load the "final" bitstream (post_ap=True: register-based key/nonce path).
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours - re-run the notebook top to bottom to confirm the results.
tester = PynqryptTester('./pynqrypt-final.xsa', post_ap=True)

In [24]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-final.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.11 ms
FPGA Throughput: 796.48 Kbps
Results for bitstream ./pynqrypt-final.xsa with size 1024 b:
FPGA: 0.15 ms
CPU: 0.20 ms
FPGA Throughput: 53.06 Mbps
Results for bitstream ./pynqrypt-final.xsa with size 262144 b:
FPGA: 5.91 ms
CPU: 24.49 ms
FPGA Throughput: 338.16 Mbps
Results for bitstream ./pynqrypt-final.xsa with size 16777216 b:
FPGA: 369.10 ms
CPU: 1638.46 ms
FPGA Throughput: 346.79 Mbps


In [27]:
# Load the "final-v2" bitstream (post_ap=True: register-based key/nonce path).
# NOTE(review): this cell's execution count is higher than that of the
# benchmark cell that follows it - re-run top to bottom to confirm the results.
tester = PynqryptTester('./pynqrypt-final-v2.xsa', post_ap=True)

In [26]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-final-v2.xsa with size 16 b:
FPGA: 0.16 ms
CPU: 0.11 ms
FPGA Throughput: 804.07 Kbps
Results for bitstream ./pynqrypt-final-v2.xsa with size 1024 b:
FPGA: 0.15 ms
CPU: 0.20 ms
FPGA Throughput: 53.46 Mbps
Results for bitstream ./pynqrypt-final-v2.xsa with size 262144 b:
FPGA: 5.44 ms
CPU: 24.41 ms
FPGA Throughput: 367.77 Mbps
Results for bitstream ./pynqrypt-final-v2.xsa with size 16777216 b:
FPGA: 338.40 ms
CPU: 1636.12 ms
FPGA Throughput: 378.25 Mbps


In [25]:
# Load the "final-v3" bitstream (post_ap=True: register-based key/nonce path).
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours - re-run the notebook top to bottom to confirm the results.
tester = PynqryptTester('./pynqrypt-final-v3.xsa', post_ap=True)

In [28]:
# Benchmark the bitstream loaded in the previous cell; results are printed.
tester.run_bench_suite()

Results for bitstream ./pynqrypt-final-v3.xsa with size 16 b:
FPGA: 0.15 ms
CPU: 0.10 ms
FPGA Throughput: 831.11 Kbps
Results for bitstream ./pynqrypt-final-v3.xsa with size 1024 b:
FPGA: 0.15 ms
CPU: 0.20 ms
FPGA Throughput: 52.99 Mbps
Results for bitstream ./pynqrypt-final-v3.xsa with size 262144 b:
FPGA: 4.66 ms
CPU: 24.25 ms
FPGA Throughput: 428.98 Mbps
Results for bitstream ./pynqrypt-final-v3.xsa with size 16777216 b:
FPGA: 290.04 ms
CPU: 1638.11 ms
FPGA Throughput: 441.31 Mbps
