In [10]:
from pynq import Overlay
import os
import sys
import numpy as np

# Absolute path of the parent of the current working directory; the local
# `hop` package is imported from there.
HOP_DIR=os.path.abspath("../")
# Guarded so that re-running this cell does not keep stacking duplicate
# entries at the front of sys.path.
if HOP_DIR not in sys.path:
    sys.path.insert(0, HOP_DIR)
import hop

# Directory (with trailing slash) that the overlay bitstream is loaded from.
OVERLAY_DIR=f'{HOP_DIR}/overlays/a_add/'

# Preparation

#### Load the overlay onto the FPGA

In [11]:
# Load the adder overlay onto the FPGA; `ol` is the handle used by the cells below.
ol = Overlay(OVERLAY_DIR + "add.bit")

In [3]:
# Display the overlay's IP dictionary (IP name -> metadata such as type,
# memory id and parameters) to confirm the adder IP is present.
ol.ip_dict

{'adder_ppo_0': {'type': 'HoP:HoP:adder_ppo:0.1',
  'mem_id': 's_axi_cep',
  'memtype': 'REGISTER',
  'gpio': {},
  'interrupts': {},
  'parameters': {'C_S_AXI_CEP_ADDR_WIDTH': '7',
   'C_S_AXI_CEP_DATA_WIDTH': '32',
   'C_M_AXI_MST_ID_WIDTH': '1',
   'C_M_AXI_MST_ADDR_WIDTH': '64',
   'C_M_AXI_MST_DATA_WIDTH': '32',
   'C_M_AXI_MST_AWUSER_WIDTH': '1',
   'C_M_AXI_MST_ARUSER_WIDTH': '1',
   'C_M_AXI_MST_WUSER_WIDTH': '1',
   'C_M_AXI_MST_RUSER_WIDTH': '1',
   'C_M_AXI_MST_BUSER_WIDTH': '1',
   'C_M_AXI_MST_USER_VALUE': '0x00000000',
   'C_M_AXI_MST_PROT_VALUE': '"000"',
   'C_M_AXI_MST_CACHE_VALUE': '"0011"',
   'C_M_AXI_MST_ENABLE_ID_PORTS': 'true',
   'C_M_AXI_MST_ENABLE_USER_PORTS': 'false',
   'Component_Name': 'add_adder_ppo_0_0',
   'clk_period': '10',
   'machine': '64',
   'combinational': '0',
   'latency': 'undef',
   'II': 'x',
   'EDK_IPTYPE': 'PERIPHERAL',
   'C_S_AXI_CEP_BASEADDR': '0x40000000',
   'C_S_AXI_CEP_HIGHADDR': '0x4000FFFF',
   'ADDR_WIDTH': '7',
   'DATA_WIDTH

#### Register the overlay with the HoP context

In [12]:
# Build the HoP context from the loaded overlay; later cells register and
# look up functions through `context`.
context = hop.Context(ol)

# Use

#### Create some Python functions

In [5]:
def py_func1() -> int:
    """Zero-argument sample function; always yields 58."""
    value = 58
    return value

def py_func2() -> int:
    """Zero-argument sample function; always yields 1000."""
    value = 1000
    return value

def py_func3() -> int:
    """Zero-argument sample function; always yields 100."""
    value = 100
    return value

In [6]:
# Print everything the context currently knows about, grouped under the
# "hardware", "python" and "cpp" headings.
context.print_all_objects()

hardware:
	add : b32 -> b32 -> b32
python:
cpp:


#### Register functions with HoP context

In [7]:
import importlib as il

# When a context already exists, tear it down and rebuild it from a freshly
# reloaded `hop` module and overlay before registering anything
# (presumably so that re-running this cell starts from a clean state -- confirm).
if 'context' in locals():
    del context
    il.reload(hop)
    hop.Context.reloadModules()
    # Use the same bitstream name as the initial overlay-load cell ("add.bit");
    # this cell previously asked for "a_add.bit", which did not match it.
    ol = Overlay(OVERLAY_DIR + "add.bit")
    context = hop.Context(ol)

# Register the three Python functions and one literal constant, each tagged
# "b32" -- the same type tag the hardware `add` signature is printed with.
a_py = context.register(py_func1, "b32")
b_py = context.register(py_func2, "b32")
c_py = context.register(py_func3, "b32")
d_constant = context.register(1000, 'b32')

In [8]:
# Get our hardware: look up the `add` entry in the context's 'hardware'
# function table. The cells below call it like an ordinary Python function.
add = context.functions['hardware']['add']

### Use add interchangeably

In [None]:
# Mix plain Python ints and NumPy scalars as operands of the hardware adder.
# np.uint32 is used throughout: the adder is typed b32, whereas np.uint has a
# platform-dependent width.
print(add(1, np.uint32(1000)))
print(add(np.uint32(3), 1))
# The result of one hardware call can feed straight into another.
print(add(np.uint32(50), add(np.uint32(25), np.uint32(25))))
print(add(np.uint32(100), np.uint32(100)))
# Bare last expression: shown as the cell's Out[] value instead of printed.
add(np.uint32(150), np.uint32(150))

### Reduce with hardware

In [15]:
import functools 
import time

# 100 ones, so both reductions should print 100.
l = [1] * 100

# Hardware reduction. The print stays inside the timed region, as before, so
# the measurement still covers whatever work producing the printed value needs.
hw_start = time.perf_counter()
print(functools.reduce(add, l))
hw_end = time.perf_counter()
# Same precision for both timings: the software run takes about a millisecond,
# which a two-decimal format would round to 0.00.
print(f'hw ttf: {hw_end - hw_start:.6f}')

# Pure-Python reduction over the same list, as the baseline.
sw_start = time.perf_counter()
print(functools.reduce(lambda a,b: a + b, l))
sw_end = time.perf_counter()
print(f'sw ttf: {sw_end - sw_start:.6f}')

100
hw ttf: 0.5429547560001993
100
sw ttf: 0.0011761110004044895


#### Imitate Keras Sequential Executor
TensorFlow layers can be implemented in hardware. Example of defining a CNN:

##### Imitate the above NN

In [None]:
import random
class fakeNN:
    """Toy stand-in for a sequential network whose "layers" are two-argument callables.

    Each layer receives the running value split into two integer parts and
    returns the next running value.
    """

    def __init__(self, layers, rng=None):
        """Store the layer callables and the random source.

        Args:
            layers: iterable of callables taking ``(left, right)``.
            rng: optional object with a ``random()`` method returning a float
                (e.g. ``random.Random(seed)``) for reproducible runs.
                Defaults to the global ``random`` module.
        """
        self.layers = layers
        self.rng = rng if rng is not None else random

    def singleEpoch(self, inputLayer):
        """Feed ``inputLayer`` through every layer in order and return the result.

        Before each layer the running value is split at a random fraction.
        Both parts are truncated with ``int()``, so they may sum to one less
        than the value they were split from.
        """
        output = inputLayer
        for layer in self.layers:
            leftPercent = self.rng.random()
            rightPercent = 1.0 - leftPercent
            left = int(leftPercent * output)
            right = int(rightPercent * output)
            output = layer(left, right)
        return output

In [None]:
# Three references to the same hardware adder serve as the network's layers.
layers = [add] * 3
nn = fakeNN(layers)
# Run one pass starting from 32 and show the final value.
print(nn.singleEpoch(32))

# Debug

In [15]:
# Dump the adder's register space (sig, status, call_count, debug, rep_addr,
# cready) as a debugging aid.
add.printRegspacePretty()

sig:         22224
status:      0
call_count:  1
debug:       377790472
rep_addr:    0
cready:      377790464
