In [None]:
from pynq import Overlay
import os
import sys
import numpy as np

# Put the parent directory at the front of sys.path before importing the
# local hop / stubs modules.
HOP_DIR = os.path.abspath("../")
sys.path.insert(0, HOP_DIR)
import hop
import stubs

# Directory that holds the add_reduce overlay files (the bitstream is loaded
# from here in the next cell).
OVERLAY_DIR = os.path.join(HOP_DIR, 'overlays', 'add_reduce')

print(OVERLAY_DIR)

In [None]:
# Build the pynq Overlay from the add_reduce.bit bitstream stored in OVERLAY_DIR.
ol = Overlay(os.path.join(OVERLAY_DIR, "add_reduce.bit"))

In [None]:
# Show the overlay's ip_dict as the cell output.
# NOTE(review): presumably the IP cores pynq discovered in the design — confirm against the pynq docs.
ol.ip_dict

In [None]:
import importlib as il

# Re-execute the hop module and call its Context.reloadModules() hook.
# NOTE(review): presumably this lets edits to hop take effect without a
# kernel restart — confirm what reloadModules() actually reloads.
il.reload(hop)
hop.Context.reloadModules()

# Drop any existing `context`; the next cell creates a new one. At notebook
# top level locals() is the global namespace, so this tests for a global.
if 'context' in locals():
    del context

In [None]:
# Create the hop Context for the loaded overlay; the hardware functions are
# looked up on it in the next cell.
context = hop.Context(ol)

In [None]:
# Fetch the entries registered under the 'hardware' group of the context.
# add_reduce is the function benchmarked below; add is fetched but not used
# in the cells shown here.
add_reduce = context.functions['hardware']['add_reduce']
add = context.functions['hardware']['add']

In [None]:
# Sanity check on 262144 ones: if add_reduce sums its input, both printed
# numbers are expected to be equal.
l = [1] * 262144
print(f'{len(l)} == {add_reduce(l)}')

In [None]:
# Debug aid: call the function object's printRegspacePretty() helper.
# NOTE(review): presumably dumps the register space in readable form — confirm in hop.
add_reduce.printRegspacePretty()

## Performance Comparison

In [None]:
def reduce_py(arr):
    """Sum the items of ``arr`` with an explicit Python ``for`` loop.

    Args:
        arr: Any iterable whose items can be added to an int and to each other.

    Returns:
        The running total, starting from 0 (so an empty iterable yields 0).
    """
    total = 0
    for item in arr:
        total += item
    return total

In [None]:
import functools
def reduce_fn(arr):
    """Sum the items of ``arr`` by folding addition with ``functools.reduce``.

    The fold starts from 0, so an empty input returns 0 (the same result
    ``reduce_py`` gives) instead of raising ``TypeError``.

    Args:
        arr: Any iterable whose items can be added to an int and to each other.

    Returns:
        The sum of all items, or 0 when ``arr`` is empty.
    """
    # A lambda (rather than operator.add) keeps every step a Python-level call.
    return functools.reduce(lambda a, b: a + b, arr, 0)

In [None]:
def reduce_np(arr: 'np.ndarray'):
    """Reduce ``arr`` with NumPy's ``add`` ufunc along its default axis (axis 0).

    Args:
        arr: An ndarray, or anything ``np.add.reduce`` can convert to one.

    Returns:
        Whatever ``np.add.reduce`` produces: a scalar for 1-D input, an array
        with the first axis removed otherwise.
    """
    total = np.add.reduce(arr)
    return total

In [None]:
import time
import random


def _time_calls(fn, tests, l):
    """Return the seconds spent calling ``fn(l)`` ``tests`` times in a row."""
    # perf_counter is the clock intended for measuring short intervals; unlike
    # time.time() it is unaffected by system clock adjustments.
    start = time.perf_counter()
    for _ in range(tests):
        fn(l)
    return time.perf_counter() - start


def run_test_suite(tests, l):
    """Time the four reduction implementations on the same input.

    Each implementation (hardware ``add_reduce``, ``reduce_py``, ``reduce_fn``,
    ``reduce_np``) is called ``tests`` times back to back on ``l``; a progress
    percentage is printed on a single line between stages.

    Args:
        tests: Number of consecutive calls to time per implementation.
        l: The input handed unchanged to every implementation.

    Returns:
        Tuple ``(hw_time, py_time, fn_time, np_time)`` of elapsed seconds.
    """
    print('       00%', end='')
    hw_time = _time_calls(add_reduce, tests, l)

    # '\r' moves back to the line start so the percentage is overwritten in place.
    print('\r       25%', end='')
    py_time = _time_calls(reduce_py, tests, l)

    print('\r       50%', end='')
    fn_time = _time_calls(reduce_fn, tests, l)

    print('\r       75%', end='')
    np_time = _time_calls(reduce_np, tests, l)

    print('\r      100%')

    return (hw_time, py_time, fn_time, np_time)

In [None]:
data_size = 262144 # * 32b == 1 MiB

# Ns: how many back-to-back calls are timed per measurement.
# R:  how many times each measurement is repeated.
Ns = [1, 10, 100]
R = 10
# Four inputs with the same element count in different containers:
#   0: uint32 ndarray of ones
#   1: list of np.uint32 scalars
#   2: list of NumPy default-int scalars
#   3: list of random Python ints
Ls = [np.full(shape=(data_size,), fill_value=1, dtype=np.uint32),
      list(np.full(shape=(data_size,), fill_value=1, dtype=np.uint32)),
      list(np.full(shape=(data_size,), fill_value=1, dtype=int)),
      # randint includes both bounds, so cap at 2**32 - 1 to keep every
      # value representable in 32 bits.
      [random.randint(0, 2**32 - 1) for _ in range(data_size)]]
# results[input list, test size, repetition, implementation]; zero-filled so
# an interrupted run never leaves uninitialised memory in the array.
results = np.zeros(shape=(len(Ls), len(Ns), R, 4))

In [None]:
# Fill `results` one measurement at a time: for every input list, repeat R
# times, and within each repetition time every test size in Ns.
for Ln, test_list in enumerate(Ls):
    print(f'L: {Ln}')
    for r in range(R):
        print(f'  R: {r}')
        for Nn, tests in enumerate(Ns):
            print(f'    N: {tests}')
            # The returned tuple is (hardware, python, functools, numpy) and
            # is stored in that order along the last axis.
            results[Ln, Nn, r, :] = run_test_suite(tests, test_list)

In [None]:
# Write the timing array to disk. np.save emits NumPy's binary format, so the
# file is not text CSV despite its ".csv" name.
with open("results.csv", mode='wb') as results_file:
    np.save(results_file, results)

In [None]:
# Read the timings back from disk, replacing `results` with the saved array.
# The file holds the NumPy binary data written by np.save in the previous
# cell, despite its ".csv" name.
results = np.load('results.csv')

In [None]:
# Axes of `results`: (input list, test size, repetition, implementation).
# The last axis is ordered hardware, Python loop, functools, NumPy.
results.shape

In [None]:

# Index of each implementation along the last axis of `results`. Defined here
# so this cell also runs on a fresh kernel, before the plotting cells.
testDict = {'Hardware': 0, 'Python': 1, 'Functools': 2, 'Numpy': 3}

# Mean time over all repetitions for the first input list and the second test
# size (10 calls): hardware first, then NumPy.
print(np.mean(results[0, 1, :, testDict['Hardware']]))
print(np.mean(results[0, 1, :, testDict['Numpy']]))

In [None]:
# The *_time names only exist as locals inside run_test_suite, so derive them
# here from `results`: mean over the repetitions for the first input list and
# the 10-call runs (the slice the previous cell summarises). The last axis is
# ordered hardware, Python, functools, NumPy.
# NOTE(review): confirm this is the slice the summary is meant to report.
hw_time, py_time, fn_time, np_time = np.mean(results[0, 1, :, :], axis=0)

print(f'Python:      {py_time}')
print(f'Functional:  {fn_time}')
print(f'Numpy:       {np_time}')
print(f'Hardware:    {hw_time}')
# Each ratio is that implementation's time as a percentage of the hardware time.
print(f'PY/HW Speed: {(py_time / hw_time) * 100:.2f}%')
print(f'FN/HW Speed: {(fn_time / hw_time) * 100:.2f}%')
print(f'NP/HW Speed: {(np_time / hw_time) * 100:.2f}%')

In [None]:
import matplotlib.pyplot as plt
testNames = ['Hardware', 'Python', 'Functools', 'Numpy']
# Implementation name -> index along the last axis of `results`.
testDict = {'Hardware': 0, 'Python': 1, 'Functools': 2, 'Numpy': 3}
# Index along the first axis of `results` -> label used in file names.
listDict = {0: 'ndarray', 1: 'np_cast_uint32', 2: 'np_cast_int', 3: 'list'}
# Index along the second axis of `results` -> label used in file names.
testsDict = {0: '1t', 1: '10t', 2: '100t'}

# Apply the style before any figure exists so the first plot is styled too.
plt.style.use('fivethirtyeight')
# savefig does not create missing directories.
os.makedirs('plots', exist_ok=True)

# Which test size (second axis of `results`) to plot: index 2 is the 100-call runs.
tests_idx = 2

# One figure per input list, comparing hardware against NumPy.
for sel in range(4):
    fig, ax = plt.subplots()
    ax.set_ylabel('Execution time - seconds (Less is better)')

    # One violin per compared implementation, placed on the tick labelled below.
    for position, name in enumerate(['Hardware', 'Numpy'], start=1):
        ax.violinplot(results[sel, tests_idx, :, testDict[name]], positions=[position])

    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Hardware', 'Numpy'])
    fig.set_figwidth(6)
    fig.set_figheight(8)
    file = f'plots/{listDict[sel]}-{testsDict[tests_idx]}-numpy_hw'
    plt.savefig(file, bbox_inches='tight')
    plt.show()
    # Release the figure; otherwise every iteration keeps one alive.
    plt.close(fig)

In [None]:
import matplotlib.pyplot as plt
testNames = ['Hardware', 'Python', 'Functools', 'Numpy']
# Implementation name -> index along the last axis of `results`.
testDict = {'Hardware': 0, 'Python': 1, 'Functools': 2, 'Numpy': 3}
# Index along the first axis of `results` -> label used in file names.
listDict = {0: 'ndarray', 1: 'np_cast_uint32', 2: 'np_cast_int', 3: 'list'}
# Index along the second axis of `results` -> test-size label.
testsDict = {0: '1t', 1: '10t', 2: '100t'}

# Apply the style before any figure exists so the first plot is styled too.
plt.style.use('fivethirtyeight')
# savefig does not create missing directories.
os.makedirs('plots', exist_ok=True)

# One figure per input list: hardware vs NumPy timings of the 100-call runs.
for sel in range(4):
    fig, ax = plt.subplots()
    ax.set_ylabel('Execution time - seconds (Less is better)')

    # Column 0 is hardware, column 3 is NumPy; index 2 selects the 100-call runs.
    ax.violinplot(results[sel, 2, :, 0], positions=[1])
    ax.violinplot(results[sel, 2, :, 3], positions=[2])

    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Hardware', 'Numpy'])
    fig.set_figwidth(6)
    fig.set_figheight(8)
    file = f'plots/{listDict[sel]}-100t-numpy_hw'
    plt.savefig(file, bbox_inches='tight')
    plt.show()
    plt.close(fig)

In [None]:
# Mean NumPy time as a percentage of mean hardware time, for the first input
# list and the 100-call runs.
f'{(np.average(results[0, 2, :, 3]) / np.average(results[0, 2, :, 0])) * 100 :.2f}%'

In [None]:
# For every input list (100-call runs only): pick the implementation whose
# timings have the largest standard deviation, express that deviation relative
# to the same implementation's mean, and keep the largest such ratio.
m = 0
for i in range(4):
    runs = results[i, 2, :, :]
    j = np.argmax(np.std(runs, axis=0))
    noisiest = runs[:, j]
    m = max(np.std(noisiest) / np.mean(noisiest), m)
print(f'{(m * 100):.2f}%')