# Performance: Linear Regression on PYNQ-Z1
### n_features = 32, n_outputs = 10

#### 1. Set Up CFFI SDSoC Binding (using ABI)

In [5]:
import cffi
from pynq import Overlay,PL,Xlnk
import timeit
import time
import numpy as np


# Paths to the SDSoC-generated shared library and the matching PL bitstream.
libfile = "./pynq_sklearn/libraries/liblreg_32_10.so"
bitfile = "./pynq_sklearn/bitstreams/linear_32_10.bit"

# Initialise the CFFI binding in ABI mode: declare the accelerator's C
# signature, then dlopen the shared library that implements it.
# (Original author's note: this must happen before mem_init() is called;
# the reason is not known.)
affi = cffi.FFI()
affi.cdef("void _p0_LinReg_1_noasync(int * x, int a[320], int b[10], int * output, int datalen);")
alib = affi.dlopen(libfile)

xlnk = Xlnk()
fir_overlay = Overlay(bitfile)
# PL.bitfile_name is an absolute path, so comparing it with the relative
# `bitfile` string never matches and the bitstream would be re-downloaded on
# every run. Compare against the overlay's own resolved path instead, so the
# download is skipped when this bitstream is already on the PL.
if PL.bitfile_name != fir_overlay.bitfile_name:
    fir_overlay.download()

#xlnk.xlnk_reset()
def mem_init(buflen):
    """Allocate a contiguous (CMA) buffer and return it as a C ``int`` pointer.

    ``buflen`` is forwarded unchanged to ``xlnk.cma_alloc``; the callers in
    this notebook pass a size in bytes (element count * 4).

    Returns whatever ``xlnk.cma_cast(..., "int")`` produces for the new buffer.
    """
    return xlnk.cma_cast(xlnk.cma_alloc(buflen), "int")

# --- Benchmark configuration -------------------------------------------------
number = 2000        # timeit repetitions for each implementation
n = 1000             # samples processed per accelerator call
dlen = n             # value passed as the accelerator's `datalen` argument
n_features = 32
n_outputs = 10

# Host-side array of ones. It is only referenced by the disabled alternative
# below, which would hand NumPy's own memory to the accelerator instead of a
# CMA buffer:
#     din = affi.cast("int *", xin.ctypes.data)
xin = np.ones(n * n_features, dtype=np.int32)

# --- Contiguous buffers shared with the accelerator --------------------------
# Sizes are byte counts; the factor 4 is the size of one C int (assumed 32-bit).
# NOTE(review): nothing in the visible cells writes data into these buffers
# before the benchmark runs -- confirm whether that is intentional.
din = mem_init(4 * n * n_features)            # input samples
a = mem_init(4 * n_features * n_outputs)      # coefficients (320 ints)
b = mem_init(4 * n_outputs)                   # intercepts (10 ints)
dout = mem_init(4 * n * n_outputs)            # predictions

#### 2. Setup Scikit-Learn Predict

In [2]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Synthetic regression problem with the same shape as the accelerator
# (32 input features, 10 output targets); 1000 of the 5000 rows are held out.
X, y = datasets.make_regression(
    n_samples=5000,
    n_features=32,
    n_targets=10,
    bias=100,
    noise=4,
    random_state=4,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1000, random_state=42
)

# Cast the held-out split to 32-bit integers to match the accelerator's int
# interface.
X_test, y_test = X_test.astype("int32"), y_test.astype("int32")

# Fit in floating point, then cast the learned parameters to int32 so the
# software baseline works on integer data like the hardware does.
lin = LinearRegression(fit_intercept=True)
lin.fit(X_train, y_train)
intercept = lin.intercept_.astype("int32")
coef = lin.coef_.T.astype("int32")

# Show the raw pointers of the accelerator buffers.
# NOTE(review): no visible code copies X_test / coef / intercept into
# din / a / b -- confirm the hardware run is meant to use unfilled buffers.
print(din, a, b, dout)

<cdata 'int *' 0x923768> <cdata 'int *' 0x36f2a000> <cdata 'int *' 0x36f29000> <cdata 'int *' 0x2e91e000>


#### 3. Compare time

In [4]:
def sklearn():
    """Software baseline: return ``X_test . coef + intercept`` via NumPy.

    Reads the module-level ``X_test``, ``coef`` and ``intercept`` arrays.
    ``lin.predict(X_test)`` is the disabled alternative to this manual form.
    """
    projected = np.dot(X_test, coef)
    return projected + intercept

# call ABI accelerator
def abiresp():
    """Run the hardware linear-regression kernel once through the CFFI binding.

    Passes the module-level buffers ``din``, ``a``, ``b``, ``dout`` and the
    length ``dlen`` to ``_p0_LinReg_1_noasync``. Returns ``None``.
    """
    kernel = alib._p0_LinReg_1_noasync
    kernel(din, a, b, dout, dlen)
    
print("Running the benchmarks")

# The accelerator is timed first, then the NumPy baseline; each callable is
# executed `number` times and the total elapsed seconds are kept.
abi_time = timeit.Timer(abiresp).timeit(number=number)
sklearn_time = timeit.Timer(sklearn).timeit(number=number)

print("Time taken by sklearn", number, "times", sklearn_time)
print("Time taken by abi", number, "times", abi_time)
# %d truncates the ratio to a whole number (e.g. 4.2 is reported as 4).
print("HW Speedup = %dx" % (sklearn_time / abi_time,))

Running the benchmarks
Time taken by sklearn 2000 times 7.69411593300174
Time taken by abi 2000 times 1.8133551720093237
HW Speedup = 4x
