# Linear Regression on PYNQ-Z1
### n_features = 32, n_outputs=10

In [1]:
import numpy as np
from sklearn import datasets
from pynq_sklearn.linear_model import PynqLinearRegression
import time

from sklearn.model_selection import train_test_split

# Synthetic regression problem: 5000 samples, 32 input features, 10 targets.
# The generated arrays are floating point; the fixed-point conversion for the
# hardware is done further down in the notebook.
X, y = datasets.make_regression(
    n_samples=5000,
    n_features=32,
    n_targets=10,
    bias=26.0,
    noise=4.0,
    random_state=43,
)

# Hold out 1000 samples for testing; the remaining samples are used for fitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1000, random_state=42
)

#### 1. Fit a PYNQ Linear Regression Model

In [2]:
# Create the PYNQ linear-regression estimator (with an intercept term) and fit
# it on the training split.
model = PynqLinearRegression(fit_intercept=True)
model.fit(X_train, y_train)

#### 2. With hw_accel=True, deploy prediction on the FPGA 

In [3]:
# Convert the float test data to signed 32-bit fixed point for the hardware.
# FRAC_WIDTH is the number of fractional bits: values are scaled by
# 2**FRAC_WIDTH and cast to int32, which leaves 32 - FRAC_WIDTH = 8 integer
# bits (sign included), i.e. a representable range of [-128, 128).
# NOTE(review): confirm that X_test and y_test stay inside that range; values
# outside it cannot survive the int32 cast.
FRAC_WIDTH = 24

y_test_hw = (y_test * (1 << FRAC_WIDTH)).astype(np.int32)

# Original author's note: model.copy allocates the array in contiguous memory.
X_test_hw = model.copy(
    (X_test * (1 << FRAC_WIDTH)).astype(np.int32),
    dtype=np.int32,
)

In [4]:
# Predict on the fixed-point copy of the test inputs.
# NOTE(review): hw_accel is never set before this cell, so this presumably
# relies on the constructor default being True -- confirm.
y_pred = model.predict(X_test_hw)

#### 3. Alternatively, deploy in SW

In [5]:
# Turn the hardware path off on the same model object and predict on the
# original floating-point test inputs.
model.hw_accel=False
y_pred_sw = model.predict(X_test)

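#### 4. Verify equivalence

The output below (the code cell that produced it is not shown) appears to be the element-wise difference between the hardware and software predictions. Entries near zero agree to fixed-point precision. Entries near ±256 are off by 2^(32 − FRAC_WIDTH) = 2^8, which suggests that those outputs fell outside the 32-bit fixed-point range (|y| ≥ 128) and wrapped around.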

[[-4.49478013e-06 -3.09672816e-06 -6.17630509e-07 ... -5.50163787e-06
   2.60537914e-06 -1.38254390e-06]
 [ 3.97980315e-06  3.34022958e-06  2.56000009e+02 ...  2.56000007e+02
   2.56000007e+02  2.56000010e+02]
 [-2.56000009e+02 -2.56000004e+02 -2.56000006e+02 ... -2.56000011e+02
  -4.21495876e-06 -1.89698182e-06]
 ...
 [-7.62413354e-06 -5.32690305e-06 -1.15313373e-05 ... -8.30745788e-06
   2.55999998e+02  2.55999999e+02]
 [ 2.56000004e+02  2.56000005e+02  4.48502087e-06 ... -3.25869550e-07
   6.50461290e-06  6.76954414e-07]
 [-4.63467698e-06 -2.56000004e+02 -2.56000008e+02 ... -2.56000008e+02
  -4.46315710e-07 -2.56000006e+02]]


#### 5. Performance
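Bad performance was observed because of the software-to-hardware processing in `model.predict()`. (Note that the run recorded below nevertheless shows roughly an 8x hardware speedup.)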

In [7]:
import timeit

number = 200  # how many predict() calls each measurement repeats


def hwresp():
    """Run one prediction on the fixed-point test inputs."""
    model.predict(X_test_hw)


def swresp():
    """Run one prediction on the floating-point test inputs."""
    model.predict(X_test)


# Each helper is timed with model.hw_accel switched to the matching mode first.
model.hw_accel = True
hw_time = timeit.Timer(hwresp).timeit(number=number)

model.hw_accel = False
sw_time = timeit.Timer(swresp).timeit(number=number)

print("Time taken by sklearn", number,"times",sw_time)
print("Time taken by sklearn+fpga", number,"times",hw_time)
# %d truncates the ratio to its integer part.
print("HW Speedup = %dx" % (sw_time / hw_time))

Time taken by sklearn 200 times 1.1033033580001756
Time taken by sklearn+fpga 200 times 0.1294480280002972
HW Speedup = 8x


### Verify Equivalence Again

In [18]:
# Second equivalence check: push the same random inputs and weights, all in
# [-1, 1), through the hardware and the software prediction paths.
# NOTE: this replaces coef_/intercept_ and coef_hw/intercept_hw on the shared
# `model` object.
N, n_features, n_outputs = 10, 32, 10

np.random.seed(23)

# Draw order (x, then a, then b) is kept so the seeded values do not change.
x = np.random.rand(N * n_features).astype(np.float32) * 2 - 1
a = np.random.rand(n_features * n_outputs).astype(np.float32) * 2 - 1
b = np.random.rand(n_outputs).astype(np.float32) * 2 - 1

# Fixed-point versions: scale by 2**FRAC_WIDTH, then cast to int32.
x_hw, a_hw, b_hw = (
    (values * (1 << FRAC_WIDTH)).astype(np.int32) for values in (x, a, b)
)

# Floating-point versions for the software path, in rows of n_features.
x_sw = x.reshape(-1, n_features).astype(np.float32)
a_sw = a.reshape(-1, n_features).astype(np.float32)
b_sw = b.astype(np.float32)

# hw accelerator
model.hw_accel = True
model.coef_hw = model.copy(a_hw, dtype=np.int32)
model.intercept_hw = model.copy(b_hw, dtype=np.int32)
xtest = model.copy(x_hw, dtype=np.int32)
# Scale the hardware output back by 2**-FRAC_WIDTH.
ypred = model.predict(xtest) * (1.0 / (1 << FRAC_WIDTH))

# sw
model.hw_accel = False
model.coef_ = a_sw
model.intercept_ = b_sw
ypred_sw = model.predict(x_sw)

In [19]:
# Element-wise gap between the rescaled hardware result and the software result.
print(ypred - ypred_sw)

[[-1.01327896e-06 -8.34465027e-07 -1.43051147e-06 -5.96046448e-07
  -1.13248825e-06 -1.37090683e-06 -8.34465027e-07 -8.94069672e-07
  -1.07288361e-06 -7.74860382e-07]
 [-6.55651093e-07 -8.94069672e-07  0.00000000e+00 -8.94069672e-07
  -5.96046448e-07 -1.01327896e-06 -1.19209290e-06 -8.34465027e-07
  -1.07288361e-06 -9.53674316e-07]
 [-9.53674316e-07 -1.01327896e-06 -9.53674316e-07 -1.01327896e-06
  -1.01327896e-06 -1.13248825e-06 -1.31130219e-06 -1.49011612e-06
  -7.15255737e-07 -1.13248825e-06]
 [-9.53674316e-07 -7.89761543e-07 -8.94069672e-07 -1.01327896e-06
  -7.74860382e-07 -1.31130219e-06 -7.15255737e-07 -1.19209290e-06
  -8.94069672e-07 -1.01327896e-06]
 [-8.94069672e-07 -9.53674316e-07 -8.34465027e-07 -8.94069672e-07
  -7.74860382e-07 -1.19209290e-06 -1.07288361e-06 -7.74860382e-07
  -9.83476639e-07 -8.94069672e-07]
 [-1.01327896e-06 -1.07288361e-06 -1.19209290e-06 -1.13248825e-06
  -1.07288361e-06 -9.53674316e-07 -8.94069672e-07 -7.74860382e-07
  -8.94069672e-07 -5.96046448e-07

In [20]:
# Hardware-path predictions, already scaled back by 1/(1<<FRAC_WIDTH).
print(ypred)

[[ 1.51903659 -1.0647496  -3.28855276  0.91559875 -1.52746874  3.34067327
  -1.47907853  0.20989954  0.62381458 -1.07285279]
 [-1.062594    0.64207172 -2.64222908  1.08904558  1.49149144  1.52651805
   1.92730403  0.37184662  0.03970379  0.02294713]
 [ 2.60199785 -1.09308153 -1.82858288  0.81077033  3.2814464   2.06955272
   0.19350797 -3.36217695 -3.49978304 -1.64907426]
 [ 4.76352119 -0.15542555 -1.45579797 -0.62309808 -1.96615452  2.09545052
   3.16731858  0.15125668 -1.12251395 -1.68078822]
 [-0.42060304  1.37969351  1.40737212 -2.79504269 -2.38909131  2.42027307
   1.13076544  1.18543166  0.13240767 -2.91859716]
 [-0.63259017 -1.55222356 -2.84489465  2.62293464  2.33000982  1.77418649
  -1.39728719  1.62427229 -1.03445333  2.80816352]
 [-1.13017726 -0.32832593 -0.75452214  1.52559429  1.52587879  0.00694501
  -1.88741106 -0.10759324  0.37457711 -0.31441563]
 [ 2.0696671   1.37897658 -3.30696779 -2.14566159  0.88924712  2.03133327
   1.07678056 -1.58900535  1.07528472 -3.43192148]


In [21]:
# Software-path predictions for the same random inputs and weights.
print(ypred_sw)

[[ 1.5190376  -1.0647488  -3.2885513   0.91559935 -1.5274676   3.3406746
  -1.4790777   0.20990044  0.62381566 -1.072852  ]
 [-1.0625933   0.6420726  -2.642229    1.0890465   1.491492    1.5265191
   1.9273052   0.37184745  0.03970486  0.02294809]
 [ 2.6019988  -1.0930805  -1.8285819   0.81077135  3.2814474   2.0695539
   0.19350928 -3.3621755  -3.4997823  -1.6490731 ]
 [ 4.763522   -0.15542476 -1.4557971  -0.62309706 -1.9661537   2.0954518
   3.1673193   0.15125787 -1.122513   -1.6807872 ]
 [-0.42060214  1.3796945   1.407373   -2.7950418  -2.3890905   2.4202743
   1.1307665   1.1854324   0.13240865 -2.9185963 ]
 [-0.63258916 -1.5522225  -2.8448935   2.6229358   2.330011    1.7741874
  -1.3972863   1.6242731  -1.0344524   2.8081641 ]
 [-1.1301762  -0.32832497 -0.75452113  1.5255951   1.52588     0.00694585
  -1.88741    -0.10759211  0.37457806 -0.3144146 ]
 [ 2.0696683   1.3789778  -3.3069673  -2.1456604   0.889248    2.0313344
   1.0767814  -1.5890046   1.0752857  -3.4319205 ]
 [-2.90