# Linear Regression on PYNQ-Z1
### n_features = 32, n_outputs=10

In [1]:
import numpy as np
from sklearn import datasets
from pynq_sklearn.linear_model import PynqLinearRegression
import time

# Build a synthetic multi-output regression problem: 5000 samples with
# 32 features and 10 targets. The values are scaled and cast to int32 for the
# hardware path only later, when the test inputs are prepared.
X, y = datasets.make_regression(
    n_samples=5000,
    n_features=32,
    n_targets=10,
    noise=4.0,
    bias=26.0,
    random_state=43,
)
from sklearn.model_selection import train_test_split

# Hold out 1000 samples for testing; the remainder is used for fitting.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1000,
    random_state=42,
)

#### 1. Fit a PYNQ Linear Regression Model

In [2]:
# Create the regressor with an intercept term and fit it on the training split.
model = PynqLinearRegression(fit_intercept=True)
model.fit(X_train, y_train)

#### 2. With hw_accel=True, deploy prediction on the FPGA 

In [3]:
# Convert the test data to fixed point for the hardware path: each value is
# scaled by 2**FRAC_WIDTH and cast to int32 (the cast drops the remaining
# fractional part).
FRAC_WIDTH = 20
X_test_hw = np.multiply(X_test, 1 << FRAC_WIDTH).astype(np.int32)
y_test_hw = np.multiply(y_test, 1 << FRAC_WIDTH).astype(np.int32)

# Re-allocate the quantised inputs through the model's copy_array helper so
# they live in contiguous memory.
X_test_hw = model.copy_array(X_test_hw, dtype=np.int32)

In [4]:
# Predict on the fixed-point test inputs. The result is rescaled by
# 1/(1<<FRAC_WIDTH) when it is compared with the software prediction.
# NOTE(review): hw_accel is not set explicitly before this call; presumably it
# is enabled by default -- confirm against PynqLinearRegression.
y_pred = model.predict(X_test_hw)

#### 3. Alternatively, deploy in SW

In [5]:
# Turn hardware acceleration off and predict on the original float test inputs.
model.hw_accel=False
y_pred_sw = model.predict(X_test)

#### 4. Verify equivalence

In [6]:
# Report the worst-case absolute deviation between the rescaled fixed-point
# predictions and the software predictions. The absolute value is required:
# max() of the signed difference ignores negative deviations, however large.
print(np.abs(y_pred*(1.0/(1<<FRAC_WIDTH)) - y_pred_sw).max())

0.00035776521343677814


#### 5. Performance
Bad performance is observed because of the software-to-hardware processing inside `model.predict()`.

In [7]:
import timeit

# Number of predict() calls timed for each path.
number=200

def hwresp():
    """Run one prediction on the fixed-point test inputs (timed with hw_accel=True)."""
    model.predict(X_test_hw)

def swresp():
    """Run one prediction on the float test inputs (timed with hw_accel=False)."""
    model.predict(X_test)

# Time the hardware path.
model.hw_accel=True
hw_time = timeit.timeit(hwresp,number=number)

# Time the software path; hw_accel is left False after this cell.
model.hw_accel=False
sw_time = timeit.timeit(swresp,number=number)

print("Time taken by sklearn", number,"times",sw_time)
print("Time taken by sklearn+fpga", number,"times",hw_time)
# Print the ratio with two decimals: "%d" truncated it (e.g. 7.93 -> 7) and
# would report any slowdown (ratio below 1) as "0x".
print("HW Speedup = %.2fx"%(sw_time/hw_time))

Time taken by sklearn 200 times 1.101171796000017
Time taken by sklearn+fpga 200 times 0.1389062240000385
HW Speedup = 7x


### Verify Equivalence Again

In [8]:
# Size of a small hand-built check: N samples of n_features inputs each,
# mapped to n_outputs targets.
N = 10
n_features = 32
n_outputs = 10

# Draw inputs, coefficients and intercepts as float32 values between -1 and 1.
# The seed and the order of the three draws (x, a, b) fix the generated values.
np.random.seed(23)
x = 2*np.random.rand(N*n_features).astype(np.float32) - 1
a = 2*np.random.rand(n_features*n_outputs).astype(np.float32) - 1
b = 2*np.random.rand(n_outputs).astype(np.float32) - 1

# Software operands: float32, with x and a reshaped to n_features columns.
x_sw = x.reshape(-1, n_features).astype(np.float32)
a_sw = a.reshape(-1, n_features).astype(np.float32)
b_sw = b.astype(np.float32)

# Hardware operands: the same values as flat int32 fixed point, scaled by
# 2**FRAC_WIDTH.
x_hw = np.multiply(x, 1 << FRAC_WIDTH).astype(np.int32)
a_hw = np.multiply(a, 1 << FRAC_WIDTH).astype(np.int32)
b_hw = np.multiply(b, 1 << FRAC_WIDTH).astype(np.int32)

# Hardware path: load the fixed-point coefficients and intercepts, predict on
# the fixed-point inputs, then scale the result back by 1/2**FRAC_WIDTH.
model.hw_accel = True
model.coef_hw = model.copy_array(a_hw, dtype=np.int32)
model.intercept_hw = model.copy_array(b_hw, dtype=np.int32)
xtest = model.copy_array(x_hw, dtype=np.int32)
ypred = model.predict(xtest)*(1.0/(1 << FRAC_WIDTH))

# Software path: same coefficients and inputs in float32.
model.hw_accel = False
model.coef_ = a_sw
model.intercept_ = b_sw
ypred_sw = model.predict(x_sw)

In [9]:
# Element-wise signed difference between the rescaled hardware predictions
# and the software predictions.
print(ypred - ypred_sw)

[[-1.65700912e-05 -1.04904175e-05 -1.04904175e-05 -1.57356262e-05
  -1.25169754e-05 -1.64508820e-05 -1.20401382e-05 -1.77025795e-05
  -1.25169754e-05 -1.25169754e-05]
 [-1.25169754e-05 -1.71065331e-05 -1.23977661e-05 -1.90734863e-05
  -1.88350677e-05 -1.64508820e-05 -1.71661377e-05 -1.26957893e-05
  -1.38878822e-05 -1.31726265e-05]
 [-1.85966492e-05 -1.33514404e-05 -1.32322311e-05 -1.65700912e-05
  -2.00271606e-05 -2.24113464e-05 -1.25765800e-05 -1.00135803e-05
  -9.77516174e-06 -1.00135803e-05]
 [-2.67028809e-05 -1.46180391e-05 -1.44243240e-05 -1.18017197e-05
  -8.94069672e-06 -1.47819519e-05 -1.90734863e-05 -1.46627426e-05
  -1.02519989e-05 -1.03712082e-05]
 [-1.40070915e-05 -1.57356262e-05 -1.85966492e-05 -1.07288361e-05
  -1.23977661e-05 -1.95503235e-05 -1.87158585e-05 -1.71661377e-05
  -1.09970570e-05 -1.33514404e-05]
 [-1.63912773e-05 -1.31130219e-05 -8.58306885e-06 -1.76429749e-05
  -1.85966492e-05 -1.84774399e-05 -1.15633011e-05 -1.69277191e-05
  -1.23977661e-05 -2.14576721e-05

In [10]:
# Hardware predictions, already scaled back by 1/(1<<FRAC_WIDTH).
print(ypred)

[[ 1.51902103 -1.06475925 -3.28856182  0.91558361 -1.52748013  3.34065819
  -1.47908974  0.20988274  0.62380314 -1.07286453]
 [-1.06260586  0.64205551 -2.64224148  1.0890274   1.4914732   1.52650261
   1.92728806  0.37183475  0.03969097  0.02293491]
 [ 2.60198021 -1.09309387 -1.82859516  0.81075478  3.28142738  2.06953144
   0.1934967  -3.36218548 -3.4997921  -1.64908314]
 [ 4.76349545 -0.15543938 -1.4558115  -0.62310886 -1.96616268  2.09543705
   3.16730022  0.15124321 -1.12252331 -1.68079758]
 [-0.42061615  1.37967873  1.40735435 -2.79505253 -2.38910294  2.42025471
   1.1307478   1.18541527  0.13239765 -2.91860962]
 [-0.63260555 -1.5522356  -2.84490204  2.62291813  2.32999229  1.77416897
  -1.39729786  1.62425613 -1.03446484  2.80814266]
 [-1.13018894 -0.32833958 -0.75452995  1.5255785   1.52586555  0.00692558
  -1.88742161 -0.10761166  0.37456226 -0.31443119]
 [ 2.0696516   1.37896061 -3.30697823 -2.1456728   0.88922977  2.0313158
   1.07676888 -1.58901787  1.07527065 -3.43192959]
 

In [11]:
# Software predictions computed from the float32 coefficients and inputs.
print(ypred_sw)

[[ 1.5190376  -1.0647488  -3.2885513   0.91559935 -1.5274676   3.3406746
  -1.4790777   0.20990044  0.62381566 -1.072852  ]
 [-1.0625933   0.6420726  -2.642229    1.0890465   1.491492    1.5265191
   1.9273052   0.37184745  0.03970486  0.02294809]
 [ 2.6019988  -1.0930805  -1.8285819   0.81077135  3.2814474   2.0695539
   0.19350928 -3.3621755  -3.4997823  -1.6490731 ]
 [ 4.763522   -0.15542476 -1.4557971  -0.62309706 -1.9661537   2.0954518
   3.1673193   0.15125787 -1.122513   -1.6807872 ]
 [-0.42060214  1.3796945   1.407373   -2.7950418  -2.3890905   2.4202743
   1.1307665   1.1854324   0.13240865 -2.9185963 ]
 [-0.63258916 -1.5522225  -2.8448935   2.6229358   2.330011    1.7741874
  -1.3972863   1.6242731  -1.0344524   2.8081641 ]
 [-1.1301762  -0.32832497 -0.75452113  1.5255951   1.52588     0.00694585
  -1.88741    -0.10759211  0.37457806 -0.3144146 ]
 [ 2.0696683   1.3789778  -3.3069673  -2.1456604   0.889248    2.0313344
   1.0767814  -1.5890046   1.0752857  -3.4319205 ]
 [-2.90