In [31]:
'''
Write a NumPy LeakyReLU forward pass for use in ANN acceleration.
TensorFlow's model.predict is too slow because it uses tensors optimized for batch predictions.

How to use:
    Set modelPath as the directory containing the saved_model.pb file
    Set loaded_model name

'''

import konverter
import tensorflow as tf
import os
import numpy as np



In [32]:
# Directory of the TensorFlow SavedModel (the folder containing saved_model.pb).
modelPath = r'C:\Users\win7\Desktop\logs\logs\scalars\lnphi_nC4_T300-600_P5-100__100_4_20_100_20200916-112226'
# Base name for every exported file. It is derived from the last component of
# modelPath so the two values cannot drift apart when a different model is
# selected (normpath also tolerates a trailing separator).
# NOTE(review): modelPath is a Windows path; this derivation assumes the
# notebook runs on Windows, like the rest of the hard-coded paths here.
tempname = os.path.basename(os.path.normpath(modelPath))

# Load
loaded_model = tf.keras.models.load_model(modelPath)



In [55]:
# Write the loaded model back out as a single HDF5 file. The file name is
# relative, so it is created in the current working directory.
loaded_model.save(filepath=tempname + '.h5', save_format='h5')

In [56]:
# Convert the .h5 model into a standalone Python + NumPy model.
# os.path.join picks the platform's separator instead of a hard-coded '\\',
# and does not double the separator when the working directory is a drive root.
konverter.konvert(tempname + '.h5',
                  output_file=os.path.join(os.getcwd(), tempname + '_Konverted'))

[92m
Successfully got model architecture! 😄
[0m[37m
[38;5;220mLayers:[0m[37m
  [38;5;74mname: dense[0m[37m
  [38;5;205mactivation: LeakyReLU[0m[37m
  [38;5;135mshape: (4, 20)[0m[37m[38;5;119m
[0m[37m  [38;5;74mname: dense[0m[37m
  [38;5;205mactivation: LeakyReLU[0m[37m
  [38;5;135mshape: (20, 20)[0m[37m[38;5;119m
[0m[37m  [38;5;74mname: dense[0m[37m
  [38;5;205mactivation: LeakyReLU[0m[37m
  [38;5;135mshape: (20, 20)[0m[37m[38;5;119m
[0m[37m  [38;5;74mname: dense[0m[37m
  [38;5;205mactivation: LeakyReLU[0m[37m
  [38;5;135mshape: (20, 20)[0m[37m[38;5;119m
[0m[37m  [38;5;74mname: dense[0m[37m
  [38;5;135mshape: (20, 1)[0m[37m
[0m[37m[38;5;220m
🔨 Now building pure Python + NumPy model...[0m[37m
[92m🙌 Saved Konverted model![0m[37m
[92mOutput model file: C:/Users/win7/PycharmProjects/Ln_phi_model/Applied_model/numpy_relu/lnphi_nC4_T300-600_P5-100__100_4_20_100_20200916-112226_Konverted.py[0m[37m
[92mWeights and biases f

<konverter.Konverter at 0x1bc413a05c8>

## Testing

In [23]:
# Load the weights/biases archive written by Konverter.
# NOTE(security): allow_pickle=True makes np.load unpickle Python objects,
# which can execute arbitrary code - only use it on files you created yourself.
# NOTE(review): this path points at the nC10 model, whereas the cells above
# convert the nC4 model - confirm that this is intended.
# The `with` block closes the .npz archive as soon as the arrays are read;
# `w` and `b` are in-memory copies and stay valid afterwards.
with np.load(r'C:\Users\win7\PycharmProjects\Ln_phi_model\Applied_model\numpy_relu\lnphi_nC10_T300-600_P5-100__100_4_20_100_20200915-230243\lnphi_nC10_T300-600_P5-100__100_4_20_100_20200915-230243_Konverted_weights.npz',
             allow_pickle=True) as wb:
    w, b = wb['wb']

def predict(x, w, b, alpha=0.1):
    """Run the forward pass of a dense LeakyReLU network in plain NumPy.

    Every layer except the last computes ``LeakyReLU(h @ w[i] + b[i])``; the
    last layer is affine only (no activation). The number of layers is taken
    from ``len(w)`` rather than being fixed at five.

    Parameters
    ----------
    x : array_like
        Input vector (or a batch with one sample per row) whose last axis
        matches the first dimension of ``w[0]``.
    w, b : sequence of numpy.ndarray
        Per-layer weight matrices and bias vectors; ``len(w) == len(b)``.
    alpha : float, optional
        Slope applied to non-positive pre-activations. Defaults to 0.1, the
        value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        Output of the final (linear) layer.

    Raises
    ------
    ValueError
        If ``w`` is empty or ``w`` and ``b`` have different lengths.
    """
    n_layers = len(w)
    if n_layers == 0 or n_layers != len(b):
        raise ValueError(
            "w and b must be non-empty and of equal length "
            "(got %d and %d)" % (n_layers, len(b))
        )

    out = x
    # Hidden layers: affine transform followed by LeakyReLU.
    for i in range(n_layers - 1):
        out = np.dot(out, w[i]) + b[i]
        # A Python-float alpha keeps float32 activations in float32.
        out = np.where(out > 0, out, out * alpha)

    # Output layer: affine only.
    last = n_layers - 1
    return np.dot(out, w[last]) + b[last]


In [33]:
# Run the NumPy forward pass on one sample and display the prediction so it
# can be compared with the reference value.
# NOTE(review): true_val is never compared with y_hat_nC10 in this cell, and
# the recorded output (0.75913244) differs from true_val (0.72913244) in the
# second decimal - confirm whether true_val contains a typo.
true_val = 0.72913244
X_prepared_nC10 = np.array([0.096363940, 0.231393959, 0.228188976, 0.098272932], dtype=np.float32)
y_hat_nC10 = predict(X_prepared_nC10,w,b)
y_hat_nC10


array([0.75913244], dtype=float32)

In [25]:
# Time a single-sample forward pass of the pure-NumPy predict().
%timeit predict(X_prepared_nC10,w,b)

37 µs ± 135 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## Normalization with numpy
Extract the min-max values stored in the pipeline.  
    
    pr.attr_full_pipeline_nC4.named_transformers_.num.named_steps.min_max_scaler.data_min_
    Out[37]: array([0.01831612, 0.006697  , 0.006697  , 0.01831612])
    pr.attr_full_pipeline_nC4.named_transformers_.num.named_steps.min_max_scaler.data_max_
    Out[38]: array([15.97685643,  0.75642223,  0.29020326,  6.85400705])
    

## Test speedup with numba

In [26]:
from numba import jit

In [35]:
@jit(nopython=True)
def predict_numba(x):
    """Numba-compiled forward pass of the 5-layer LeakyReLU network.

    Four hidden layers use a LeakyReLU with slope 0.1; the fifth layer is
    affine only. The weights ``w0``..``w4`` and biases ``b0``..``b4`` are read
    from module-level globals rather than passed in (the alternative signature
    was ``predict_numba(x,w0,w1,w2,w3,w4,b0,b1,b2,b3,b4)``).

    NOTE(review): those globals are assigned in a later cell, so they must
    exist before the first call. Numba is documented to treat globals as
    compile-time constants - confirm that later reassignment of the weights is
    not expected to affect an already compiled function.
    """
    pre0 = np.dot(x, w0) + b0
    act0 = np.where(pre0 > 0, pre0, pre0 * 0.1)

    pre1 = np.dot(act0, w1) + b1
    act1 = np.where(pre1 > 0, pre1, pre1 * 0.1)

    pre2 = np.dot(act1, w2) + b2
    act2 = np.where(pre2 > 0, pre2, pre2 * 0.1)

    pre3 = np.dot(act2, w3) + b3
    act3 = np.where(pre3 > 0, pre3, pre3 * 0.1)

    # Output layer: no activation.
    return np.dot(act3, w4) + b4

In [36]:
# Cast the five weight matrices and five bias vectors to float64 and bind them
# to the module-level names read by predict_numba (presumably so that every
# operand inside the jitted function shares one dtype - confirm).
w0, w1, w2, w3, w4 = [w[k].astype('float64') for k in range(5)]
b0, b1, b2, b3, b4 = [b[k].astype('float64') for k in range(5)]

# Same sample as in the NumPy check, rebuilt as float64 (this rebinds the name
# that previously held the float32 version).
X_prepared_nC10 = np.array([0.096363940, 0.231393959, 0.228188976, 0.098272932], dtype=np.float64)

In [40]:
# Time the numba version that reads its weights from module-level globals.
%timeit predict_numba(X_prepared_nC10)

4.11 µs ± 8.04 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [30]:
# Time the variant that receives weights and biases as explicit arguments.
# NOTE(review): this matches the signature left in the comment on
# predict_numba, not the one-argument definition visible in this notebook -
# this cell appears to predate that definition; confirm before re-running.
%timeit predict_numba(X_prepared_nC10,w0,w1,w2,w3,w4,b0,b1,b2,b3,b4)

6.25 µs ± 18.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [29]:
# Display the numba prediction for comparison with the NumPy result.
# NOTE(review): passes 11 arguments, but the predict_numba definition visible
# in this notebook takes only `x` - stale cell; confirm before re-running.
predict_numba(X_prepared_nC10,w0,w1,w2,w3,w4,b0,b1,b2,b3,b4)

array([0.75913245])

In [133]:
# Replay the first layer (affine + LeakyReLU) and the second layer's affine
# step eagerly, outside numba, apparently to inspect the intermediate dtypes.
l0 = np.dot(X_prepared_nC10, w0) + b0
l0 = np.where(l0 > 0, l0, l0 * 0.1)
l1 = np.dot(l0, w1) + b1

In [138]:
# Inspect the dtype of the first-layer activations.
# NOTE(review): the recorded result is float32, although the visible cast cell
# makes X_prepared_nC10, w0 and b0 float64 - presumably they were redefined as
# float32 in a cell that is not shown; confirm.
l0.dtype

dtype('float32')

In [141]:
@jit(nopython=True)
def predict_numba2(x,w0,w1,w2,w3,w4,b0,b1,b2,b3,b4):
    """Debug probe: print the dtype of the first layer before and after LeakyReLU.

    Only ``x``, ``w0`` and ``b0`` are used; the remaining parameters are
    accepted but ignored. Nothing is returned.
    """
    first_pre = np.dot(x, w0) + b0
    print(first_pre.dtype)
    # Same LeakyReLU expression as in predict_numba (slope 0.1).
    first_act = np.where(first_pre > 0, first_pre, first_pre * 0.1)
    print(first_act.dtype)
    return

In [143]:
# Run the dtype probe. The recorded output shows float32 before the np.where
# step and float64 after it.
predict_numba2(X_prepared_nC10,w0,w1,w2,w3,w4,b0,b1,b2,b3,b4)

float32
float64


In [81]:
# Small float32 test vectors for the `func` experiment in the next cell.
x = np.array([1,2,3], dtype=np.float32)
y = np.array([-1, -2, -3], dtype=np.float32)

In [82]:
# NOTE(review): `func` is not defined anywhere in the visible notebook; this
# cell relies on leftover kernel state and will fail on a fresh kernel unless
# the definition is restored.
ans = func(x,y)

[0. 0. 0.]


In [83]:
# Check the dtype of the value returned by func.
ans.dtype

dtype('float32')

In [13]:
# Shape of the first layer's weight matrix.
w[0].shape

(4, 20)

In [102]:
# Stack the second- and third-layer weight matrices along axis 0 (rows).
ww = np.concatenate((w[1], w[2]), axis=0)

In [103]:
# Shape of the stacked matrix.
ww.shape

(40, 20)

In [104]:
# Shape of a single hidden-layer weight matrix, for comparison with ww.
w[1].shape

(20, 20)

In [108]:
# First row of the stacked matrix; in the recorded outputs it matches the
# first row of w[1] shown in the following cell.
ww[0][:]

array([-0.37685478, -0.84386986,  0.7242062 , -1.6956679 , -0.25299639,
       -0.21747477,  0.3012555 , -3.76763   ,  0.31904596, -3.0864134 ,
       -0.6804349 , -0.58764225, -2.0279973 , -0.28296712, -3.415852  ,
       -0.24605446, -0.13039353, -0.27876443, -2.1728024 , -4.8867693 ],
      dtype=float32)

In [107]:
# Display the full second-layer weight matrix for comparison with ww[0].
# NOTE(review): this dumps the whole array into the output; a slice such as
# the first row would be enough for the comparison.
w[1]

array([[-3.76854777e-01, -8.43869865e-01,  7.24206209e-01,
        -1.69566786e+00, -2.52996385e-01, -2.17474774e-01,
         3.01255494e-01, -3.76763010e+00,  3.19045961e-01,
        -3.08641338e+00, -6.80434883e-01, -5.87642252e-01,
        -2.02799726e+00, -2.82967120e-01, -3.41585207e+00,
        -2.46054456e-01, -1.30393535e-01, -2.78764427e-01,
        -2.17280245e+00, -4.88676929e+00],
       [ 4.76061910e-01, -1.07669973e+00,  8.18186104e-02,
        -1.48979485e-01, -6.67059183e-01,  1.26703858e-01,
         9.60837305e-01, -3.05824041e-01, -2.49848336e-01,
        -2.71173298e-01,  1.89209253e-01,  8.34952176e-01,
        -2.91873187e-01,  1.18913971e-01, -1.37842512e+00,
         2.24435434e-01, -2.05970377e-01,  1.42789751e-01,
        -3.93448234e-01,  6.44974336e-02],
       [-2.93835258e+00,  8.20785344e-01, -9.94703114e-01,
         2.16116476e+00,  7.32618093e-01, -1.40510976e+00,
        -4.61102337e-01,  3.94956398e+00, -1.63320935e+00,
         2.52106881e+00,  2.0