In [1]:
import onnx
import numpy as np
from qonnx.util.basic import qonnx_make_model
from finn.util.visualization import showInNetron
import onnxruntime as rt
from qonnx.util.basic import qonnx_make_model
from onnx.helper import make_tensor_value_info, make_node, make_graph, make_model, make_tensor
from onnx import numpy_helper

#### PART 1 : Defining the graph for the six LSTM equations (to be used with the scan body attribute while defining the scan node.)

In [2]:
# Defining the inputs and outputs of the graph we need to create for the graph of the scan body.
# ---------------------------------------------------
# Defining the inputs value info tensors for the compute to be executed for each input.
inp2_m2 = make_tensor_value_info("h_t-1",onnx.TensorProto.FLOAT, [20,1])
inp2_elm1 = make_tensor_value_info("c_t-1", onnx.TensorProto.FLOAT, [20,1])
inp2_m1 = make_tensor_value_info("X",onnx.TensorProto.FLOAT, [10,1])

#Output value info tensor definitions

### Partial output defined for this variable in the graph as we are concatenating the hidden states instead of the values of the output gates.
out_input_forget_matmul = make_tensor_value_info("i_f_matmul", onnx.TensorProto.FLOAT, [20,1])
out_hidden_forget_matmul = make_tensor_value_info("h_f_matmul", onnx.TensorProto.FLOAT, [20,1])
out_hidden_state = make_tensor_value_info("h_t", onnx.TensorProto.FLOAT, [20,1])
out_forget_gate = make_tensor_value_info("f_t_gate", onnx.TensorProto.FLOAT, [20,1])
out_input_gate = make_tensor_value_info("i_t_gate", onnx.TensorProto.FLOAT, [20,1])
out_cell_gate = make_tensor_value_info("c_t_gate", onnx.TensorProto.FLOAT, [20,1])
out_out_gate = make_tensor_value_info("o_t_gate", onnx.TensorProto.FLOAT, [20,1])
out_cell_state = make_tensor_value_info("c_t", onnx.TensorProto.FLOAT, [20,1])
out_hidden_state_concat = make_tensor_value_info("h_t_concat", onnx.TensorProto.FLOAT, [20,1])

In [3]:
#Applying Quantize and Dequantize operation to the input as mentioned in the output onnx graph.
ql_input = make_node("QuantizeLinear", inputs=["X","scale_test","zero_point_all"], outputs=["ql_input_out"],name="ql_input")
dql_input = make_node("DequantizeLinear", inputs=["ql_input_out", 'scale_test', "zero_point_all"], outputs=["dql_input_out"],name="dql_input")

#### Starting to introduce clip layers after every quantizelinear layer for quantization less than 8-bits.

In [4]:
#Pushing the weights quantisation in the scan node.
ql_w1 = make_node("QuantizeLinear", inputs=["W_f","scale_f","zero_point_all"], outputs=["ql_wf_out"], name="ql_w1")
clp_w1 = make_node("Clip", inputs=["ql_wf_out","min","max"], outputs=["clp_wf"], name="clp_w1")
dql_w1 = make_node("DequantizeLinear", inputs=["clp_wf","scale_f","zero_point_all"], outputs=["dql_wf_out"], name="dql_w1")

ql_w2 = make_node("QuantizeLinear", inputs=["W_i","scale_i","zero_point_all"], outputs=["ql_wi_out"], name="ql_w2")
clp_w2 = make_node("Clip", inputs=["ql_wi_out","min","max"], outputs=["clp_wi"], name="clp_w2")
dql_w2 = make_node("DequantizeLinear", inputs=["clp_wi","scale_i","zero_point_all"], outputs=["dql_wi_out"], name="dql_w2")

ql_w3 = make_node("QuantizeLinear", inputs=["W_c","scale_c","zero_point_all"], outputs=["ql_wc_out"], name="ql_w3")
clp_w3 = make_node("Clip", inputs=["ql_wc_out","min","max"], outputs=["clp_wc"], name="clp_w3")
dql_w3 = make_node("DequantizeLinear", inputs=["clp_wc","scale_c","zero_point_all"], outputs=["dql_wc_out"], name="dql_w3")

ql_w4 = make_node("QuantizeLinear", inputs=["W_o","scale_o","zero_point_all"], outputs=["ql_wo_out"], name="ql_w4")
clp_w4 = make_node("Clip", inputs=["ql_wo_out","min","max"], outputs=["clp_wo"], name="clp_w4")
dql_w4 = make_node("DequantizeLinear", inputs=["clp_wo","scale_o","zero_point_all"], outputs=["dql_wo_out"], name="dql_w4")

#These are the quantizations for the recurrence weight matrices.
ql_u1 = make_node("QuantizeLinear", inputs=["U_f","scale_f","zero_point_all"], outputs=["ql_uf_out"], name="ql_u1")
clp_u1 = make_node("Clip", inputs=["ql_uf_out","min","max"], outputs=["clp_uf"], name="clp_u1")
dql_u1 = make_node("DequantizeLinear", inputs=["clp_uf","scale_f","zero_point_all"], outputs=["dql_uf_out"], name="dql_u1")

ql_u2 = make_node("QuantizeLinear", inputs=["U_i","scale_i","zero_point_all"], outputs=["ql_ui_out"], name="ql_u2")
clp_u2 = make_node("Clip", inputs=["ql_ui_out","min","max"], outputs=["clp_ui"], name="clp_u2")
dql_u2 = make_node("DequantizeLinear", inputs=["clp_ui","scale_i","zero_point_all"], outputs=["dql_ui_out"], name="dql_u2")

ql_u3 = make_node("QuantizeLinear", inputs=["U_c","scale_c","zero_point_all"], outputs=["ql_uc_out"], name="ql_u3")
clp_u3 = make_node("Clip", inputs=["ql_uc_out","min","max"], outputs=["clp_uc"], name="clp_u3")
dql_u3 = make_node("DequantizeLinear", inputs=["clp_uc","scale_c","zero_point_all"], outputs=["dql_uc_out"], name="dql_u3")

ql_u4 = make_node("QuantizeLinear", inputs=["U_o","scale_o","zero_point_all"], outputs=["ql_uo_out"], name="ql_u4")
clp_u4 = make_node("Clip", inputs=["ql_uo_out","min","max"], outputs=["clp_uo"], name="clp_u4")
dql_u4 = make_node("DequantizeLinear", inputs=["clp_uo","scale_o","zero_point_all"], outputs=["dql_uo_out"], name="dql_u4")

In [5]:
#Defining the individual nodes of the graph we want to create.
# So the order in which the inputs are specified matters. Can't describe the inputs in a random order.
# --------------------------------------------
#1st Equation
mul_node1_e1 = make_node("MatMul", inputs=["dql_wf_out","dql_input_out"], outputs=["out_m1_e1"], name="mul_node1_e1")
id_node_1_e1 = make_node("Identity", inputs=["out_m1_e1"], outputs=["i_f_matmul"], name="id_node_1_e1")
mul_node2_e1 = make_node("MatMul", inputs=["dql_uf_out","h_t-1"], outputs=["out_m2_e1"],name="mul_node2_e1")
id_node_2_e1 = make_node("Identity", inputs=["out_m2_e1"], outputs=["h_f_matmul"], name="id_node_2_e1")
add_node1_e1 = make_node("Add", inputs=["out_m1_e1","out_m2_e1"], outputs=["out_add1_e1"],name="add_node1_e1")
add_node2_e1 = make_node("Add", inputs=["out_add1_e1","b_f"], outputs=["f_t_ba"],name="add_node2_e1")
quant_linear1_e1 = make_node("QuantizeLinear", inputs=["f_t_ba","scale_3","zero_point_all"], outputs=["f_t_ql1"],name="quant_linear1_e1")
dequant_linear1_e1 = make_node("DequantizeLinear", inputs=["f_t_ql1", "scale_3", "zero_point_all"], outputs=["f_t_dql1"], name="dequant_linear1_e1")
sig_f_e1     = make_node("Sigmoid", inputs=["f_t_dql1"], outputs=["f_t"],name="sig_f_e1")
quant_linear2_e1 = make_node("QuantizeLinear", inputs=["f_t","scale_4","zero_point_unsigned"], outputs=["f_t_ql2"],name="quant_linear2_e1")
dequant_linear2_e1 = make_node("DequantizeLinear", inputs=["f_t_ql2", "scale_4", "zero_point_unsigned"], outputs=["f_t_dql2"], name="dequant_linear2_e1")
id_node_3_e1      = make_node("Identity", inputs=["f_t_dql2"], outputs=["f_t_gate"], name="id_node_3_e1")

In [6]:
#2nd Equation
mul_node1_e2 = make_node("MatMul", inputs=["dql_wi_out","dql_input_out"], outputs=["out_m1_e2"], name="mul_node1_e2")
mul_node2_e2 = make_node("MatMul", inputs=["dql_ui_out","h_t-1"], outputs=["out_m2_e2"],name="mul_node2_e2")
add_node1_e2 = make_node("Add", inputs=["out_m1_e2","out_m2_e2"], outputs=["out_add1_e2"],name="add_node1_e2")
add_node2_e2 = make_node("Add", inputs=["out_add1_e2","b_i"], outputs=["i_t_ba"],name="add_node2_e2")
quant_linear1_e2 = make_node("QuantizeLinear", inputs=["i_t_ba","scale_1","zero_point_all"], outputs=["i_t_ql1"],name="quant_linear1_e2")
dequant_linear1_e2 = make_node("DequantizeLinear", inputs=["i_t_ql1","scale_1", "zero_point_all"], outputs=["i_t_dql1"], name="dequant_linear1_e2")
sig_i_e2     = make_node("Sigmoid", inputs=["i_t_dql1"], outputs=["i_t"],name="sig_i_e2")
quant_linear2_e2 = make_node("QuantizeLinear", inputs=["i_t","scale_2","zero_point_unsigned"], outputs=["i_t_ql2"],name="quant_linear2_e2")
dequant_linear2_e2 = make_node("DequantizeLinear", inputs=["i_t_ql2", "scale_2", "zero_point_unsigned"], outputs=["i_t_dql2"], name="dequant_linear2_e2")
id_node_e2      = make_node("Identity", inputs=["i_t_dql2"], outputs=["i_t_gate"], name="id_node_e2")

In [7]:
#3rd Equation
mul_node1_e3 = make_node("MatMul", inputs=["dql_wo_out","dql_input_out"], outputs=["out_m1_e3"], name="mul_node1_e3")
mul_node2_e3 = make_node("MatMul", inputs=["dql_uo_out","h_t-1"], outputs=["out_m2_e3"],name="mul_node2_e3")
add_node1_e3 = make_node("Add", inputs=["out_m1_e3","out_m2_e3"], outputs=["out_add1_e3"],name="add_node1_e3")
add_node2_e3 = make_node("Add", inputs=["out_add1_e3","b_o"], outputs=["o_t_ba"],name="add_node2_e3" )
quant_linear1_e3 = make_node("QuantizeLinear", inputs=["o_t_ba","scale_7","zero_point_all"], outputs=["o_t_ql1"],name="quant_linear_e3")
dequant_linear1_e3 = make_node("DequantizeLinear", inputs=["o_t_ql1","scale_7", "zero_point_all"], outputs=["o_t_dql1"], name="dequant_linear_e3")
sig_o_e3     = make_node("Sigmoid", inputs=["o_t_dql1"], outputs=["o_t"],name="sig_o_e3")
quant_linear2_e3 = make_node("QuantizeLinear", inputs=["o_t","scale_8","zero_point_unsigned"], outputs=["o_t_ql2"],name="quant_linear2_e3")
dequant_linear2_e3 = make_node("DequantizeLinear", inputs=["o_t_ql2", "scale_8", "zero_point_unsigned"], outputs=["o_t_dql2"], name="dequant_linear2_e3")
id_node_e3      = make_node("Identity", inputs=["o_t_dql2"], outputs=["o_t_gate"], name="id_node_e3")

In [8]:
#4th Equation
mul_node1_e4 = make_node("MatMul", inputs=["dql_wc_out","dql_input_out"], outputs=["out_m1_e4"], name="mul_node1_e4")
mul_node2_e4 = make_node("MatMul", inputs=["dql_uc_out","h_t-1"], outputs=["out_m2_e4"],name="mul_node2_e4")
add_node1_e4 = make_node("Add", inputs=["out_m1_e4","out_m2_e4"], outputs=["out_add1_e4"],name="add_node1_e4")
add_node2_e4 = make_node("Add", inputs=["out_add1_e4","b_c"], outputs=["c_t_ba"],name="add_node2_e4")
quant_linear1_e4 = make_node("QuantizeLinear", inputs=["c_t_ba","scale_5","zero_point_all"], outputs=["c_t_ql1"],name="quant_linear1_e4")
dequant_linear1_e4 = make_node("DequantizeLinear", inputs=["c_t_ql1","scale_5", "zero_point_all"], outputs=["c_t_dql1"], name="dequant_linear1_e4")
tanh_c_e4    = make_node("Tanh", inputs=["c_t_dql1"], outputs=["c_t_partial"],name="tanh_c_e4")
quant_linear2_e4 = make_node("QuantizeLinear", inputs=["c_t_partial","scale_6","zero_point_all"], outputs=["c_t_ql2"],name="quant_linear2_e4")
dequant_linear2_e4 = make_node("DequantizeLinear", inputs=["c_t_ql2", "scale_6", "zero_point_all"], outputs=["c_t_dql2"], name="dequant_linear2_e4")
id_node_e4      = make_node("Identity", inputs=["c_t_dql2"], outputs=["c_t_gate"], name="id_node_e4")

In [9]:
#5th Equation
el_mul_node1_e5 = make_node("Mul", inputs=["f_t_dql2","c_t-1"], outputs=["out_el_mul1_e5"],name="el_mul_node1_e5")
quant_linear1_e5 = make_node("QuantizeLinear", inputs=["out_el_mul1_e5","scale_9","zero_point_all"], outputs=["fifth_ql1"],name="quant_linear1_e5")
dequant_linear1_e5 = make_node("DequantizeLinear", inputs=["fifth_ql1","scale_9", "zero_point_all"], outputs=["fifth_dql1"], name="dequant_linear1_e5")
el_mul_node2_e5 = make_node("Mul", inputs=["i_t_dql2","c_t_dql2"], outputs=["out_el_mul2_e5"], name="el_mul_node2_e5") 
quant_linear2_e5 = make_node("QuantizeLinear", inputs=["out_el_mul2_e5","scale_9","zero_point_all"], outputs=["fifth_ql2"],name="quant_linear2_e5")
dequant_linear2_e5 = make_node("DequantizeLinear", inputs=["fifth_ql2","scale_9", "zero_point_all"], outputs=["fifth_dql2"], name="dequant_linear2_e5")
out_add1_e5     = make_node("Add", inputs=["fifth_dql1","fifth_dql2"], outputs=["c_t"], name="out_add1_e5")
quant_linear3_e5 = make_node("QuantizeLinear", inputs=["c_t","scale_9","zero_point_all"], outputs=["h_t_ql"], name="quant_linear3_e5")
dequant_linear3_e5 = make_node("DequantizeLinear", inputs=["h_t_ql","scale_9","zero_point_all"], outputs=["h_t_dql"], name="dequant_linear3_e5")

In [10]:
#6th Equation
tanh_node_e6    = make_node("Tanh", inputs=["h_t_dql"], outputs=["out_tanh_e6"], name="tanh_node_e6") 
quant_linear1_e6 = make_node("QuantizeLinear", inputs=["out_tanh_e6","scale_10","zero_point_all"], outputs=["sixth_ql1"], name="quant_linear1_e6")
dequant_linear1_e6 = make_node("DequantizeLinear", inputs=["sixth_ql1","scale_10","zero_point_all"], outputs=["sixth_dql1"], name="dequant_linear1_e6")
el_mul_node1_e6 = make_node("Mul", inputs=["sixth_dql1","o_t_dql2"], outputs=["h_t_inter"], name="el_mul_node1_e6")#h_t_inter
quant_linear2_e6 = make_node("QuantizeLinear", inputs=["h_t_inter","scale_11","zero_point_all"], outputs=["sixth_ql2"], name="quant_linear2_e6")
dequant_linear2_e6 = make_node("DequantizeLinear", inputs=["sixth_ql2","scale_11","zero_point_all"], outputs=["h_t"], name="dequant_linear2_e6")
id_node_e6      = make_node("Identity", inputs=["h_t"], outputs=["h_t_concat"], name="id_node_e6")
##Adding an Identity node after the hidden state compute to concatenate all the hidden states in the scan node.

In [11]:
#Initializing the values of the wieght matrix, recurrence matrix and the bias matrix
# Ws_val = np.ones([20,10],dtype=np.float32).reshape([20,10])
# Us_val = np.ones([20,20],dtype=np.float32).reshape([20,20])
# bias_val = np.ones([20,1],dtype=np.float32).reshape([20,1])
# scale_val = np.float32(1)
# zero_point_val = np.uint8(0)

#Initializing the values of the wieght matrix, recurrence matrix and the bias matrix
# Wi_val = np.empty([20,10],dtype=np.float32).reshape([20,10])
# Wi_val.fill(0)
# Ui_val = np.empty([20,20],dtype=np.float32).reshape([20,20])
# Ui_val.fill(2)
# bi_val = np.empty([20,1],dtype=np.float32).reshape([20,1])
# bi_val.fill(1)

# Wo_val = np.empty([20,10],dtype=np.float32).reshape([20,10])
# Wo_val.fill(0)
# Uo_val = np.empty([20,20],dtype=np.float32).reshape([20,20])
# Uo_val.fill(2)
# bo_val = np.empty([20,1],dtype=np.float32).reshape([20,1])
# bo_val.fill(1)

# Wf_val = np.empty([20,10],dtype=np.float32).reshape([20,10])
# Wf_val.fill(0)
# Uf_val = np.empty([20,20],dtype=np.float32).reshape([20,20])
# Uf_val.fill(2)
# bf_val = np.empty([20,1],dtype=np.float32).reshape([20,1])
# bf_val.fill(1)

# Wc_val = np.empty([20,10],dtype=np.float32).reshape([20,10])
# Wc_val.fill(0)
# Uc_val = np.empty([20,20],dtype=np.float32).reshape([20,20])
# Uc_val.fill(2)
# bc_val = np.empty([20,1],dtype=np.float32).reshape([20,1])
# bc_val.fill(1)

# Depending on the weight bit-widths we can calculate the min and max values for each clip node here and then supply those
# values in the initializer in the make_graph helper node.
#Similarily all the scale and zero-point values can be defined here and then used in the initializers.

In [12]:
# showInNetron('./quant_lstm_weight_only_4b.onnx')#,localhost_url='xirxlabs53'
# showInNetron('./quant-lstm-full-quantization.onnx',localhost_url='xirxlabs53')#,localhost_url='xirxlabs53'
showInNetron('./quant_lstm_full_quantization_qcdq.onnx',localhost_url='xirxlabs53')#,localhost_url='xirxlabs53'

Serving './quant_lstm_full_quantization_qcdq.onnx' at http://0.0.0.0:5901


### ModelWrapper : For node addition and deletion.

This is the part where we take the exported QuantLSTM from brevitas, delete the LSTM custom_op and replace it with the scan node there.

In [54]:
wrapper_model = onnx.load("./vanilla-lstm-all-qcdq.onnx")
from qonnx.core.modelwrapper import ModelWrapper
finn_model = ModelWrapper(wrapper_model)

def get_node_id(model):
    node_index = {}
    node_ind = 0
    for node in model.graph.node:
        node_index[node.name] = node_ind
        node_ind += 1
    return node_index

index = {}
index = get_node_id(finn_model)
# print(index)

def identify_adder_nodes(model):
    add_nodes = []
    for node in model.graph.node:
        if node.op_type == "LSTM":
            model.graph.node.remove(node) #Here we can remove the LSTM node from the graph!  #Now just need to insert the scan node here in it's place which properly takes inputs. #Remove the node and insert the node here only.
            #Append statement here.
            #model.graph.node.append
    return add_nodes

lstm_nodes = identify_adder_nodes(finn_model)
# print(lstm_nodes)

index = {}
index = get_node_id(finn_model)
# print(index)

In [55]:
# qcdq_lstm_weight_only = onnx.load("./quant_lstm_weight_only_4b.onnx")
# qcdq_lstm_full_quantization = onnx.load("./quant_lstm_full_quantization_qcdq.onnx")
# weights = qcdq_lstm_full_quantization.graph.initializer
# # print(weights[0].shape)
# print(len(weights))
# for i in range(len(weights)):
#     w = numpy_helper.to_array(weights[i])
#     print (qcdq_lstm_full_quantization.graph.initializer[i].name)
#     print(w.shape)
# #     print(w)
#     print("-------------------------")

# print(weights[19])

# print (qcdq_lstm_weight_only.graph.node[0].input[1]) # dense_input         = 1. layer
# print (qcdq_lstm_weight_only.graph.initializer[0].name) # dense_1/kernel:0 = last layer

#Order in which to read the weights is = Input, forget, cell and output. Got this from the initializer names.
# Wi_val = numpy_helper.to_array(weights[0])
# Ui_val = numpy_helper.to_array(weights[1])
# Wf_val = numpy_helper.to_array(weights[2])
# Uf_val = numpy_helper.to_array(weights[3])
# Wc_val = numpy_helper.to_array(weights[4])
# Uc_val = numpy_helper.to_array(weights[5])
# Wo_val = numpy_helper.to_array(weights[6])
# Uo_val = numpy_helper.to_array(weights[7])

# all_bias = numpy_helper.to_array(weights[8])
# all_bias = all_bias.reshape([160,1])

#Order in which to read the biases = Input, forget, output and cell. Gives the best results. But could not get this information from the intitilaizer names.
#Tried this random order out of the available 24 and the above ones worked properly.
# bi_val = all_bias[0:20,:]
# bf_val = all_bias[20:40,:]
# bo_val = all_bias[40:60,:]
# bc_val = all_bias[60:80,:] # So the biases maybe in this order. When I read the bias values in this order are same upto the second decimal place.
#I have been able to test the above bias order with np.random.uniform([5,20,1]) inputs.
#And the results are very close. There is some differnce that is coming up! It maybe because of the way the weights are unsqueezd and concatenated maybe some values change there.

# print(bi_val.shape)
# print(onnx_model)
# So all the weights and biases are stored as initializers in the onnx graph. We access them and then print the shapes of the each of them.
# So the first 8 entries in the weights stored as initialzed correspond to the 4 paris of weight and recurrence matrix(W_s, U_s) as evident from their shapes.
# Weight Matrix : [Output_Dimension X Input_Dimension], Recurrence Matrix : [Output_Dimension x Output_Dimension]
# The last entry corresponds to the concatenated weights and recurrence biases (Total 8 each with shape : [Output_Dimension x 1]).
# Hence, in our case the final shape is [1,160]. From the values we also see that the values of the recurrent biases currently are set to '0'.

#Questions : 
#1. I can access the weights values in the onnx graph. But they don't have scale_factor and zero_point in their values. So these are not quantized values I am assuming. Which will bring out different outputs when executed. 
#2. What order are these matrices in is now the question? From the original documentation in onnx operators, I am assuming the order to be 'input_gate','output_gate', 'forget_gate' and 'cell_gate'.
#3. What are the initial values of the hidden and cell state used? 'initial_h' and 'initial_c' are also variables and if they are not initialized then they are considered '0' in the original LSTM onnx cell. So I am assuming the same here.
#4. How are the quantized inputs fed into the model? But for this case with only weight quantization; inputs are not quantized and are fed directly to the model.

#The outputs are in the order of all concatenated hidden states, final hidden state, final cell state.

#Will have to edit the scan-body of the scan node as it looks that the all the weight and recurrence matrices are concatenated before they are fed into the custom_op.

In [13]:
qcdq_lstm_full_quantization = onnx.load("./quant_lstm_full_quantization_qcdq.onnx")
weights = qcdq_lstm_full_quantization.graph.initializer
# print(weights[0].shape)
print(len(weights))
for i in range(len(weights)):
    w = numpy_helper.to_array(weights[i])
    print (qcdq_lstm_full_quantization.graph.initializer[i].name)
    print(w.shape)
    print(w)
    print("-------------------------")
    
bi_val = numpy_helper.to_array(weights[0])
# print(bi_val)
Wi_val = numpy_helper.to_array(weights[1])
Ui_val = numpy_helper.to_array(weights[2])

bf_val = numpy_helper.to_array(weights[3])
Wf_val = numpy_helper.to_array(weights[4])
Uf_val = numpy_helper.to_array(weights[5])

bc_val = numpy_helper.to_array(weights[6])
Wc_val = numpy_helper.to_array(weights[7])
Uc_val = numpy_helper.to_array(weights[8])

bo_val = numpy_helper.to_array(weights[9])
Wo_val = numpy_helper.to_array(weights[10])
Uo_val = numpy_helper.to_array(weights[11])

35
layers.0.0.input_gate_params.bias
(20,)
[-0.02587563 -0.18425222 -0.18189065  0.02914573 -0.21827428  0.0595416
 -0.20598626 -0.15559138 -0.04639753 -0.2133838   0.18059207  0.18321364
 -0.11679631  0.04684116  0.11439164  0.07105622 -0.02995344 -0.21090843
  0.1625932  -0.19612479]
-------------------------
layers.0.0.input_gate_params.input_weight.weight
(20, 10)
[[-4.14119214e-02  1.38706667e-02 -7.36431107e-02 -8.17852393e-02
  -1.93256751e-01  1.23205660e-02 -2.53894478e-02  1.94940954e-01
  -7.36160800e-02  1.72829047e-01]
 [ 1.05855539e-02 -1.00462548e-01 -5.31778559e-02 -2.53751595e-02
   2.31616711e-03 -3.68398018e-02  6.63604736e-02  1.84143797e-01
   3.51473056e-02  8.09932351e-02]
 [ 1.38081744e-01  4.81988601e-02  1.03076197e-01  1.17293097e-01
   2.09298924e-01 -2.04075590e-01  7.65163079e-02 -1.01319486e-02
  -4.01576199e-02 -8.62098187e-02]
 [ 1.34432539e-01  2.04552680e-01 -1.82483241e-01  1.20810278e-01
   1.54187992e-01  3.90806384e-02  2.63404008e-03  1.72071218e

In [14]:
# sess = rt.InferenceSession(qcdq_lstm_weight_only.SerializeToString())
# sess = rt.InferenceSession(qcdq_lstm_full_quantization.SerializeToString())
# input_name = sess.get_inputs()[0].name
# # print(input_name)
# in1_qcdq =  np.ones((5,1,10)).astype(np.float32)
# pred_onnx = sess.run(None, {input_name: in1_qcdq})
# print(pred_onnx)

In [42]:
#Defining the graph for the LSTM compute iteration solving the six equations
lstm_scan = make_graph(
    nodes=[
           ql_input,
           dql_input, 
           ql_w1,
           clp_w1, 
           dql_w1,
           ql_w2,
           clp_w2, 
           dql_w2,
           ql_w3,
           clp_w3, 
           dql_w3,
           ql_w4,
           clp_w4, 
           dql_w4,
           ql_u1,
           clp_u1, 
           dql_u1,
           ql_u2,
           clp_u2,
           dql_u2,    
           ql_u3,
           clp_u3,
           dql_u3,    
           ql_u4,
           clp_u4,
           dql_u4, 
           mul_node1_e1,
           id_node_1_e1,
           mul_node2_e1,
           id_node_2_e1, 
           add_node1_e1, 
           add_node2_e1,
           quant_linear1_e1,
           dequant_linear1_e1,
           sig_f_e1,
           quant_linear2_e1, 
           dequant_linear2_e1,
           id_node_3_e1, 
           mul_node1_e2, 
           mul_node2_e2, 
           add_node1_e2, 
           add_node2_e2,
           quant_linear1_e2,
           dequant_linear1_e2,
           sig_i_e2,
           quant_linear2_e2,
           dequant_linear2_e2,
           id_node_e2, 
           mul_node1_e3, 
           mul_node2_e3, 
           add_node1_e3, 
           add_node2_e3,
           quant_linear1_e3,
           dequant_linear1_e3,
           sig_o_e3,
           quant_linear2_e3,
           dequant_linear2_e3,
           id_node_e3, 
           mul_node1_e4, 
           mul_node2_e4, 
           add_node1_e4, 
           add_node2_e4,
           quant_linear1_e4,
           dequant_linear1_e4,
           tanh_c_e4,
           quant_linear2_e4,
           dequant_linear2_e4,
           id_node_e4, 
           el_mul_node1_e5,
           quant_linear1_e5, 
           dequant_linear1_e5,
           el_mul_node2_e5,
           quant_linear2_e5,
           dequant_linear2_e5,
           out_add1_e5,
           quant_linear3_e5, 
           dequant_linear3_e5,
           tanh_node_e6,
           quant_linear1_e6, 
           dequant_linear1_e6,
           el_mul_node1_e6,
           quant_linear2_e6,
           dequant_linear2_e6,   
           id_node_e6
          ],
    name = "QCDQ-LSTM-SCAN",
    inputs=[inp2_m2,inp2_elm1,inp2_m1], #The order in which the inputs are defined here should match the input order when the scan node is defined.
    outputs = [out_hidden_state,out_cell_state,out_hidden_state_concat],#out_add2_e3
    #out_forget_gate,out_input_gate,out_cell_gate,out_out_gate,out_hidden_forget_matmul,out_input_forget_matmul
    value_info=[
            make_tensor_value_info("ql_input_out",onnx.TensorProto.INT8, [10,1]),
            make_tensor_value_info("dql_input_out",onnx.TensorProto.FLOAT, [10,1]),
            make_tensor_value_info("out_m1_e1",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_m2_e1",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_add1_e1",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("f_t_ba",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("f_t_ql1",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("f_t_dql1", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("f_t_ql2",onnx.TensorProto.UINT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("f_t_dql2", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("out_m1_e2",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_m2_e2",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_add1_e2",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("i_t_ba",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("i_t_ql1",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("i_t_dql1", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("i_t_ql2",onnx.TensorProto.UINT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("i_t_dql2", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("out_m1_e3",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_m2_e3",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_add1_e3",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("o_t_ba",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("o_t_ql1",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("o_t_dql1", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("o_t_ql2",onnx.TensorProto.UINT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("o_t_dql2", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("out_m1_e4",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_m2_e4",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_add1_e4",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("c_t_ba",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("c_t_ql1",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("c_t_dql1", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("c_t_ql2",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("c_t_dql2", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("f_t",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("i_t",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("o_t",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("c_t_partial",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_el_mul1_e5",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("out_el_mul2_e5",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("fifth_ql1",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("fifth_dql1", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("fifth_ql2",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("fifth_dql2", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("h_t_ql",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("h_t_dql", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("out_tanh_e6",onnx.TensorProto.FLOAT, [20,1]),
            make_tensor_value_info("sixth_ql1",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("sixth_dql1", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("sixth_ql2",onnx.TensorProto.INT8, [20,1]),#Output of the quantized linear layer. Therefore the datatype INT8.
            make_tensor_value_info("h_t_inter", onnx.TensorProto.FLOAT, [20,1]),#Output of the dequantized linear layer. Therefore float datatype as the input will now be processed differently.
            make_tensor_value_info("ql_wf_out", onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("dql_wf_out",onnx.TensorProto.FLOAT, [20,10]),
            make_tensor_value_info("ql_wi_out", onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("dql_wi_out",onnx.TensorProto.FLOAT, [20,10]),
            make_tensor_value_info("ql_wc_out", onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("dql_wc_out",onnx.TensorProto.FLOAT, [20,10]),
            make_tensor_value_info("ql_wo_out", onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("dql_wo_out",onnx.TensorProto.FLOAT, [20,10]),
            make_tensor_value_info("ql_uf_out",onnx.TensorProto.INT8, [20,20]),
            make_tensor_value_info("dql_uf_out",onnx.TensorProto.FLOAT, [20,20]),
            make_tensor_value_info("ql_ui_out",onnx.TensorProto.INT8, [20,20]),
            make_tensor_value_info("dql_ui_out",onnx.TensorProto.FLOAT, [20,20]),
            make_tensor_value_info("ql_uc_out",onnx.TensorProto.INT8, [20,20]),
            make_tensor_value_info("dql_uc_out",onnx.TensorProto.FLOAT, [20,20]),
            make_tensor_value_info("ql_uo_out",onnx.TensorProto.INT8, [20,20]),
            make_tensor_value_info("dql_uo_out",onnx.TensorProto.FLOAT, [20,20]),
            make_tensor_value_info("clp_wf",onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("clp_wi",onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("clp_wc",onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("clp_wo",onnx.TensorProto.INT8, [20,10]),
            make_tensor_value_info("clp_uf",onnx.TensorProto.INT8, [20,20]), 
            make_tensor_value_info("clp_ui",onnx.TensorProto.INT8, [20,20]),
            make_tensor_value_info("clp_uc",onnx.TensorProto.INT8, [20,20]),
            make_tensor_value_info("clp_uo",onnx.TensorProto.INT8, [20,20]),
        ],
    initializer=[make_tensor('W_f',onnx.TensorProto.FLOAT, [20,10], (Wf_val)),
                 make_tensor('U_f',onnx.TensorProto.FLOAT, [20,20], (Uf_val)),
                 make_tensor('b_f',onnx.TensorProto.FLOAT, [20,1], (bf_val)),
                 #Scalars 'scale' and 'zero_point' should be defined as below. Converting them into numpy array based single values causes some errors and exceptions saying that these values should be scalar. The definition has to be like this.
                 # Scalars are tensors with undefined shapes.
                 make_tensor('scale_all',onnx.TensorProto.FLOAT,[],[1]),
                 make_tensor('inp_scale',onnx.TensorProto.FLOAT, [],[0.0057353885]),
                 make_tensor('scale_i',onnx.TensorProto.FLOAT, [],[0.001760039]),
                 make_tensor('scale_c',onnx.TensorProto.FLOAT, [],[0.0017542557]),
                 make_tensor('scale_o',onnx.TensorProto.FLOAT, [],[0.0017601603]),
                 make_tensor('scale_f',onnx.TensorProto.FLOAT, [],[0.0017546351]),
                 make_tensor('scale_1',onnx.TensorProto.FLOAT, [],[0.0057353885]),
                 make_tensor('scale_2',onnx.TensorProto.FLOAT, [],[0.0034227842]),
                 make_tensor('scale_3',onnx.TensorProto.FLOAT, [],[0.006515566]),
                 make_tensor('scale_test',onnx.TensorProto.FLOAT, [],[0.00781916]),#Correct input scale : 0.00781916
                 make_tensor('scale_4',onnx.TensorProto.FLOAT, [],[0.00731916]),
                 make_tensor('scale_5',onnx.TensorProto.FLOAT, [],[0.005926438]),
                 make_tensor('scale_6',onnx.TensorProto.FLOAT, [],[0.0071433834]),
                 make_tensor('scale_7',onnx.TensorProto.FLOAT, [],[0.0085895785]), #Approximate scale_7 value = 0.0085895785
#                  make_tensor('scale_7',onnx.TensorProto.FLOAT, [],[0]),
                 make_tensor('scale_8',onnx.TensorProto.FLOAT, [],[0.0026683041]),#0.0026683041
                 make_tensor('scale_9',onnx.TensorProto.FLOAT, [],[0.0049660783]),
                 make_tensor('scale_10',onnx.TensorProto.FLOAT, [],[0.0027938376]),
                 make_tensor('scale_11',onnx.TensorProto.FLOAT, [],[0.0036052174]),#0.0036052174
#                  make_tensor('scale_11',onnx.TensorProto.FLOAT, [],[0.0057353885]),
                 
                 make_tensor('zero_point_all',onnx.TensorProto.INT8,[],[0]),#Zero-point datatype is int8 or unit8 and some advanced floating point datatypes.
                 make_tensor('zero_point_unsigned',onnx.TensorProto.UINT8,[],[0]),#Zero-point datatype is int8 or unit8 and some advanced floating point datatypes.
                 #Introducing scalars for the clip operators.
                 make_tensor('min', onnx.TensorProto.INT8, [], [-127]),
                 make_tensor('max', onnx.TensorProto.INT8, [], [127]),
                 make_tensor('W_i',onnx.TensorProto.FLOAT, [20,10], (Wi_val)),
                 make_tensor('U_i',onnx.TensorProto.FLOAT, [20,20], (Ui_val)),
                 make_tensor('b_i',onnx.TensorProto.FLOAT, [20,1], (bi_val)),
                 make_tensor('W_o',onnx.TensorProto.FLOAT, [20,10], (Wo_val)),
                 make_tensor('U_o',onnx.TensorProto.FLOAT, [20,20], (Uo_val)),
                 make_tensor('b_o',onnx.TensorProto.FLOAT, [20,1], (bo_val)),
                 make_tensor('W_c',onnx.TensorProto.FLOAT, [20,10], (Wc_val)),
                 make_tensor('U_c',onnx.TensorProto.FLOAT, [20,20], (Uc_val)),
                 make_tensor('b_c',onnx.TensorProto.FLOAT, [20,1], (bc_val))
                ]
)

In [4]:
# onnx_model = qonnx_make_model(lstm_scan, producer_name="QuantizeLSTM_scan")
# onnx.save(onnx_model, './quantize-lstm-full-graph.onnx')
# showInNetron('./quantize-lstm-full-graph.onnx',localhost_url='xirxlabs53')#localhost_url='xirxlabs53'
# showInNetron('./quant_lstm_full_quantization_qcdq.onnx')#localhost_url='xirxlabs53'
# showInNetron('./quantize-lstm-full-graph-test.onnx',localhost_url='xirxlabs53')#localhost_url='xirxlabs53'
# showInNetron('./lstm_scan_node_model.onnx',localhost_url='xirxlabs53')#localhost_url='xirxlabs53'
# showInNetron('./lstm-gate-sigmoid.onnx',localhost_url='xirxlabs53')#localhost_url='xirxlabs53'
showInNetron('./test_brevitas_relu_act_export.onnx',localhost_url='xirxlabs53')#localhost_url='xirxlabs53'

Stopping http://0.0.0.0:8081
Serving './test_brevitas_relu_act_export.onnx' at http://0.0.0.0:8081


----------------------------------------
Exception happened during processing of request from ('172.21.37.103', 57466)
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/socketserver.py", line 650, in process_request_thread
    self.finish_request(request, client_address)
  File "/opt/conda/lib/python3.8/socketserver.py", line 360, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/opt/conda/lib/python3.8/socketserver.py", line 720, in __init__
    self.handle()
  File "/opt/conda/lib/python3.8/http/server.py", line 427, in handle
    self.handle_one_request()
  File "/opt/conda/lib/python3.8/http/server.py", line 395, in handle_one_request
    self.raw_requestline = self.rfile.readline(65537)
  File "/opt/conda/lib/python3.8/socket.py", line 669, in readinto
    return self._sock.recv_into(b)
ConnectionResetError: [Errno 104] Connection reset by peer
----------------------------------------


In [10]:
from qonnx.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes

m_path = "test_brevitas_relu_act_export.onnx"
model = ModelWrapper(m_path)
model = model.transform(ConvertQONNXtoFINN())
model = model.transform(InferShapes())
model.save("./qonnx_to_finn.onnx")

In [11]:
showInNetron('./qonnx_to_finn.onnx')#localhost_url='xirxlabs53'

Stopping http://0.0.0.0:5901
Serving './qonnx_to_finn.onnx' at http://0.0.0.0:5901


In [7]:
scan_model = onnx.load("./lstm_scan_node_model_14.onnx")
print(scan_model)

ir_version: 8
producer_name: "LSTM-Scan"
graph {
  node {
    input: "inp_a"
    input: "inp_b"
    input: "scan_input"
    output: "out_a"
    output: "out_b"
    output: "out_c"
    op_type: "Scan"
    attribute {
      name: "body"
      g {
        node {
          input: "X"
          input: "inp_scale"
          input: "zero_point_all"
          output: "ql_input_out"
          name: "ql_input"
          op_type: "QuantizeLinear"
        }
        node {
          input: "ql_input_out"
          input: "inp_scale"
          input: "zero_point_all"
          output: "dql_input_out"
          name: "dql_input"
          op_type: "DequantizeLinear"
        }
        node {
          input: "W_f"
          input: "scale_f"
          input: "zero_point_all"
          output: "ql_wf_out"
          name: "ql_w1"
          op_type: "QuantizeLinear"
        }
        node {
          input: "ql_wf_out"
          input: "min"
          input: "max"
          output: "clp_wf"
          name

In [44]:
print(onnx_model)

ir_version: 8
producer_name: "QuantizeLSTM_scan"
graph {
  node {
    input: "X"
    input: "inp_scale"
    input: "zero_point_all"
    output: "ql_input_out"
    name: "ql_input"
    op_type: "QuantizeLinear"
  }
  node {
    input: "ql_input_out"
    input: "scale_test"
    input: "zero_point_all"
    output: "dql_input_out"
    name: "dql_input"
    op_type: "DequantizeLinear"
  }
  node {
    input: "W_f"
    input: "scale_f"
    input: "zero_point_all"
    output: "ql_wf_out"
    name: "ql_w1"
    op_type: "QuantizeLinear"
  }
  node {
    input: "ql_wf_out"
    input: "min"
    input: "max"
    output: "clp_wf"
    name: "clp_w1"
    op_type: "Clip"
  }
  node {
    input: "clp_wf"
    input: "scale_f"
    input: "zero_point_all"
    output: "dql_wf_out"
    name: "dql_w1"
    op_type: "DequantizeLinear"
  }
  node {
    input: "W_i"
    input: "scale_i"
    input: "zero_point_all"
    output: "ql_wi_out"
    name: "ql_w2"
    op_type: "QuantizeLinear"
  }
  node {
    input: "ql

In [45]:
#Converting to opset version '14' to accomodate clip nodes with INT8 and UINT8 input 
onnx_model.opset_import[0].version = 14
print(onnx_model)

ir_version: 8
producer_name: "QuantizeLSTM_scan"
graph {
  node {
    input: "X"
    input: "inp_scale"
    input: "zero_point_all"
    output: "ql_input_out"
    name: "ql_input"
    op_type: "QuantizeLinear"
  }
  node {
    input: "ql_input_out"
    input: "scale_test"
    input: "zero_point_all"
    output: "dql_input_out"
    name: "dql_input"
    op_type: "DequantizeLinear"
  }
  node {
    input: "W_f"
    input: "scale_f"
    input: "zero_point_all"
    output: "ql_wf_out"
    name: "ql_w1"
    op_type: "QuantizeLinear"
  }
  node {
    input: "ql_wf_out"
    input: "min"
    input: "max"
    output: "clp_wf"
    name: "clp_w1"
    op_type: "Clip"
  }
  node {
    input: "clp_wf"
    input: "scale_f"
    input: "zero_point_all"
    output: "dql_wf_out"
    name: "dql_w1"
    op_type: "DequantizeLinear"
  }
  node {
    input: "W_i"
    input: "scale_i"
    input: "zero_point_all"
    output: "ql_wi_out"
    name: "ql_w2"
    op_type: "QuantizeLinear"
  }
  node {
    input: "ql

In [46]:
# Testing to check if the model is serializing without errors or warnings
#Even after converting to an opset version of 14 there was an error saying that the clip operator is tied to two different datatypes (int8 and float)
#That was because the MIN and the MAX values were defined as FLOAT tensors and the Clip operator constrains the input and output datatypes to be the same.
#Converting them to INT8 datatypes solved that error.
sess = rt.InferenceSession(onnx_model.SerializeToString())

2023-07-11 15:11:44.757459428 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'scale_all'. It is not used by any node and should be removed from the model.


So some points to note here:
1. Initializers (`W_s`,`U_s` and `b_s`) are a part of the model and they will not be defined in the list of the inputs (inp1_m1, inp1_m2 & bias).
2. Because they are a part of the model, these initializers will not be defined again when we define the scan node later, which we will see.
3. Scan node only cares about the inputs and outputs of the 'body_graph' and does not care what happens inside it.

# Start debugging from this error : Done

Solved the error. The error was in the way scalars were represented and they were not being interpreted as scalars. That has now been corrected in their definitiion when the graph is initialized.

#### Test settings

1. For the test, the `input_length` = 10, `hidden_size` = 20.
2. Values of the `weight matrix` and the `recurrence weight matrix` are 1.
3. The input vector `X` is 1.
4. The value of the `weight biases` = 1.
5. The initial values of the `hidden_state` and the `cell_state` = 0.
6. The LSTM node is defined without `peepholes` and the `recurrence biases` for now.

In [47]:
# Defining the values of the varibales to test the execution of the onnx model
in1lstm =  np.ones((10, 1)).astype(np.float32)
print(in1lstm)
in2lstm =  np.zeros((20, 1)).astype(np.float32)
in3lstm =  np.zeros((20, 1)).astype(np.float32)
input_dict = {}
input_dict["X"] = in1lstm
input_dict["h_t-1"] = in2lstm
input_dict["c_t-1"] = in3lstm 

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [21]:
#Executing the onnx model here.
sess = rt.InferenceSession(onnx_model.SerializeToString())
output = sess.run(None, input_dict)
print(output[0])
print('Forget gate = ',output[3].reshape([1,20]))
print('Input gate = ',output[4].reshape([1,20]))
print('Cell gate = ',output[5].reshape([1,20]))
print('Output gate = ',output[6].reshape([1,20]))
print('h_f_matmul = ',output[7].reshape([1,20]))
print('i_f_matmul = ',output[8].reshape([1,20]))

2023-07-11 15:08:53.063785296 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'scale_all'. It is not used by any node and should be removed from the model.


[[ 0.18026087]
 [-0.01802609]
 [ 0.05047305]
 [ 0.18386608]
 [ 0.01081565]
 [-0.00360522]
 [ 0.12257739]
 [-0.11897217]
 [ 0.03965739]
 [-0.06849913]
 [ 0.01442087]
 [-0.14060348]
 [ 0.11897217]
 [ 0.03965739]
 [ 0.20910262]
 [-0.02523652]
 [-0.03244696]
 [-0.02523652]
 [-0.0612887 ]
 [ 0.00721043]]


IndexError: list index out of range

In [39]:
forget_out = np.load('./forget.npy')
hidden_out = np.load('./hidden.npy')
# print(forget_out)
input_out = np.load('./input.npy')
cell_out = np.load('./cell.npy')
out_out = np.load('./out.npy')
i_f_matmul = np.load('./quant_if_gate.npy')

my_hidden_out = output[0].reshape([1,20])
my_hidden_out_2 = output[2].reshape([1,20])
my_forget_out = output[3].reshape([1,20])
my_input_out = output[4].reshape([1,20])
my_cell_out = output[5].reshape([1,20])
my_out_out = output[6].reshape([1,20])
my_i_f_out = output[8].reshape([1,20])

print('My_Hidden out = ',my_hidden_out)
print('Brevitas_Hidden_out = ',hidden_out[0])

# print('Forget gate = ',forget_out[0])
# print('Input gate = ',input_out[0])
# print('Cell gate = ',cell_out[0])
# print('Output gate = ',out_out[0])
# print('i_f_matmul =',i_f_matmul[0])

scale = 0.005#0.005

diff_hidden = np.round((my_hidden_out - hidden_out[0])/scale)
diff_forget = np.round((my_forget_out - forget_out[0])/scale)
diff_input = np.round((my_input_out - input_out[0])/scale)
diff_cell = np.round((my_cell_out - cell_out[0])/scale)
diff_out = np.round((my_out_out - out_out[0])/scale)
diff_i_f = np.round((my_i_f_out - i_f_matmul[0])/scale)
diff_hidden_2 = np.round((my_hidden_out_2 - hidden_out)/scale)


# diff_forget = my_forget_out - forget_out[0]
# diff_input = my_input_out - input_out[0]
# diff_cell = my_cell_out - cell_out[0]
# diff_out = my_out_out - out_out[0]
# diff_i_f = my_i_f_out - i_f_matmul[0]

print('Diff hidden state = ',diff_hidden)
print('Diff forget_gate = ',diff_forget)
print('Diff input_gate = ',diff_input)
print('Diff cell_gate = ',diff_cell)
print('Diff out_gate = ',diff_out)
print('Diff first input_multiplication = ', diff_i_f)
# print(diff_hidden_2)

# comp = scan_full_out - brevitas_out
# # print(comp)
# round_comp = np.round(comp/0.0036052174400538206)
# print(round_comp)

IndexError: list index out of range

In [139]:
# Diff forget =  [[ 3.  7. 11.  3. -6.  2. -8. -4.  7.  3. -0. 10.  1. -6. -3.  2. -5.  2.
#   -6.  0.]]
# Diff input =  [[ -7. -12. -22. -43. -15.   6. -21. -14.  10.   3.  10. -13. -23. -16.
#   -36. -18.   5.   0. -11.  -2.]]
# Diff cell =  [[-28.   8.  -7. -20.   1.  11. -19.  27. -11.  17.   5.  29. -11. -10.
#   -33.   5.  14.  17.  19.   1.]]
# Diff out =  [[-63. -75. -58. -55.  -4.  -3. -23. -17. -14. -21. -32. -34. -41. -51.
#   -50. -41. -21. -19. -21.  -8.]]

In [None]:
#Forget gate
0.5027, 0.4118, 0.3048, 0.4385, 0.6765, 0.4278, 0.6818, 0.5454, 0.3663,
0.4813, 0.5508, 0.2888, 0.4759, 0.5775, 0.5775, 0.5080, 0.6337, 0.4091,
0.5642, 0.5267

#Input gate
[0.4687, 0.4942, 0.5426, 0.6495, 0.5120, 0.3999, 0.5375, 0.5043, 0.2980,
         0.3897, 0.3490, 0.4992, 0.5476, 0.5145, 0.6164, 0.5247, 0.3999, 0.4330,
         0.4891, 0.4432]
#Cell gate
[ 0.6091, -0.0528,  0.1487,  0.4652,  0.0528, -0.0240,  0.5084, -0.5659,
          0.3069, -0.3933,  0.0767, -0.5755,  0.4029,  0.1343,  0.6091, -0.0767,
         -0.1822, -0.1343, -0.2830,  0.0432]

#Output gate
[0.6540, 0.7127, 0.6289, 0.6149, 0.3606, 0.3522, 0.4556, 0.4248, 0.4081,
         0.4444, 0.4975, 0.5087, 0.5450, 0.5925, 0.5898, 0.5450, 0.4416, 0.4360,
         0.4416, 0.3801]

#### PART 2 : Scan Node definition and Graph execution

1. We have defined the compute graph needed for processing of one input. This graph will now go an an input to the `body` attribute of the scan node.
2. Around this node, we will define the `h_t-1` and `c_t-1` states as inputs to the model (which get updated after the processing of each input.) and `X` which acts as our scan input. (Meaning in each iteration the scan node will extract one input {row/column} from the provided input `X` and use it for executing the graph.)
3.  We will also define three outputs `h_t`, `c_t` and `h_t_concat`. (`h_t_concat` will concatenate all the hidden states computed in each step.)
4. We then test the execution of this graph with `onnxruntime`

In [48]:
#Defining the input and output value info tensors for the scan_graph creation. These tensors act as the wrapper to the previously defined graph.

#Inputs
scan_input = make_tensor_value_info("scan_input",onnx.TensorProto.FLOAT, [None,10,1])#X ; scan input; Here None defines the varibale number of inputs that can be supplied for input processing.
inp_a      = make_tensor_value_info("inp_a",onnx.TensorProto.FLOAT, [20,1])# h_t-1
inp_b      = make_tensor_value_info("inp_b",onnx.TensorProto.FLOAT, [20,1])# c_t-1

#Outputs
out_a = make_tensor_value_info("out_a", onnx.TensorProto.FLOAT, [20,1])#h_t
out_b = make_tensor_value_info("out_b", onnx.TensorProto.FLOAT, [20,1])#c_t
out_c = make_tensor_value_info("out_c", onnx.TensorProto.FLOAT, [None,20,1])
#This can be 'o_t' and it can also concatenate the outputs of the intermediate hidden states. 
#In the onnx LSTM cell all the hidden states are concatenated and given as outputs. So to match that I am doing the same.
#For constants can define a constant tensor here. Maybe that will help solve this issue.
#Both the scan input and the scan output have the None shape as the first dimension of the tensor. This allows the execution of unknown outputs and removes all the warnings when the graph is serialized.

In [49]:
# Defining the scan node here now
scan_node_lstm = make_node(
    "Scan", 
    inputs=["inp_a","inp_b","scan_input"], 
    outputs=["out_a","out_b","out_c"], 
    num_scan_inputs=1,
    body=lstm_scan, domain=''
)
# The order in which the nodes are defined in the inputs and outputs also matter here and should match the order defined in the body graph.

In [50]:
# Define the graph for the scan node to execute it with onnxruntime.
scan_lstm_node_graph = make_graph(
    nodes = [scan_node_lstm],
    name="lstm-scan-node",
    inputs=[inp_a,inp_b,scan_input],#h_t-1, c_t-1, X
    outputs=[out_a,out_b,out_c]#h_t,c_t,h_t_concat
)
#Here, the scan input is 'scan_input' connected -> X in the compute graph. This will contain the input data that needs to be processed.
#The scan output is 'out_c' connected -> h_t_concat in the compute graph. out_a and out_b are connected to h_t-1 and c_t-1 in the compute graph and keep getting updated after each input is processed.

In [51]:
lstm_scan_node_model = qonnx_make_model(scan_lstm_node_graph, producer_name="LSTM-Scan")
onnx.save(lstm_scan_node_model, './lstm_scan_node_model.onnx')
print(lstm_scan_node_model)

ir_version: 8
producer_name: "LSTM-Scan"
graph {
  node {
    input: "inp_a"
    input: "inp_b"
    input: "scan_input"
    output: "out_a"
    output: "out_b"
    output: "out_c"
    op_type: "Scan"
    attribute {
      name: "body"
      g {
        node {
          input: "X"
          input: "inp_scale"
          input: "zero_point_all"
          output: "ql_input_out"
          name: "ql_input"
          op_type: "QuantizeLinear"
        }
        node {
          input: "ql_input_out"
          input: "scale_test"
          input: "zero_point_all"
          output: "dql_input_out"
          name: "dql_input"
          op_type: "DequantizeLinear"
        }
        node {
          input: "W_f"
          input: "scale_f"
          input: "zero_point_all"
          output: "ql_wf_out"
          name: "ql_w1"
          op_type: "QuantizeLinear"
        }
        node {
          input: "ql_wf_out"
          input: "min"
          input: "max"
          output: "clp_wf"
          nam

In [52]:
#Checking the model for any errors
onnx.checker.check_model(lstm_scan_node_model)
print(lstm_scan_node_model.graph.value_info)

[]


In [53]:
showInNetron('./lstm_scan_node_model.onnx',localhost_url='xirxlabs53')#localhost_url='xirxlabs53'

Stopping http://0.0.0.0:5901
Serving './lstm_scan_node_model.onnx' at http://0.0.0.0:5901


In [54]:
#Have to convert the opset version of the graph here because the clip operator in the previous version did not allow for INT8 inputs.
# It only allowed for FLOAT inputs.
from onnx import version_converter, helper
lstm_scan_node_model_14 = version_converter.convert_version(lstm_scan_node_model, 14)
print(lstm_scan_node_model_14)

ir_version: 8
producer_name: "LSTM-Scan"
graph {
  node {
    input: "inp_a"
    input: "inp_b"
    input: "scan_input"
    output: "out_a"
    output: "out_b"
    output: "out_c"
    op_type: "Scan"
    attribute {
      name: "body"
      g {
        node {
          input: "X"
          input: "inp_scale"
          input: "zero_point_all"
          output: "ql_input_out"
          name: "ql_input"
          op_type: "QuantizeLinear"
        }
        node {
          input: "ql_input_out"
          input: "scale_test"
          input: "zero_point_all"
          output: "dql_input_out"
          name: "dql_input"
          op_type: "DequantizeLinear"
        }
        node {
          input: "W_f"
          input: "scale_f"
          input: "zero_point_all"
          output: "ql_wf_out"
          name: "ql_w1"
          op_type: "QuantizeLinear"
        }
        node {
          input: "ql_wf_out"
          input: "min"
          input: "max"
          output: "clp_wf"
          nam

In [55]:
# Testing to check if the model is serializing without errors or warnings

sess = rt.InferenceSession(lstm_scan_node_model_14.SerializeToString())
#Here, the most common error that you can get it the source and target dimension mismatch for the source input X or for some other input defined in the scan node.
#The error comes when the order of the inputs defined while making the graph for the 'scan_body' does not match the order of the inputs while defiing the scan node or the scan graph [Not sure which of the two affects the results verify that.]
#So by changing the order of the inputs in the body of the scan graph to match the inputs of the scan node or graph solves the error. 

2023-07-11 15:11:56.479292839 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'scale_all'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:56.482526716 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'U_c'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:56.482545727 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'ql_wo_out'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:56.482556205 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'W_c'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:56.482565186 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'W_f'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:56.482573890 [W:onnxr

In [56]:
# Defining the values of the varibales to test the execution of the onnx model
in1_inpa =  np.zeros((20, 1)).astype(np.float32)#'h_t-1'
in2_inpb = np.zeros((20, 1)).astype(np.float32)#'c_t-1'
# in3_scan_input =  np.ones((5, 10, 1)).astype(np.float32)#'X' 10,1 : Because that is the way the shape of the model has been defined.
in3_scan_input = np.empty([5,10,1],dtype=np.float32).reshape([5,10,1])
in3_scan_input.fill(2)
print(in3_scan_input)
input_dict = {}
input_dict["inp_a"] = in1_inpa
input_dict["inp_b"] = in2_inpb
input_dict["scan_input"] = in3_scan_input

[[[2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]]

 [[2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]]

 [[2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]]

 [[2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]]

 [[2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]
  [2.]]]


In [57]:
#Executing the onnx model here.
sess = rt.InferenceSession(lstm_scan_node_model_14.SerializeToString())
output = sess.run(None, input_dict)
print("Final Hidden State = ", output[0])
print("Final Cell State = ", output[1])
print("All Hidden States = ", output[2])

Final Hidden State =  [[ 0.2054974 ]
 [-0.03244696]
 [ 0.01802609]
 [ 0.2054974 ]
 [ 0.04686783]
 [-0.0216313 ]
 [ 0.17665565]
 [-0.15141913]
 [ 0.05768348]
 [-0.12618262]
 [ 0.02523652]
 [-0.18386608]
 [ 0.18386608]
 [ 0.10094609]
 [ 0.19468175]
 [ 0.01802609]
 [-0.02523652]
 [-0.03605217]
 [-0.06489392]
 [ 0.00721043]]
Final Cell State =  [[ 0.38238803]
 [-0.05462686]
 [ 0.02979647]
 [ 0.4320488 ]
 [ 0.10925373]
 [-0.05462686]
 [ 0.6008955 ]
 [-0.5115061 ]
 [ 0.12415196]
 [-0.2731343 ]
 [ 0.04966078]
 [-0.39232022]
 [ 0.35755765]
 [ 0.1837449 ]
 [ 0.6704205 ]
 [ 0.02979647]
 [-0.05462686]
 [-0.07945725]
 [-0.14401627]
 [ 0.01986431]]
All Hidden States =  [[[ 0.12978783]
  [-0.00360522]
  [ 0.03244696]
  [ 0.12618262]
  [ 0.01442087]
  [ 0.00360522]
  [ 0.10094609]
  [-0.09013043]
  [ 0.03605217]
  [-0.05768348]
  [ 0.01802609]
  [-0.11176174]
  [ 0.09734087]
  [ 0.02523652]
  [ 0.1442087 ]
  [-0.01442087]
  [-0.0216313 ]
  [-0.00721043]
  [-0.04326261]
  [ 0.00721043]]

 [[ 0.1802608

2023-07-11 15:11:57.783645469 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'scale_all'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:57.786866488 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'U_c'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:57.786884037 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'ql_wo_out'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:57.786894411 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'W_c'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:57.786903517 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'W_f'. It is not used by any node and should be removed from the model.
2023-07-11 15:11:57.786912164 [W:onnxr

In [58]:
brevitas_out = np.load('./hidden_2_input.npy')
print(brevitas_out[0])
scan_full_out = output[2].reshape([5,1,20])
print(scan_full_out[0])
comp = scan_full_out - brevitas_out
# print(comp)
round_comp = np.round(comp/0.005)

print(round_comp)

[[ 0.2850537  -0.07969244  0.12566884  0.3892669   0.00306509 -0.01532547
   0.16551505 -0.17777543  0.02452075 -0.08275753  0.00306509 -0.18390562
   0.21149147  0.11034337  0.3862018  -0.06743206 -0.05210659 -0.06436697
  -0.10114809  0.        ]]
[[ 0.12978783 -0.00360522  0.03244696  0.12618262  0.01442087  0.00360522
   0.10094609 -0.09013043  0.03605217 -0.05768348  0.01802609 -0.11176174
   0.09734087  0.02523652  0.1442087  -0.01442087 -0.0216313  -0.00721043
  -0.04326261  0.00721043]]
[[[-31.  15. -19. -53.   2.   4. -13.  18.   2.   5.   3.  14. -23. -17.
   -48.  11.   6.  11.  12.   1.]]

 [[-41.  24. -12. -66.   1.   6. -22.  19.   3.   7.   6.  10. -32. -35.
   -66.   9.  12.  12.  17.   2.]]

 [[-45.  31. -11. -73.   1.   7. -34.  22.   4.   6.   7.   7. -35. -49.
   -77.   5.  17.  14.  22.   2.]]

 [[-46.  34. -12. -77.  -2.   6. -41.  23.   5.   6.   7.   5. -34. -58.
   -83.   0.  21.  13.  25.   1.]]

 [[-47.  35. -12. -79.  -2.   7. -45.  25.   4.   4.   8.   5. -

In [None]:
print(brevitas_out)

#### PART 3 : Comparing the output of the above defined LSTM-SCAN node with the ONNX LSTM node.

##### Test settings

1. The `input length` = 10, `hidden_size` = 20.
2. Values of the `weight matrix` and the `recurrence weight matrix` are 1.
3. The input vector `X` is 1.
4. The value of the `weight biases` = 1 and those of the `recurrence bias` = 0.
5. The initial values of the `hidden_state` and the `cell_state` = 0.
6. The LSTM node is defined without peepholes.

These are the same settings as defined for the above LSTM-Scan node.

In [2]:
# Defining the inputs and outputs of the graph we need to create for the graph of the scan body.
#-----------------------------------------------------------------------
# Defining the inputs value info tensors for the compute to be executed for each input.
W = make_tensor_value_info("W", onnx.TensorProto.FLOAT, [1,80,10]) #Weight Matrix
R = make_tensor_value_info("R", onnx.TensorProto.FLOAT, [1,80,20]) #Recurrance Matrix
X_inp = make_tensor_value_info("X_inp", onnx.TensorProto.FLOAT, [None,1,10]) #Inputs
B = make_tensor_value_info("B", onnx.TensorProto.FLOAT, [1,160]) #Weight biases and recurrence biases concatenated

#Output value info tensor definitions

Y = make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [None,1,1,20])
Y_h = make_tensor_value_info("Y_h", onnx.TensorProto.FLOAT, [1,1,20])
Y_c = make_tensor_value_info("Y_c", onnx.TensorProto.FLOAT, [1,1,20])

In [3]:
#Defining the onnx LSTM node here.
onnx_lstm = make_node(
    "LSTM",
    inputs=["X_inp","W","R","B"],
    outputs=["Y","Y_h","Y_c"],
    hidden_size=20,
)

In [4]:
#Defining the onnx graph of the LSTM node to execute with onnx runtime
onnx_lstm_graph=make_graph(
    nodes =[onnx_lstm],
    inputs=[X_inp,W,R,B],
    outputs=[Y,Y_h,Y_c],
    name='onnx-lstm-graph'
)

In [5]:
onnx_lstm_model = qonnx_make_model(onnx_lstm_graph, producer_name="LSTM-Scan")
onnx.save(onnx_lstm_model, './onnx_lstm_model.onnx')
print(onnx_lstm_model)

ir_version: 8
producer_name: "LSTM-Scan"
graph {
  node {
    input: "X_inp"
    input: "W"
    input: "R"
    input: "B"
    output: "Y"
    output: "Y_h"
    output: "Y_c"
    op_type: "LSTM"
    attribute {
      name: "hidden_size"
      i: 20
      type: INT
    }
  }
  name: "onnx-lstm-graph"
  input {
    name: "X_inp"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
          }
          dim {
            dim_value: 1
          }
          dim {
            dim_value: 10
          }
        }
      }
    }
  }
  input {
    name: "W"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 1
          }
          dim {
            dim_value: 80
          }
          dim {
            dim_value: 10
          }
        }
      }
    }
  }
  input {
    name: "R"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 1
          }
          dim {

In [6]:
# Checking the defined model for any errors
onnx.checker.check_model(onnx_lstm_model)
print(onnx_lstm_model.graph.value_info)

[]


In [7]:
showInNetron('./onnx_lstm_model.onnx',localhost_url='xirxlabs53')#,localhost_url='xirxlabs53'

OSError: [Errno 98] Address already in use

In [None]:
# Testing to check if the model is serializing without errors or warnings
sess = rt.InferenceSession(onnx_lstm_model.SerializeToString())

In [None]:
# Defining the values of the varibales to test the execution of the onnx model
W_val = np.empty((1, 80, 10)).astype(np.float32)
W_val.fill(0)
R_val = np.empty((1, 80, 20)).astype(np.float32)
R_val.fill(2)
X_val = np.empty((5, 1, 10)).astype(np.float32)
X_val.fill(1)
W_B = np.ones((1, 80)).astype(np.float32)
R_B = np.zeros((1, 80)).astype(np.float32)
B_val = np.concatenate((W_B, R_B), 1)

#'initial_h' and 'initial_c' are also variables and if they are not initialized then they are considered '0'

input_dict = {}
input_dict["W"] = W_val
input_dict["R"] = R_val
input_dict["X_inp"] = X_val
input_dict["B"] = B_val

In [None]:
#Executing the onnx model here.
sess = rt.InferenceSession(onnx_lstm_model.SerializeToString())
output = sess.run(None, input_dict)
print("Final Hidden State = ", output[1])
print("Final Cell State = ", output[2])
print("All Hidden States = ", output[0])