## Setting Up Artifact Details

In [None]:
import os
# Paths and device settings are exported as environment variables so that the
# %%bash cells of this notebook can read them (e.g. $SNPE_ROOT).

# Root of the SNPE SDK; the bash cells source $SNPE_ROOT/bin/envsetup.sh.
os.environ['SNPE_ROOT'] = "/local/mnt/workspace/aditya/qaisw-v2.15.1.230926150623_62883"  # set up your snpe path here.
os.environ['RAW_FILE_FOLDER'] = "raw"
os.environ['FOLDER_WITH_ARTIFACTS'] = "BertBase"

# DLC files produced by the conversion / quantization / graph-prepare cells.
os.environ['DLCFP16'] = "models/BertBase_fp16.dlc"
# Must match the --output_dlc of the "Offline Preparation of W16A16" cell,
# which writes models/BertBase_w16a16_offline.dlc (the underscore was missing here).
os.environ['DLCW16A16'] = "models/BertBase_w16a16_offline.dlc"
os.environ['DLCFP32'] = "models/BertBase_fp32.dlc"

# Input list file written by the "Creating List Files" cell.
os.environ['TARGET_INPUT_LIST'] = "tf_raw_list.txt"

# Target-device settings.
os.environ['ONDEVICE_FOLDER'] = "BertBase_device"
os.environ['DEVICE_HOST'] = "localhost"
os.environ['DEVICE_ID'] = "2dce6316"  # fill your device-id. Use command "adb devices" to get devices names. example :"e18d5d0"
os.environ['SNPE_TARGET_ARCH'] = "aarch64-android"
os.environ['SNPE_TARGET_STL'] = "libc++_shared.so"
os.environ['SNPE_TARGET_DSPARCH'] = "hexagon-v73"

## Downloading Data

In [None]:
# Fetch the SQuAD v2.0 dev set; the next cell reads it as dev-v2.0.json from the working directory.
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json

In [None]:
import json
import pandas as pd
# Flatten the SQuAD dev set into one record per question:
# {'context': ..., 'question': ..., 'answers': <single answer text>}.
data_path = "dev-v2.0.json"
# Decode explicitly as UTF-8 so parsing does not depend on the platform's default encoding.
with open(data_path, "r", encoding="utf-8") as f:
    squad_data = json.load(f)

context_qa_triples = []
for article in squad_data['data']:
    for paragraph in article['paragraphs']:
        context = paragraph['context']
        for qa in paragraph['qas']:
            question = qa['question']
            # Preference order: first answer, else first plausible answer, else ''.
            # .get() keeps an entry that lacks either key from raising KeyError.
            candidates = qa.get('answers') or qa.get('plausible_answers') or []
            answer = candidates[0]['text'] if candidates else ''

            context_qa_triples.append({'context': context, 'question': question, 'answers': answer})

# Only the first 30 records are kept; the input-list cell below also iterates 30 times.
df = pd.DataFrame(context_qa_triples[:30])
df.head(3)

In [None]:
# optimum provides the optimum-cli ONNX exporter used in the next section.
# NOTE(review): sentencepiece is presumably needed by the tokenizer stack — confirm it is required.
!pip install optimum
!pip install sentencepiece

## Converting the Model to ONNX format using optimum

In [None]:
%%bash
# Export the deepset/bert-base-cased-squad2 checkpoint to ONNX under models/
# (the following cells read models/model.onnx).
optimum-cli export onnx --model deepset/bert-base-cased-squad2 models/

## Getting the Model Input Names

In [None]:
import onnxruntime
# Load the exported ONNX model and print its input descriptors; the input
# names are what the snpe-onnx-to-dlc cell passes with -d.
model_path = 'models/model.onnx'
# Name the execution provider explicitly: onnxruntime builds that ship more than
# the CPU provider reject an InferenceSession created without `providers`.
sess = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])
input_layer_names = sess.get_inputs()
for input_layer in input_layer_names:
    print(input_layer)

## Converting ONNX to FP32 Precision

In [None]:
%%bash
# Convert the ONNX model to models/BertBase_fp32.dlc, declaring each of the three
# inputs with dimensions 1,384 (384 is the max_length the tokenizer cell pads to).
source $SNPE_ROOT/bin/envsetup.sh
snpe-onnx-to-dlc -i models/model.onnx -d input_ids 1,384 -d attention_mask 1,384 -d token_type_ids 1,384 -o models/BertBase_fp32.dlc

### Creating the RAW Files

In [None]:
%%bash
# One folder per model input; the tokenizer cell writes its .raw files into these.
# -p keeps the cell re-runnable: plain mkdir exits non-zero once the folders exist.
mkdir -p input_ids attention_mask token_type_ids

In [None]:
import numpy as np
from transformers import AutoTokenizer, AlbertForQuestionAnswering
import torch
# Tokenizer for the same checkpoint that was exported to ONNX.
tokenizer = AutoTokenizer.from_pretrained("deepset/bert-base-cased-squad2")

# (encoded field, "<folder>/<file prefix>") for every raw file written per sample.
raw_targets = (
    ("input_ids", "input_ids/inp_ids_"),
    ("attention_mask", "attention_mask/attn_mask_"),
    ("token_type_ids", "token_type_ids/token_type_id_"),
)

# Per-sample record [question, encoded inputs, answer, context], keyed by row index.
question_token = {}
for i in range(df.shape[0]):
    row = df.iloc[i]
    question, text, answer = row.question, row.context, row.answers

    # Pad/truncate every (question, context) pair to 384 tokens, returned as NumPy arrays.
    inputs = tokenizer(
        question,
        text,
        return_tensors="np",
        padding='max_length',
        truncation="longest_first",
        max_length=384,
    )
    question_token[i] = [question, inputs, answer, text]

    # Each field is cast to float32 and dumped as raw bytes
    # (presumably the layout the SNPE tools expect — confirm).
    for field, path_prefix in raw_targets:
        raw_path = path_prefix + str(i) + ".raw"
        inputs[field].astype(np.float32).tofile(raw_path)

## Creating List Files

In [None]:
# Write the input list: one line per sample, mapping each model input name to
# its .raw file using the "name:=path" form.
total_iter = 30
input_list_path = "tf_raw_list.txt"
# Report the file that is actually written (the message used to name "small_raw_list.txt").
print("Generating input_list \"{}\" with {} iterations".format(input_list_path, total_iter))
with open(input_list_path, 'w') as f:
    for i in range(total_iter):
        f.write("input_ids:=input_ids/inp_ids_{}.raw attention_mask:=attention_mask/attn_mask_{}.raw token_type_ids:=token_type_ids/token_type_id_{}.raw\n".format(i,i,i))

## Creating W16A16 Precision Model

In [None]:
%%bash
# Quantize models/BertBase_fp32.dlc to 16-bit weights and 16-bit activations,
# writing models/BertBase_w16a16.dlc. tf_raw_list.txt supplies the input samples
# (presumably used for calibration — confirm against the snpe-dlc-quantize docs).
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-quantize --input_dlc models/BertBase_fp32.dlc --input_list tf_raw_list.txt --use_enhanced_quantizer --use_adjusted_weights_quantizer  --output_dlc models/BertBase_w16a16.dlc --enable_htp --htp_socs sm8550 --weights_bitwidth 16 --act_bitwidth 16

## Offline Preparation of W16A16 Precision

In [None]:
%%bash
# Run snpe-dlc-graph-prepare on the quantized DLC, writing
# models/BertBase_w16a16_offline.dlc with start_logits and end_logits as output tensors.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-graph-prepare --input_dlc models/BertBase_w16a16.dlc --output_dlc models/BertBase_w16a16_offline.dlc --set_output_tensors start_logits,end_logits

## Creating FP16 Precision

In [None]:
%%bash
# Run snpe-dlc-graph-prepare on the FP32 DLC with --use_float_io, writing
# models/BertBase_fp16.dlc with start_logits and end_logits as output tensors.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-graph-prepare --input_dlc models/BertBase_fp32.dlc --use_float_io --output_dlc models/BertBase_fp16.dlc --set_output_tensors start_logits,end_logits