## Setting Up Artifact Details

In [None]:
import os
# Paths, artifact names and device settings for this notebook, exported as
# environment variables (the %%bash cells below read $SNPE_ROOT).
os.environ.update({
    'SNPE_ROOT': "/local/mnt/workspace/aditya/qaisw-v2.15.1.230926150623_62883",  # set up your snpe path here.
    'RAW_FILE_FOLDER': "raw",
    'FOLDER_WITH_ARTIFACTS': "Distilbert",
    'DLCFP16': "models/Distilbert_fp16.dlc",
    'DLCW16A16': "models/Distilbert_w16a16_offline.dlc",
    'DLCFP32': "models/Distilbert_fp32.dlc",
    'TARGET_INPUT_LIST': "tf_raw_list.txt",
    'ONDEVICE_FOLDER': "Distilbert_device",
    'DEVICE_HOST': "localhost",
    # Fill in your device-id. Use command "adb devices" to get device names. example: "e18d5d0"
    'DEVICE_ID': "2dce6316",
    'SNPE_TARGET_ARCH': "aarch64-android",
    'SNPE_TARGET_STL': "libc++_shared.so",
    'SNPE_TARGET_DSPARCH': "hexagon-v73",
})

## Downloading Data

In [None]:
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json

In [None]:
import json
import pandas as pd
data_path="dev-v2.0.json"
# Read the JSON as UTF-8 explicitly so decoding does not depend on the
# platform's default text encoding.
with open(data_path,"r",encoding="utf-8") as f:
    squad_data=json.load(f)

# Flatten article -> paragraph -> question into one record per question.
context_qa_triples=[]
for article in squad_data['data']:
    for paragraph in article['paragraphs']:
        context=paragraph['context']
        for qa in paragraph['qas']:
            question=qa['question']
            # Reference answer: first entry of 'answers', else first entry of
            # 'plausible_answers', else an empty string. .get() tolerates
            # entries that omit either key.
            candidates=qa.get('answers') or qa.get('plausible_answers') or []
            answer=candidates[0]['text'] if candidates else ''

            context_qa_triples.append({'context':context,'question':question,'answers':answer})

# Only the first 30 questions are used in the rest of the notebook.
df=pd.DataFrame(context_qa_triples[:30])
df.head(3)

In [None]:
!pip install optimum
!pip install sentencepiece

## Converting the Model to ONNX format using optimum

In [None]:
%%bash
# Export the Hugging Face checkpoint to ONNX into models/
# (a later cell loads models/model.onnx).
optimum-cli export onnx \
    --model distilbert-base-uncased-distilled-squad \
    models/

### Getting Model Input Names

In [None]:
import onnxruntime
# Open the exported ONNX model and print every graph input; the conversion
# cell further down passes input_ids and attention_mask to snpe-onnx-to-dlc.
model_path='models/model.onnx'
sess=onnxruntime.InferenceSession(model_path)
# Despite the variable name, this holds whatever get_inputs() returns; each
# item is printed as-is below.
input_layer_names=sess.get_inputs()
for input_layer in input_layer_names:
    print(input_layer)

## Converting ONNX to FP32 Precision

In [None]:
%%bash
# Convert the ONNX export to models/Distilbert_fp32.dlc. Both inputs are
# declared with dimensions 1,384, matching max_length=384 used when tokenizing.
source $SNPE_ROOT/bin/envsetup.sh
snpe-onnx-to-dlc \
    -i models/model.onnx \
    -d input_ids 1,384 \
    -d attention_mask 1,384 \
    -o models/Distilbert_fp32.dlc

### Creating the RAW Files

In [None]:
%%bash
# Output folders for the per-sample .raw tensors.
# -p: do not fail when the directories already exist, so this cell can be re-run.
mkdir -p input_ids attention_mask

In [None]:
from sklearn.metrics import f1_score
def f1_scores_custom(prediction,ground_truth):
    """Token-level F1, precision and recall between two answer strings.

    Both strings are lower-cased and split on whitespace. Overlap is counted
    with multiplicity (multiset intersection), so a token repeated in the
    prediction is credited at most as many times as it occurs in the ground
    truth; this keeps precision, recall and F1 within [0, 1].

    Args:
        prediction: predicted answer text.
        ground_truth: reference answer text.

    Returns:
        A list ``[f1, precision, recall]``. Two empty strings score
        ``[1.0, 1.0, 1.0]``; exactly one empty string, or no shared token,
        scores ``[0.0, 0.0, 0.0]``.
    """
    from collections import Counter  # local import keeps this cell self-contained

    prediction_tokens=prediction.lower().split()
    ground_truth_tokens=ground_truth.lower().split()
    if not prediction_tokens and not ground_truth_tokens:
        return [1.0,1.0,1.0]
    if not prediction_tokens or not ground_truth_tokens:
        return [0.0,0.0,0.0]

    # Counter & Counter keeps, per token, the minimum of the two counts.
    overlap=Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_common=sum(overlap.values())
    if num_common==0:
        return [0.0,0.0,0.0]

    precision=num_common/len(prediction_tokens)
    recall=num_common/len(ground_truth_tokens)
    f1=2*(precision*recall)/(precision+recall)

    return [f1,precision,recall]

In [None]:
from transformers import DistilBertTokenizer, TFDistilBertForQuestionAnswering
import tensorflow as tf
import numpy as np
# Baseline: run the TensorFlow DistilBERT QA model on every row of df and
# score each decoded answer span against the reference answer.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
model = TFDistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")

f1_scores = []
precision_scores = []
recall_scores = []
question_answer = {}

for i in range(len(df)):
    row = df.iloc[i]
    question, text, answer = row.question, row.context, row.answers

    # Pad/truncate every pair to 384 tokens, the length used for the DLC inputs.
    inputs = tokenizer(
        question,
        text,
        return_tensors="np",
        padding='max_length',
        truncation="longest_first",
        max_length=384,
    )
    outputs = model(**inputs)

    # Highest-scoring start and end positions for the single example in the batch.
    answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
    answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

    # The end index is inclusive, hence the +1 in the slice.
    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    predicted_answer = tokenizer.decode(predict_answer_tokens)
    question_answer[question] = predicted_answer

    f1, precision, recall = f1_scores_custom(predicted_answer, answer)
    f1_scores.append(f1)
    precision_scores.append(precision)
    recall_scores.append(recall)

mean_f1_score = np.mean(f1_scores)
mean_precision_score = np.mean(precision_scores)
mean_recall_score = np.mean(recall_scores)
# Displayed in the order: F1, recall, precision.
mean_f1_score, mean_recall_score, mean_precision_score

## Creating List Files

In [None]:
import numpy as np
from transformers import DistilBertTokenizer, TFDistilBertForQuestionAnswering
import tensorflow as tf
# Tokenize every (question, context) pair of df and dump both model inputs as
# float32 .raw files, one file per sample. The input_ids/ and attention_mask/
# folders must already exist.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
question_token = {}

for i in range(len(df)):
    row = df.iloc[i]
    question, text, answer = row.question, row.context, row.answers

    inputs = tokenizer(
        question,
        text,
        return_tensors="np",
        padding='max_length',
        truncation="longest_first",
        max_length=384,
    )
    # Keep the tokenized sample together with its question, answer and context.
    question_token[i] = [question, inputs, answer, text]

    inp_ids = inputs.input_ids.astype(np.float32)
    inp_ids.tofile("input_ids/inp_ids_" + str(i) + ".raw")

    mask = inputs.attention_mask.astype(np.float32)
    mask.tofile("attention_mask/attn_mask_" + str(i) + ".raw")

In [None]:
# <input_layer_name>:=<input_layer_path>[<space><input_layer_name>:=<input_layer_path>]
# Write one line per sample, pairing each input layer name with its .raw file.
total_iter = 30
# Single source for the file name so the log message cannot drift from the
# file that is actually written.
input_list_path = "tf_raw_list.txt"
print("Generating input_list \"{}\" with {} iterations".format(input_list_path, total_iter))
with open(input_list_path,'w') as f:
    for i in range(total_iter):
        f.write("input_ids:=input_ids/inp_ids_{}.raw attention_mask:=attention_mask/attn_mask_{}.raw\n".format(i,i)) # add token mask if needed

## Creating W16A16 Precision Model

In [None]:
%%bash
# Quantize the float DLC with 16-bit weights and 16-bit activations, using the
# samples listed in tf_raw_list.txt.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-quantize \
    --input_dlc models/Distilbert_fp32.dlc \
    --input_list tf_raw_list.txt \
    --use_enhanced_quantizer \
    --use_adjusted_weights_quantizer \
    --output_dlc models/Distilbert_w16a16.dlc \
    --enable_htp \
    --htp_socs sm8550 \
    --weights_bitwidth 16 \
    --act_bitwidth 16

## Offline Preparation of W16A16 Precision

In [None]:
%%bash
# Run snpe-dlc-graph-prepare on the quantized DLC, with start_logits and
# end_logits set as the output tensors.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-graph-prepare \
    --input_dlc models/Distilbert_w16a16.dlc \
    --output_dlc models/Distilbert_w16a16_offline.dlc \
    --set_output_tensors start_logits,end_logits

## Creating FP16 Precision

In [None]:
%%bash
# FP16 variant: run snpe-dlc-graph-prepare on the float DLC with
# --use_float_io, again with start_logits and end_logits as output tensors.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-graph-prepare \
    --input_dlc models/Distilbert_fp32.dlc \
    --use_float_io \
    --output_dlc models/Distilbert_fp16.dlc \
    --set_output_tensors start_logits,end_logits